summaryrefslogtreecommitdiff
path: root/src/ToolBox/superpmi/superpmi/neardiffer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/ToolBox/superpmi/superpmi/neardiffer.cpp')
-rw-r--r--src/ToolBox/superpmi/superpmi/neardiffer.cpp1031
1 files changed, 1031 insertions, 0 deletions
diff --git a/src/ToolBox/superpmi/superpmi/neardiffer.cpp b/src/ToolBox/superpmi/superpmi/neardiffer.cpp
new file mode 100644
index 0000000000..5b2e3b1b57
--- /dev/null
+++ b/src/ToolBox/superpmi/superpmi/neardiffer.cpp
@@ -0,0 +1,1031 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+//----------------------------------------------------------
+// nearDiffer.cpp - differ that handles code that is very similar
+//----------------------------------------------------------
+
+#include "standardpch.h"
+
+#ifdef USE_COREDISTOOLS
+#include "coredistools.h"
+#endif // USE_COREDISTOOLS
+
+#include "logging.h"
+#include "neardiffer.h"
+
+#ifdef USE_COREDISTOOLS
+
+//
+// LogFromCoreDisToolsHelper: forwards a printf-style message and its va_list to Logger::LogVprintf at 'level'.
+// The function/file/line information recorded is that of this helper itself (__func__, __FILE__, __LINE__),
+// not of the code that produced the message, since we are only linking with the CoreDisTools library.
+//
+static void LogFromCoreDisToolsHelper(LogLevel level, const char *msg, va_list argList)
+{
+ Logger::LogVprintf(__func__, __FILE__, __LINE__, level, argList, msg); // note: argList is passed before msg
+}
+
+#define LOGGER(L) /* defines a static variadic Log##L(msg, ...) that logs at LOGLEVEL_##L via the helper */ \
+static void Log##L(const char *msg, ...) \
+{\
+ va_list argList; \
+ va_start(argList, msg); \
+ LogFromCoreDisToolsHelper (LOGLEVEL_##L, msg, argList); \
+ va_end(argList); \
+}
+
+LOGGER(VERBOSE) // defines LogVERBOSE
+LOGGER(ERROR) // defines LogERROR
+LOGGER(WARNING) // defines LogWARNING
+
+const PrintControl CorPrinter= { LogERROR, LogWARNING, LogVERBOSE, LogVERBOSE }; // passed to NewDiffer in InitAsmDiff; LogVERBOSE fills the last two slots
+
+#endif // USE_COREDISTOOLS
+
+//
+// Sets up asm diffing: creates the CoreDisTools differ when that disassembler is selected.
+//
+void NearDiffer::InitAsmDiff()
+{
+#ifdef USE_COREDISTOOLS
+    // Nothing to create unless the CoreDisTools path was requested.
+    if (!UseCoreDisTools)
+        return;
+    corAsmDiff = NewDiffer(Target_Host, &CorPrinter, NearDiffer::compareOffsets);
+#endif // USE_COREDISTOOLS
+}
+
+//
+// Destructor: hands the CoreDisTools differ, if there is one, to FinishDiff.
+//
+NearDiffer::~NearDiffer()
+{
+#ifdef USE_COREDISTOOLS
+    // Nothing to finish when no differ exists.
+    if (corAsmDiff == nullptr)
+        return;
+    FinishDiff(corAsmDiff);
+#endif // USE_COREDISTOOLS
+}
+
+// At a high level, the near differ takes in a method context and two compile results, performs
+// some simple fixups, and then compares the main artifacts of the compile result (i.e. generated
+// code, GC info, EH info, debug info, etc.) for equality. In order to be fast, the fixups and
+// definitions of "equality" are minimal; for example, the GC info check just does a simple memcmp.
+//
+// The entry point into the near differ is NearDiffer::compare; its doc comments have more
+// details on what it does. That function in turn fans out to various other components. For asm
+// diffing, the main function of interest is NearDiffer::compareCodeSection.
+//
+// Most of the diffing logic is architecture-independent, with the following exceptions:
+//
+// - The MSDIS instance must be created with knowledge of the architecture it is working with.
+// - The heuristics used to compare differing literal operand values make some architecture-specific
+// assumptions.
+// - The code stream is fixed up using relocations recorded at compilation time. The logic
+// for applying these should, in theory, be architecture-independent, but depending on how
+// the runtime implements this from platform to platform, there might be subtle differences here.
+//
+
+#ifdef USE_MSVCDIS
+// Creates a new MSDIS disassembler for the target; callers in this file delete it. Returns nullptr if no target macro applies.
+DIS* NearDiffer::GetMsVcDis()
+{
+    // Start from nullptr so a build with neither target macro defined never returns an indeterminate pointer.
+    DIS *disasm = nullptr;
+#ifdef _TARGET_AMD64_
+    if ((TargetArchitecture != nullptr) && (0 == _stricmp(TargetArchitecture, "arm64")))
+    {
+        disasm = DIS::PdisNew(DIS::distArm64); // TargetArchitecture explicitly asks for arm64
+    }
+    else
+    {
+        disasm = DIS::PdisNew(DIS::distX8664);
+    }
+#elif defined(_TARGET_X86_)
+    disasm = DIS::PdisNew(DIS::distX86);
+#endif
+
+    return disasm;
+}
+
+#endif // USE_MSVCDIS
+
+//
+// Simple, quick-and-dirty disassembler. If NearDiffer::compareCodeSection finds that two code
+// streams differ, it will call this to dump the two differing code blocks to the log. The dump
+// is logged under the verbose logging level, one line per instruction: address, raw bytes
+// (padded to at least 7 byte columns so the mnemonics line up), then MSDIS's formatted text.
+// Without USE_MSVCDIS this only logs "No disassembler".
+//
+// Arguments:
+// block - A pointer to the code block to disassemble.
+// blocksize - The size of the code block to disassemble.
+// originalAddr - The original base address of the code block.
+//
+void NearDiffer::DumpCodeBlock(unsigned char *block, ULONG blocksize, void *originalAddr)
+{
+#ifdef USE_MSVCDIS
+    DIS *disasm = GetMsVcDis();
+    if (disasm == nullptr)
+    {
+        LogWarning("No disassembler instance available; skipping code dump");
+        return;
+    }
+    size_t offset = 0;
+    std::string codeBlock;
+    while (offset < blocksize)
+    {
+        DIS::INSTRUCTION instr;
+        DIS::OPERAND ops[3];
+        // Offer the disassembler at most 15 bytes (the original cap), but never more than remain in the block.
+        const size_t remaining = (size_t)blocksize - offset;
+        const size_t maxInstrBytes = (remaining < 15) ? remaining : 15;
+        size_t instrSize = disasm->CbDisassemble((DIS::ADDR)originalAddr + offset, (void *)(block + offset), maxInstrBytes);
+        if (instrSize == 0)
+        {
+            LogWarning("Zero sized instruction");
+            break;
+        }
+        disasm->FDecode(&instr, ops, 3);
+        wchar_t instrMnemonicWide[64]; // I never know how much to allocate...
+        disasm->CchFormatInstr(instrMnemonicWide, 64);
+        char instrMnemonic[128];
+        size_t count;
+        wcstombs_s(&count, instrMnemonic, 128, instrMnemonicWide, 64);
+        const size_t minInstrBytes = 7;
+        size_t instrBytes = max(instrSize, minInstrBytes);
+        size_t buffSize = _snprintf(nullptr, 0, "%p %s\n", (void*)((size_t)originalAddr+offset), instrMnemonic) + 3 * instrBytes + 1; // 3 columns per byte + NUL
+        char *buff = new char[buffSize];
+        int written = 0;
+        written += sprintf_s(buff, buffSize, "%p ", (void*)((size_t)originalAddr+offset));
+        for (size_t i = 0; i < instrBytes; i++)
+        {
+            if (i < instrSize)
+                written += sprintf_s(buff + written, buffSize - written, "%02X ", *(const uint8_t*)(block + offset + i));
+            else
+                written += sprintf_s(buff + written, buffSize - written, "   "); // same width as "%02X " so columns align
+        }
+        written += sprintf_s(buff + written, buffSize - written, "%s\n", instrMnemonic);
+        codeBlock += buff;
+        delete[] buff;
+        offset += instrSize;
+    }
+    LogVerbose("Code dump:\n%s", codeBlock.c_str());
+    delete disasm;
+#else // !USE_MSVCDIS
+    LogVerbose("No disassembler");
+#endif // !USE_MSVCDIS
+}
+
+//
+// Context handed to NearDiffer::compareOffsets as its opaque payload. compareCodeSection fills it by positional aggregate initialization, so field order matters.
+//
+struct DiffData
+{
+ // Common Data
+ CompileResult *cr; // compareCodeSection stores cr2 here; compareOffsets uses it for CallTargetTypes and searchAddressMap lookups
+
+ // Details of the first block (pointers are stored as size_t values)
+ size_t blocksize1; // size of the code block being compared
+ size_t datablock1; // pointer to the read-only data buffer
+ size_t datablockSize1; // size of the read-only data block
+ size_t originalBlock1; // original base address of the code block
+ size_t originalDataBlock1; // original base address of the read-only data block
+ size_t otherCodeBlock1; // original base address of the other (hot or cold) code block
+ size_t otherCodeBlockSize1; // size of that other code block
+
+ // Details of the second block (same meaning as the *1 fields above)
+ size_t blocksize2;
+ size_t datablock2;
+ size_t datablockSize2;
+ size_t originalBlock2;
+ size_t originalDataBlock2;
+ size_t otherCodeBlock2;
+ size_t otherCodeBlockSize2;
+};
+
+//
+// NearDiff Offset Comparator: the callback handed to NewDiffer in InitAsmDiff, also called directly by compareCodeSection.
+// Determine whether two syntactically different constants are semantically equivalent, using certain
+// heuristics. payload is cast to a const DiffData*. Returns true as soon as any heuristic accepts the pair.
+//
+bool NearDiffer::compareOffsets(const void *payload,
+ size_t blockOffset, // added to originalBlock1/2 to form ip1/ip2
+ size_t instrLen, // added to ip1/ip2 when forming the IP-relative targets
+ uint64_t offset1, // constant taken from the first code stream
+ uint64_t offset2) // constant taken from the second code stream
+{
+ // The trivial case
+ if (offset1 == offset2)
+ {
+ return true;
+ }
+
+ const DiffData *data = (const DiffData *)payload;
+ size_t ip1 = data->originalBlock1 + blockOffset;
+ size_t ip2 = data->originalBlock2 + blockOffset;
+ size_t ipRelOffset1 = ip1 + instrLen + (size_t)offset1;
+ size_t ipRelOffset2 = ip2 + instrLen + (size_t)offset2;
+
+ // Case where we have a call into flat address -- the most common case.
+ size_t gOffset1 = ipRelOffset1;
+ size_t gOffset2 = ipRelOffset2;
+ if ((DWORD)gOffset1 == (DWORD)gOffset2) //make sure the lower 32bits match (best we can do in the current replay form)
+ return true;
+
+ //Case where we have an offset into the read only section (e.g. loading a float value)
+ size_t roOffset1a = (size_t)offset1 - data->originalDataBlock1;
+ size_t roOffset2a = (size_t)offset2 - data->originalDataBlock2;
+ if ((roOffset1a == roOffset2a) && (roOffset1a < data->datablockSize1)) //Confirm it's an offset that fits inside our RoRegion (only block 1's size is checked)
+ return true;
+
+ // This case is written to catch IP-relative offsets to the RO data-section,
+ // i.e. ip + instrLen + offset lands the same distance past each RO data base
+ // (for example, an IP-relative load of a floating-point constant).
+ size_t roOffset1b = ipRelOffset1 - data->originalDataBlock1;
+ size_t roOffset2b = ipRelOffset2 - data->originalDataBlock2;
+ if ((roOffset1b == roOffset2b) && (roOffset1b < data->datablockSize1)) //Confirm it's an offset that fits inside our RoRegion
+ return true;
+
+ //Case where we push an address to our own code section.
+ size_t gOffset1a = (size_t)offset1 - data->originalBlock1;
+ size_t gOffset2a = (size_t)offset2 - data->originalBlock2;
+ if ((gOffset1a == gOffset2a) && (gOffset1a < data->blocksize1)) //Confirm it's in our code region
+ return true;
+
+ //Case where we push an address in the other codeblock.
+ size_t gOffset1b = (size_t)offset1 - data->otherCodeBlock1;
+ size_t gOffset2b = (size_t)offset2 - data->otherCodeBlock2;
+ if ((gOffset1b == gOffset2b) && (gOffset1b < data->otherCodeBlockSize1)) //Confirm it's in the other code region
+ return true;
+
+ //Case where we have an offset into the hot codeblock from the cold code block (why?)
+ size_t ocOffset1 = ipRelOffset1 - data->otherCodeBlock1;
+ size_t ocOffset2 = ipRelOffset2 - data->otherCodeBlock2;
+ if (ocOffset1 == ocOffset2) //Would be nice to check to see if it fits in the other code block
+ return true;
+
+ //VSD calling case. Only the first stream's target (minus 8) is looked up in CallTargetTypes.
+ size_t Offset1 = (ipRelOffset1 - 8);
+ if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset1) != (DWORD)-1)
+ {
+ // This logging is too noisy, so disable it.
+ //LogVerbose("Found VSD callsite, did softer compare than ideal");
+ return true;
+ }
+
+ //x86 VSD calling cases. Here the raw constants (offset1 - 4, and offset2 as-is) are looked up.
+ size_t Offset1b = (size_t)offset1 - 4;
+ size_t Offset2b = (size_t)offset2;
+ if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset1b) != (DWORD)-1)
+ {
+ // This logging is too noisy, so disable it.
+ //LogVerbose("Found VSD callsite, did softer compare than ideal");
+ return true;
+ }
+ if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset2b) != (DWORD)-1)
+ {
+ // This logging is too noisy, so disable it.
+ //LogVerbose("Found VSD callsite, did softer compare than ideal");
+ return true;
+ }
+
+ //Case might be a field address that we handed out to handle inlined values being loaded into
+ //a register as an immediate value (and where the address is encoded as an indirect immediate load)
+ size_t realTargetAddr = (size_t)data->cr->searchAddressMap((void*)gOffset2);
+ if (realTargetAddr == gOffset1)
+ return true;
+
+ //Case might be a field address that we handed out to handle inlined values being loaded into
+ //a register as an immediate value (and where the address is encoded and loaded by immediate into a register)
+ realTargetAddr = (size_t)data->cr->searchAddressMap((void*)offset2);
+ if (realTargetAddr == offset1)
+ return true;
+ if (realTargetAddr == 0x424242)//this offset matches what we got back from a getTailCallCopyArgsThunk
+ return true;
+
+ realTargetAddr = (size_t)data->cr->searchAddressMap((void*)(gOffset2)); // NOTE(review): repeats the gOffset2 lookup made above
+ if (realTargetAddr != -1) //we know this was passed out as a bbloc (presumably -1 means "not found" -- confirm against searchAddressMap)
+ return true;
+
+ return false;
+}
+
+//
+// Compares two code sections for syntactic equality. This is the core of the asm diffing logic.
+//
+// When CoreDisTools is selected the comparison is delegated to NearDiffCodeBlocks. Otherwise this relies
+// on MSDIS's decoded representation of an instruction: using MSDIS's internal IR, it goes through the
+// code stream and compares, instruction by instruction, op code and operand values for equality.
+//
+// Obviously, just blindly comparing operand values will raise a lot of false alarms. In order to
+// compensate for phenomena like literal pointer addresses in the code stream changing, this applies
+// some heuristics on mismatching operand values to try to normalize them a little bit. Essentially,
+// if operand values don't match, they are re-interpreted as various relative deltas from known base
+// addresses. For example, a common case is a pointer into the read-only data section. One of the
+// heuristics subtracts the base address of the read-only data section from each operand value and
+// checks to see if they are the same distance away from their respective read-only base addresses.
+//
+// Notes:
+// - The core syntactic comparison is platform agnostic; we compare op codes and operand values
+// using MSDIS's architecture-independent IR (i.e. the data structures defined in msvcdis.h).
+// Only the disassembler instance itself is initialized differently based on the target
+// architecture.
+// - That being said, the heuristics themselves are not guaranteed to be platform agnostic. For
+// instance, there is a case that applies only to x86 VSD calls. When porting the near differ
+// to new platforms, these special cases should be examined and ported with care.
+//
+// Arguments:
+// mc - The method context of the method to diff. Unused.
+// cr1 - The first compile result to compare. Unused.
+// cr2 - The second compile result; stored in the DiffData payload for compareOffsets' lookups.
+// block1 - A pointer to the first code block to disassemble.
+// blocksize1 - The size of the first code block to compare.
+// datablock1 - A pointer to the first read-only data block. Only recorded in the payload.
+// datablockSize1 - The size of the first read-only data block to compare.
+// originalBlock1 - The original base address of the first code block.
+// originalDataBlock1 - The original base address of the first read-only data block.
+// otherCodeBlock1 - The original base address of the first cold code block. Note that this is
+// just an address; we don't need the cold code buffer.
+// otherCodeBlockSize1- The size of the first cold code block.
+// block2 - A pointer to the second code block to disassemble.
+// blocksize2 - The size of the second code block to compare.
+// datablock2 - A pointer to the second read-only data block. Only recorded in the payload.
+// datablockSize2 - The size of the second read-only data block to compare.
+// originalBlock2 - The original base address of the second code block.
+// originalDataBlock2 - The original base address of the second read-only data block.
+// otherCodeBlock2 - The original base address of the second cold code block. Note that this is
+// just an address; we don't need the cold code buffer.
+// otherCodeBlockSize2- The size of the second cold code block.
+//
+// Return Value:
+// True if the code sections are syntactically identical; false otherwise (always false if no disassembler ran).
+//
+
+bool NearDiffer::compareCodeSection(
+    MethodContext *mc,
+    CompileResult *cr1,
+    CompileResult *cr2,
+    unsigned char *block1,
+    ULONG blocksize1,
+    unsigned char *datablock1,
+    ULONG datablockSize1,
+    void *originalBlock1,
+    void *originalDataBlock1,
+    void *otherCodeBlock1,
+    ULONG otherCodeBlockSize1,
+    unsigned char *block2,
+    ULONG blocksize2,
+    unsigned char *datablock2,
+    ULONG datablockSize2,
+    void *originalBlock2,
+    void *originalDataBlock2,
+    void *otherCodeBlock2,
+    ULONG otherCodeBlockSize2)
+{
+    DiffData data =
+    {
+        cr2, // the compile result compareOffsets consults for CallTargetTypes / address-map lookups
+
+        // Details of the first block
+        (size_t)blocksize1,
+        (size_t)datablock1,
+        (size_t)datablockSize1,
+        (size_t)originalBlock1,
+        (size_t)originalDataBlock1,
+        (size_t)otherCodeBlock1,
+        (size_t)otherCodeBlockSize1,
+
+        // Details of the second block
+        (size_t)blocksize2,
+        (size_t)datablock2,
+        (size_t)datablockSize2,
+        (size_t)originalBlock2,
+        (size_t)originalDataBlock2,
+        (size_t)otherCodeBlock2,
+        (size_t)otherCodeBlockSize2
+    };
+
+#ifdef USE_COREDISTOOLS
+    if (UseCoreDisTools)
+    {
+        bool areSame = NearDiffCodeBlocks(corAsmDiff, &data,
+            (const uint8_t *)originalBlock1, block1, blocksize1,
+            (const uint8_t *)originalBlock2, block2, blocksize2);
+
+        if (!areSame)
+        {
+            DumpDiffBlocks(corAsmDiff, (const uint8_t *) originalBlock1,
+                block1, blocksize1, (const uint8_t *) originalBlock2,
+                block2, blocksize2);
+        }
+
+        return areSame;
+    }
+#endif // USE_COREDISTOOLS
+
+#ifdef USE_MSVCDIS
+    bool haveSeenRet = false;
+    DIS *disasm_1 = GetMsVcDis();
+    DIS *disasm_2 = GetMsVcDis();
+
+    size_t offset = 0;
+
+    if (blocksize1 != blocksize2)
+    {
+        LogVerbose("Code sizes don't match %u != %u", blocksize1, blocksize2);
+        goto DumpDetails;
+    }
+
+    while (offset < blocksize1)
+    {
+        DIS::INSTRUCTION instr_1;
+        DIS::INSTRUCTION instr_2;
+        const int MaxOperandCount = 5;
+        DIS::OPERAND ops_1[MaxOperandCount];
+        DIS::OPERAND ops_2[MaxOperandCount];
+
+        // Zero out the locals, just in case.
+        memset(&instr_1, 0, sizeof(instr_1));
+        memset(&instr_2, 0, sizeof(instr_2));
+        memset(&ops_1, 0, sizeof(ops_1));
+        memset(&ops_2, 0, sizeof(ops_2));
+        const size_t maxInstrBytes = (((size_t)blocksize1 - offset) < 15) ? ((size_t)blocksize1 - offset) : 15; // block sizes are equal here; never decode past the end
+        size_t instrSize_1 = disasm_1->CbDisassemble((DIS::ADDR)originalBlock1 + offset, (void *)(block1 + offset), maxInstrBytes);
+        size_t instrSize_2 = disasm_2->CbDisassemble((DIS::ADDR)originalBlock2 + offset, (void *)(block2 + offset), maxInstrBytes);
+
+        if (instrSize_1 != instrSize_2)
+        {
+            LogVerbose("Different instruction sizes %llu %llu", (unsigned long long)instrSize_1, (unsigned long long)instrSize_2);
+            goto DumpDetails;
+        }
+        if (instrSize_1 == 0)
+        {
+            if (haveSeenRet)
+            {
+                // This logging is pretty noisy, so disable it.
+                //LogVerbose("instruction size of zero after seeing a ret (soft issue?).");
+                break;
+            }
+            LogWarning("instruction size of zero.");
+            goto DumpDetails;
+        }
+
+        bool FDecodeError = false;
+        if (!disasm_1->FDecode(&instr_1, ops_1, MaxOperandCount))
+        {
+            LogWarning("FDecode of instr_1 returned false.");
+            FDecodeError = true;
+        }
+        if (!disasm_2->FDecode(&instr_2, ops_2, MaxOperandCount))
+        {
+            LogWarning("FDecode of instr_2 returned false.");
+            FDecodeError = true;
+        }
+
+        wchar_t instrMnemonic_1[64]; // I never know how much to allocate...
+        disasm_1->CchFormatInstr(instrMnemonic_1, 64);
+        wchar_t instrMnemonic_2[64]; // I never know how much to allocate...
+        disasm_2->CchFormatInstr(instrMnemonic_2, 64);
+        if (wcscmp(instrMnemonic_1, L"ret") == 0)
+            haveSeenRet = true;
+        if (wcscmp(instrMnemonic_1, L"rep ret") == 0)
+            haveSeenRet = true;
+
+        // First, check to see if these instructions are actually identical.
+        // This is done 1) to avoid the detailed comparison of the fields of instr_1
+        // and instr_2 if they are identical, and 2) because in the event that
+        // there are bugs or unimplemented instructions in FDecode, we don't want
+        // to count them as diffs if they are bitwise identical.
+
+        if (memcmp((block1 + offset), (block2 + offset), instrSize_1) != 0)
+        {
+            if (FDecodeError)
+            {
+                LogWarning("FDecode returned false.");
+                goto DumpDetails;
+            }
+
+            if (instr_1.opa != instr_2.opa)
+            {
+                LogVerbose("different opa %d %d", instr_1.opa, instr_2.opa);
+                goto DumpDetails;
+            }
+            if (instr_1.coperand != instr_2.coperand)
+            {
+                LogVerbose("different coperand %u %u", (unsigned int)instr_1.coperand, (unsigned int)instr_2.coperand);
+                goto DumpDetails;
+            }
+            if (instr_1.dwModifiers != instr_2.dwModifiers)
+            {
+                LogVerbose("different dwModifiers %u %u", instr_1.dwModifiers, instr_2.dwModifiers);
+                goto DumpDetails;
+            }
+
+            for (size_t i = 0; i < instr_1.coperand; i++)
+            {
+                if (ops_1[i].cb != ops_2[i].cb)
+                {
+                    LogVerbose("different cb %llu %llu", (unsigned long long)ops_1[i].cb, (unsigned long long)ops_2[i].cb);
+                    goto DumpDetails;
+                }
+                if (ops_1[i].imcls != ops_2[i].imcls)
+                {
+                    LogVerbose("different imcls %d %d", ops_1[i].imcls, ops_2[i].imcls);
+                    goto DumpDetails;
+                }
+                if (ops_1[i].opcls != ops_2[i].opcls)
+                {
+                    LogVerbose("different opcls %d %d", ops_1[i].opcls, ops_2[i].opcls);
+                    goto DumpDetails;
+                }
+                if (ops_1[i].rega1 != ops_2[i].rega1)
+                {
+                    LogVerbose("different rega1 %d %d", ops_1[i].rega1, ops_2[i].rega1);
+                    goto DumpDetails;
+                }
+                if (ops_1[i].rega2 != ops_2[i].rega2)
+                {
+                    LogVerbose("different rega2 %d %d", ops_1[i].rega2, ops_2[i].rega2);
+                    goto DumpDetails;
+                }
+                if (ops_1[i].rega3 != ops_2[i].rega3)
+                {
+                    LogVerbose("different rega3 %d %d", ops_1[i].rega3, ops_2[i].rega3);
+                    goto DumpDetails;
+                }
+                if (ops_1[i].wScale != ops_2[i].wScale)
+                {
+                    LogVerbose("different wScale %u %u", ops_1[i].wScale, ops_2[i].wScale);
+                    goto DumpDetails;
+                }
+
+                //
+                // These are special.. we can often reason out exactly why these values
+                // are different using heuristics.
+                //
+                // Why is Instruction size passed as zero?
+                // Ans: Because the implementation of compareOffsets() uses
+                // the instruction size to compute absolute offsets in the case of
+                // PC-relative addressing, and MSVCDis already reports the
+                // absolute offsets! For example:
+                // 0F 2E 05 67 00 9A FD ucomiss xmm0, dword ptr[FFFFFFFFFD9A006Eh]
+                //
+
+                if (compareOffsets(&data, offset, 0, ops_1[i].dwl, ops_2[i].dwl))
+                {
+                    continue;
+                }
+                else
+                {
+                    size_t gOffset1 = (size_t)originalBlock1 + offset + (size_t)ops_1[i].dwl;
+                    size_t gOffset2 = (size_t)originalBlock2 + offset + (size_t)ops_2[i].dwl;
+
+                    LogVerbose("operand %d dwl is different", (int)i);
+#ifdef _TARGET_AMD64_
+                    LogVerbose("gOffset1 %016llX", gOffset1);
+                    LogVerbose("gOffset2 %016llX", gOffset2);
+                    LogVerbose("gOffset1 - gOffset2 %016llX", gOffset1 - gOffset2);
+#elif defined(_TARGET_X86_)
+                    LogVerbose("gOffset1 %08X", gOffset1);
+                    LogVerbose("gOffset2 %08X", gOffset2);
+                    LogVerbose("gOffset1 - gOffset2 %08X", gOffset1 - gOffset2);
+#endif
+                    LogVerbose("dwl1 %016llX", (unsigned long long)ops_1[i].dwl);
+                    LogVerbose("dwl2 %016llX", (unsigned long long)ops_2[i].dwl);
+                    goto DumpDetails;
+                }
+            }
+        }
+        offset += instrSize_1;
+    }
+    delete disasm_1;
+    delete disasm_2;
+    return true;
+
+DumpDetails:
+    LogVerbose("block1 %p", block1);
+    LogVerbose("block2 %p", block2);
+    LogVerbose("originalBlock1 [%p,%p)", originalBlock1, (const uint8_t *)originalBlock1 + blocksize1);
+    LogVerbose("originalBlock2 [%p,%p)", originalBlock2, (const uint8_t *)originalBlock2 + blocksize2);
+    LogVerbose("blocksize1 %08X", blocksize1);
+    LogVerbose("blocksize2 %08X", blocksize2);
+    LogVerbose("dataBlock1 [%p,%p)", originalDataBlock1, (const uint8_t *)originalDataBlock1 + datablockSize1);
+    LogVerbose("dataBlock2 [%p,%p)", originalDataBlock2, (const uint8_t *)originalDataBlock2 + datablockSize2);
+    LogVerbose("datablockSize1 %08X", datablockSize1);
+    LogVerbose("datablockSize2 %08X", datablockSize2);
+    LogVerbose("otherCodeBlock1 [%p,%p)", otherCodeBlock1, (const uint8_t *)otherCodeBlock1 + otherCodeBlockSize1);
+    LogVerbose("otherCodeBlock2 [%p,%p)", otherCodeBlock2, (const uint8_t *)otherCodeBlock2 + otherCodeBlockSize2);
+    LogVerbose("otherCodeBlockSize1 %08X", otherCodeBlockSize1);
+    LogVerbose("otherCodeBlockSize2 %08X", otherCodeBlockSize2);
+
+#ifdef _TARGET_AMD64_
+    LogVerbose("offset %016llX", offset);
+    LogVerbose("addr1 %016llX", (size_t)originalBlock1 + offset);
+    LogVerbose("addr2 %016llX", (size_t)originalBlock2 + offset);
+#elif defined(_TARGET_X86_)
+    LogVerbose("offset %08X", offset);
+    LogVerbose("addr1 %08X", (size_t)originalBlock1 + offset);
+    LogVerbose("addr2 %08X", (size_t)originalBlock2 + offset);
+#endif
+
+    LogVerbose("Block1:");
+    DumpCodeBlock(block1, blocksize1, originalBlock1);
+    LogVerbose("Block2:");
+    DumpCodeBlock(block2, blocksize2, originalBlock2);
+
+    if (disasm_1 != nullptr)
+        delete disasm_1;
+    if (disasm_2 != nullptr)
+        delete disasm_2;
+    return false;
+#else // !USE_MSVCDIS
+    return false; // No disassembler; assume there are differences
+#endif // !USE_MSVCDIS
+}
+
+//
+// Compares two read-only data sections for equality. At present only their sizes are compared.
+//
+// Arguments:
+// mc - The method context of the method to diff. Unused.
+// cr1 - The first compile result to compare. Unused.
+// cr2 - The second compile result to compare. Unused.
+// block1 - A pointer to the first read-only data block. Unused.
+// blocksize1 - The size of the first read-only data block.
+// originalDataBlock1 - The original base address of the first read-only data block. Unused.
+// block2 - A pointer to the second read-only data block. Unused.
+// blocksize2 - The size of the second read-only data block.
+// originalDataBlock2 - The original base address of the second read-only data block. Unused.
+//
+// Return Value:
+// True if the two read-only data sections have the same size; false otherwise.
+//
+bool NearDiffer::compareReadOnlyDataBlock(MethodContext *mc, CompileResult *cr1, CompileResult *cr2,
+    unsigned char *block1, ULONG blocksize1, void *originalDataBlock1,
+    unsigned char *block2, ULONG blocksize2, void *originalDataBlock2)
+{
+    const bool sizesAgree = (blocksize1 == blocksize2);
+    if (sizesAgree)
+    {
+        // Equal sizes includes the "no rodata on either side" case (both zero).
+        //
+        // TODO-Cleanup: The values on the datablock seem to wobble. Need further investigation to evaluate a good near comparison for these
+        return true;
+    }
+
+    // Differently sized read-only data is reported as a diff.
+    LogVerbose("compareReadOnlyDataBlock found non-matching sizes %u %u", blocksize1, blocksize2);
+    return false;
+}
+
+//
+// Compares two EH info blocks for equality, clause by clause.
+//
+// Arguments:
+// mc - The method context of the method to diff. Unused.
+// cr1 - The first compile result to compare.
+// cr2 - The second compile result to compare.
+//
+// Return Value:
+// True if both results report the same EH clauses in the same order; false otherwise.
+//
+bool NearDiffer::compareEHInfo(MethodContext *mc, CompileResult *cr1, CompileResult *cr2)
+{
+    // The clause counts must agree before individual clauses can be compared.
+    const ULONG clauseCount1 = cr1->repSetEHcount();
+    const ULONG clauseCount2 = cr2->repSetEHcount();
+    if (clauseCount1 != clauseCount2)
+    {
+        LogVerbose("compareEHInfo found non-matching sizes %u %u", clauseCount1, clauseCount2);
+        return false;
+    }
+
+    // With equal counts, "no exception clauses at all" simply means the loop body never runs.
+    // Both clauses are fetched before any field is compared.
+    for (unsigned int clause = 0; clause < clauseCount1; clause++)
+    {
+        ULONG flagsA;
+        ULONG tryOffA;
+        ULONG tryLenA;
+        ULONG hndOffA;
+        ULONG hndLenA;
+        ULONG tokenA;
+        cr1->repSetEHinfo(clause, &flagsA, &tryOffA, &tryLenA, &hndOffA, &hndLenA, &tokenA);
+
+        ULONG flagsB;
+        ULONG tryOffB;
+        ULONG tryLenB;
+        ULONG hndOffB;
+        ULONG hndLenB;
+        ULONG tokenB;
+        cr2->repSetEHinfo(clause, &flagsB, &tryOffB, &tryLenB, &hndOffB, &hndLenB, &tokenB);
+
+        if (flagsA != flagsB)
+        {
+            LogVerbose("EH flags don't match %u != %u", flagsA, flagsB);
+            return false;
+        }
+
+        const bool sameTry = (tryOffA == tryOffB) && (tryLenA == tryLenB);
+        if (!sameTry)
+        {
+            LogVerbose("EH try information don't match, offset: %u %u, length: %u %u", tryOffA, tryOffB, tryLenA, tryLenB);
+            return false;
+        }
+
+        const bool sameHandler = (hndOffA == hndOffB) && (hndLenA == hndLenB);
+        if (!sameHandler)
+        {
+            LogVerbose("EH handler information don't match, offset: %u %u, length: %u %u", hndOffA, hndOffB, hndLenA, hndLenB);
+            return false;
+        }
+
+        if (tokenA != tokenB)
+        {
+            LogVerbose("EH class tokens don't match %u!=%u", tokenA, tokenB);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+//
+// Compares two GC info blocks for equality: same reported size and byte-identical contents.
+//
+// Arguments:
+// mc - The method context of the method to diff. Unused.
+// cr1 - The first compile result to compare.
+// cr2 - The second compile result to compare.
+//
+// Return Value:
+// True if the GC info blocks are identical; false otherwise.
+//
+bool NearDiffer::compareGCInfo(MethodContext *mc, CompileResult *cr1, CompileResult *cr2)
+{
+    size_t sizeA;
+    void *blobA;
+    cr1->repAllocGCInfo(&sizeA, &blobA);
+
+    size_t sizeB;
+    void *blobB;
+    cr2->repAllocGCInfo(&sizeB, &blobB);
+
+    if (sizeA != sizeB)
+    {
+        LogVerbose("Reported GCInfo sizes don't match: %u != %u", (unsigned int)sizeA, (unsigned int)sizeB);
+        return false;
+    }
+
+    // The sizes are equal at this point, so one length serves both buffers.
+    const bool identical = (memcmp(blobA, blobB, sizeA) == 0);
+    if (!identical)
+    {
+        LogVerbose("GCInfo doesn't match.");
+    }
+    return identical;
+}
+
+//
+// Compares two sets of native var info for equality. Only presence, method handle and count are checked.
+//
+// Arguments:
+// mc - The method context of the method to diff.
+// cr1 - The first compile result to compare.
+// cr2 - The second compile result to compare.
+//
+// Return Value:
+// True if the native var info is considered identical; false otherwise.
+//
+bool NearDiffer::compareVars(MethodContext *mc, CompileResult *cr1, CompileResult *cr2)
+{
+    CORINFO_METHOD_HANDLE ftn_1;
+    ULONG32 cVars_1;
+    ICorDebugInfo::NativeVarInfo *vars_1;
+
+    CORINFO_METHOD_HANDLE ftn_2;
+    ULONG32 cVars_2;
+    ICorDebugInfo::NativeVarInfo *vars_2;
+
+    CORINFO_METHOD_INFO info;
+    unsigned flags = 0;
+    mc->repCompileMethod(&info, &flags); // fills info; info.ftn is checked against ftn_1 below
+
+    bool set1 = cr1->repSetVars(&ftn_1, &cVars_1, &vars_1);
+    bool set2 = cr2->repSetVars(&ftn_2, &cVars_2, &vars_2);
+    if (!set1 && !set2)
+        return true; // we don't have vars for either of these.
+    if (set1 != set2)
+    {
+        LogVerbose("missing matching vars sets");
+        return false;
+    }
+
+    //no vars
+    if (cVars_1 == 0 && cVars_2 == 0)
+    {
+        return true;
+    }
+
+    if (ftn_1 != ftn_2)
+    {
+        // We would like to find out this situation. Log first so the message is not lost if the break ends the run.
+        LogVerbose("compareVars found non-matching CORINFO_METHOD_HANDLE %p %p", ftn_1, ftn_2);
+        __debugbreak();
+        return false;
+    }
+    if (ftn_1 != info.ftn)
+    {
+        LogVerbose("compareVars found issues with the CORINFO_METHOD_HANDLE %p %p", ftn_1, info.ftn);
+        return false;
+    }
+
+    if (cVars_1 != cVars_2)
+    {
+        LogVerbose("compareVars found non-matching var count %u %u", cVars_1, cVars_2);
+        return false;
+    }
+
+    //TODO-Cleanup: The values on the NativeVarInfo array seem to wobble. Need further investigation to evaluate a good near comparison for these
+    //for(unsigned int i=0;i<cVars_1;i++)
+    //{
+    // if(vars_1[i].startOffset!=vars_2[i].startOffset)
+    // {
+    // LogVerbose("compareVars found non-matching startOffsets %u %u for var: %u", vars_1[i].startOffset, vars_2[i].startOffset, i);
+    // return false;
+    // }
+    //}
+
+    return true;
+}
+
+//
+// Compares two sets of native offset mappings for equality (ilOffset, nativeOffset and source of each entry).
+//
+// Arguments:
+// mc - The method context of the method to diff.
+// cr1 - The first compile result to compare.
+// cr2 - The second compile result to compare.
+//
+// Return Value:
+// True if the native offset mappings are identical; false otherwise.
+//
+bool NearDiffer::compareBoundaries(MethodContext *mc, CompileResult *cr1, CompileResult *cr2)
+{
+    CORINFO_METHOD_HANDLE ftn_1;
+    ULONG32 cMap_1;
+    ICorDebugInfo::OffsetMapping *map_1;
+
+    CORINFO_METHOD_HANDLE ftn_2;
+    ULONG32 cMap_2;
+    ICorDebugInfo::OffsetMapping *map_2;
+
+    CORINFO_METHOD_INFO info;
+    unsigned flags = 0;
+    mc->repCompileMethod(&info, &flags); // result is not consulted here; the call is kept as in the original
+
+    bool set1 = cr1->repSetBoundaries(&ftn_1, &cMap_1, &map_1);
+    bool set2 = cr2->repSetBoundaries(&ftn_2, &cMap_2, &map_2);
+    if (!set1 && !set2)
+        return true; // we don't have boundaries for either of these.
+    if (set1 != set2)
+    {
+        LogVerbose("missing matching boundary sets");
+        return false;
+    }
+
+    if (ftn_1 != ftn_2)
+    {
+        LogVerbose("compareBoundaries found non-matching CORINFO_METHOD_HANDLE %p %p", ftn_1, ftn_2);
+        return false;
+    }
+
+    //no maps
+    if (cMap_1 == 0 && cMap_2 == 0)
+        return true;
+
+    if (cMap_1 != cMap_2)
+    {
+        LogVerbose("compareBoundaries found non-matching map count %u %u", cMap_1, cMap_2);
+        return false;
+    }
+
+    for (unsigned int i = 0; i < cMap_1; i++)
+    {
+        if (map_1[i].ilOffset != map_2[i].ilOffset)
+        {
+            LogVerbose("compareBoundaries found non-matching ilOffset %u %u for map: %u", map_1[i].ilOffset, map_2[i].ilOffset, i);
+            return false;
+        }
+        if (map_1[i].nativeOffset != map_2[i].nativeOffset)
+        {
+            LogVerbose("compareBoundaries found non-matching nativeOffset %u %u for map: %u", map_1[i].nativeOffset, map_2[i].nativeOffset, i);
+            return false;
+        }
+        if (map_1[i].source != map_2[i].source)
+        {
+            LogVerbose("compareBoundaries found non-matching source %u %u for map: %u", (unsigned int)map_1[i].source, (unsigned int)map_2[i].source, i);
+            return false;
+        }
+    }
+
+    // Every mapping entry matched.
+
+    return true;
+}
+
+//
+// Compares two compiled versions of a method for equality. This is the main driver for the various
+// components of near diffing.
+//
+// Before any comparison, relocations recorded by each compile result are applied to its hot code,
+// cold code and read-only data buffers, using the original base addresses reported by repAllocMem.
+//
+// Arguments:
+// mc - The method context of the method to diff.
+// cr1 - The first compile result to compare.
+// cr2 - The second compile result to compare.
+//
+// Return Value:
+// True if every compared artifact matches; false as soon as one of them differs.
+//
+bool NearDiffer::compare(MethodContext *mc, CompileResult *cr1, CompileResult *cr2)
+{
+    // Replayed allocation details of the first compile result.
+    ULONG hotSize1;
+    ULONG coldSize1;
+    ULONG roSize1;
+    ULONG ehCount1;
+    CorJitAllocMemFlag memFlag1;
+    unsigned char *hotCode1;
+    unsigned char *coldCode1;
+    unsigned char *roData1;
+    void *hotAddr1;
+    void *coldAddr1;
+    void *roAddr1;
+
+    // Replayed allocation details of the second compile result.
+    ULONG hotSize2;
+    ULONG coldSize2;
+    ULONG roSize2;
+    ULONG ehCount2;
+    CorJitAllocMemFlag memFlag2;
+    unsigned char *hotCode2;
+    unsigned char *coldCode2;
+    unsigned char *roData2;
+    void *hotAddr2;
+    void *coldAddr2;
+    void *roAddr2;
+
+    cr1->repAllocMem(&hotSize1, &coldSize1, &roSize1, &ehCount1, &memFlag1,
+        &hotCode1, &coldCode1, &roData1, &hotAddr1, &coldAddr1, &roAddr1);
+    cr2->repAllocMem(&hotSize2, &coldSize2, &roSize2, &ehCount2, &memFlag2,
+        &hotCode2, &coldCode2, &roData2, &hotAddr2, &coldAddr2, &roAddr2);
+
+    LogDebug("HCS1 %d CCS1 %d RDS1 %d xcpnt1 %d flag1 %08X, HCB %p CCB %p RDB %p ohcb %p occb %p odb %p",
+        hotSize1, coldSize1, roSize1, ehCount1, memFlag1,
+        hotCode1, coldCode1, roData1,
+        hotAddr1, coldAddr1, roAddr1);
+    LogDebug("HCS2 %d CCS2 %d RDS2 %d xcpnt2 %d flag2 %08X, HCB %p CCB %p RDB %p ohcb %p occb %p odb %p",
+        hotSize2, coldSize2, roSize2, ehCount2, memFlag2,
+        hotCode2, coldCode2, roData2,
+        hotAddr2, coldAddr2, roAddr2);
+
+    // Apply recorded relocations to every buffer before anything is compared.
+    cr1->applyRelocs(hotCode1, hotSize1, hotAddr1);
+    cr2->applyRelocs(hotCode2, hotSize2, hotAddr2);
+    cr1->applyRelocs(coldCode1, coldSize1, coldAddr1);
+    cr2->applyRelocs(coldCode2, coldSize2, coldAddr2);
+    cr1->applyRelocs(roData1, roSize1, roAddr1);
+    cr2->applyRelocs(roData2, roSize2, roAddr2);
+
+    // Hot code, with the cold section passed as the "other" code block.
+    const bool hotCodeSame = compareCodeSection(mc, cr1, cr2,
+        hotCode1, hotSize1, roData1, roSize1, hotAddr1, roAddr1, coldAddr1, coldSize1,
+        hotCode2, hotSize2, roData2, roSize2, hotAddr2, roAddr2, coldAddr2, coldSize2);
+    if (!hotCodeSame)
+        return false;
+
+    const bool coldCodeSame = compareCodeSection(mc, cr1, cr2,
+        coldCode1, coldSize1, roData1, roSize1, coldAddr1, roAddr1, hotAddr1, hotSize1,
+        coldCode2, coldSize2, roData2, roSize2, coldAddr2, roAddr2, hotAddr2, hotSize2);
+    if (!coldCodeSame)
+        return false;
+
+    const bool roDataSame = compareReadOnlyDataBlock(mc, cr1, cr2,
+        roData1, roSize1, roAddr1,
+        roData2, roSize2, roAddr2);
+    if (!roDataSame)
+        return false;
+
+    // The remaining artifacts are checked in order; && short-circuits, so a later check
+    // runs only when every earlier one matched.
+    return compareEHInfo(mc, cr1, cr2)
+        && compareGCInfo(mc, cr1, cr2)
+        && compareVars(mc, cr1, cr2)
+        && compareBoundaries(mc, cr1, cr2);
+}