diff options
Diffstat (limited to 'src/ToolBox/superpmi/superpmi/neardiffer.cpp')
-rw-r--r-- | src/ToolBox/superpmi/superpmi/neardiffer.cpp | 1031 |
1 files changed, 1031 insertions, 0 deletions
diff --git a/src/ToolBox/superpmi/superpmi/neardiffer.cpp b/src/ToolBox/superpmi/superpmi/neardiffer.cpp new file mode 100644 index 0000000000..5b2e3b1b57 --- /dev/null +++ b/src/ToolBox/superpmi/superpmi/neardiffer.cpp @@ -0,0 +1,1031 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +//---------------------------------------------------------- +// nearDiffer.cpp - differ that handles code that is very similar +//---------------------------------------------------------- + +#include "standardpch.h" + +#ifdef USE_COREDISTOOLS +#include "coredistools.h" +#endif // USE_COREDISTOOLS + +#include "logging.h" +#include "neardiffer.h" + +#ifdef USE_COREDISTOOLS + +// +// Helper functions to print messages from CoreDisTools Library +// The file/linenumber information is from this helper itself, +// since we are only linking with the CoreDisTools library. +// +static void LogFromCoreDisToolsHelper(LogLevel level, const char *msg, va_list argList) +{ + Logger::LogVprintf(__func__, __FILE__, __LINE__, level, argList, msg); +} + +#define LOGGER(L) \ +static void Log##L(const char *msg, ...) \ +{\ + va_list argList; \ + va_start(argList, msg); \ + LogFromCoreDisToolsHelper (LOGLEVEL_##L, msg, argList); \ + va_end(argList); \ +} + +LOGGER(VERBOSE) +LOGGER(ERROR) +LOGGER(WARNING) + +const PrintControl CorPrinter= { LogERROR, LogWARNING, LogVERBOSE, LogVERBOSE }; + +#endif // USE_COREDISTOOLS + +// +// The NearDiff Disassembler Initialization +// +void NearDiffer::InitAsmDiff() +{ +#ifdef USE_COREDISTOOLS + if (UseCoreDisTools) + { + corAsmDiff = NewDiffer(Target_Host, &CorPrinter, NearDiffer::compareOffsets); + } +#endif // USE_COREDISTOOLS +} + +// +// The NearDiff destructor +// +NearDiffer::~NearDiffer() +{ +#ifdef USE_COREDISTOOLS + if (corAsmDiff != nullptr) + { + FinishDiff(corAsmDiff); + } +#endif // USE_COREDISTOOLS +} + +// At a high level, the near differ takes in a method context and two compile results, performs +// some simple fixups, and then compares the main artifacts of the compile result (i.e. generated +// code, GC info, EH info, debug info, etc.) for equality. In order to be fast, the fixups and +// definitions of "equality" are minimal; for example, the GC info check just does a simple memcmp. +// +// The entrypoint into the near differ is nearDiffer::compare; its doc comments will have more +// details on what it does. That function in turn fans out to various other components. For asm +// diffing, the main function of interest will be nearDiffer::compareCodeSection. +// +// Most of the diffing logic is architecture-independent, with the following exceptions: +// +// - The MSDIS instance must be created with knowledge of the architecture it is working with. +// - The heuristics to compare different literal operand values has some architecture-specific +// assumptions. +// - The code stream is fixed up using relocations recorded during compilation time. The logic +// for applying these should, in theory, be architecture independent, but depending on how +// the runtime implements this from platform to platform, there might be subtle differences here. +// + +#ifdef USE_MSVCDIS + +DIS* NearDiffer::GetMsVcDis() +{ + DIS *disasm; + +#ifdef _TARGET_AMD64_ + if ((TargetArchitecture != nullptr) && (0 == _stricmp(TargetArchitecture, "arm64"))) + { + disasm = DIS::PdisNew(DIS::distArm64); + } + else + { + disasm = DIS::PdisNew(DIS::distX8664); + } +#elif defined(_TARGET_X86_) + disasm = DIS::PdisNew(DIS::distX86); +#endif + + return disasm; +} + +#endif // USE_MSVCDIS + +// +// Simple, quick-and-dirty disassembler. If NearDiffer::compareCodeSection finds that two code +// streams differ, it will call this to dump the two differing code blocks to the log. The dump +// is logged under the verbose logging level. +// +// The output format is in MSDIS's disassembly format. +// +// Arguments: +// block - A pointer to the code block to diassemble. +// blocksize - The size of the code block to disassemble. +// originalAddr - The original base address of the code block. +// +void NearDiffer::DumpCodeBlock(unsigned char *block, ULONG blocksize, void *originalAddr) +{ +#ifdef USE_MSVCDIS + DIS *disasm = GetMsVcDis(); + size_t offset = 0; + std::string codeBlock; + + while (offset < blocksize) + { + DIS::INSTRUCTION instr; + DIS::OPERAND ops[3]; + + size_t instrSize = disasm->CbDisassemble((DIS::ADDR)originalAddr + offset, (void *)(block + offset), 15); + if(instrSize==0) + { + LogWarning("Zero sized instruction"); + break; + } + disasm->FDecode(&instr, ops, 3); + + wchar_t instrMnemonicWide[64]; // I never know how much to allocate... + disasm->CchFormatInstr(instrMnemonicWide, 64); + char instrMnemonic[128]; + size_t count; + wcstombs_s(&count, instrMnemonic, 128, instrMnemonicWide, 64); + + const size_t minInstrBytes = 7; + size_t instrBytes = max(instrSize, minInstrBytes); + size_t buffSize = _snprintf(nullptr, 0, "%p %s\n", (void*)((size_t)originalAddr+offset), instrMnemonic) + 3 * instrBytes + 1; + char *buff = new char[buffSize]; + int written = 0; + written += sprintf_s(buff, buffSize, "%p ", (void*)((size_t)originalAddr+offset)); + for (size_t i = 0; i < instrBytes; i++) + { + if (i < instrSize) + { + written += sprintf_s(buff + written, buffSize - written, "%02X ", *(const uint8_t*)(block + offset + i)); + } + else + { + written += sprintf_s(buff + written, buffSize - written, " "); + } + } + written += sprintf_s(buff + written, buffSize - written, "%s\n", instrMnemonic); + codeBlock += buff; + delete[] buff; + offset += instrSize; + } + LogVerbose("Code dump:\n%s", codeBlock.c_str()); + delete disasm; +#else // !USE_MSVCDIS + LogVerbose("No disassembler"); +#endif // !USE_MSVCDIS +} + +// +// Struct to capture the information required by offset comparator. +// +struct DiffData +{ + // Common Data + CompileResult *cr; + + // Details of the first block + size_t blocksize1; + size_t datablock1; + size_t datablockSize1; + size_t originalBlock1; + size_t originalDataBlock1; + size_t otherCodeBlock1; + size_t otherCodeBlockSize1; + + // Details of the second block + size_t blocksize2; + size_t datablock2; + size_t datablockSize2; + size_t originalBlock2; + size_t originalDataBlock2; + size_t otherCodeBlock2; + size_t otherCodeBlockSize2; +}; + +// +// NearDiff Offset Comparator. +// Determine whether two syntactically different constants are +// semantically equivalent, using certain heuristics. +// +bool NearDiffer::compareOffsets(const void *payload, + size_t blockOffset, + size_t instrLen, + uint64_t offset1, + uint64_t offset2) +{ + // The trivial case + if (offset1 == offset2) + { + return true; + } + + const DiffData *data = (const DiffData *)payload; + size_t ip1 = data->originalBlock1 + blockOffset; + size_t ip2 = data->originalBlock2 + blockOffset; + size_t ipRelOffset1 = ip1 + instrLen + (size_t)offset1; + size_t ipRelOffset2 = ip2 + instrLen + (size_t)offset2; + + // Case where we have a call into flat address -- the most common case. + size_t gOffset1 = ipRelOffset1; + size_t gOffset2 = ipRelOffset2; + if ((DWORD)gOffset1 == (DWORD)gOffset2) //make sure the lower 32bits match (best we can do in the current replay form) + return true; + + //Case where we have an offset into the read only section (e.g. loading a float value) + size_t roOffset1a = (size_t)offset1 - data->originalDataBlock1; + size_t roOffset2a = (size_t)offset2 - data->originalDataBlock2; + if ((roOffset1a == roOffset2a) && (roOffset1a < data->datablockSize1)) //Confirm its an offset that fits inside our RoRegion + return true; + + // This case is written to catch IP-relative offsets to the RO data-section + // For example: + // + size_t roOffset1b = ipRelOffset1 - data->originalDataBlock1; + size_t roOffset2b = ipRelOffset2 - data->originalDataBlock2; + if ((roOffset1b == roOffset2b) && (roOffset1b < data->datablockSize1)) //Confirm its an offset that fits inside our RoRegion + return true; + + //Case where we push an address to our own code section. + size_t gOffset1a = (size_t)offset1 - data->originalBlock1; + size_t gOffset2a = (size_t)offset2 - data->originalBlock2; + if ((gOffset1a == gOffset2a) && (gOffset1a < data->blocksize1)) //Confirm its in our code region + return true; + + //Case where we push an address in the other codeblock. + size_t gOffset1b = (size_t)offset1 - data->otherCodeBlock1; + size_t gOffset2b = (size_t)offset2 - data->otherCodeBlock2; + if ((gOffset1b == gOffset2b) && (gOffset1b < data->otherCodeBlockSize1)) //Confirm it's in the other code region + return true; + + //Case where we have an offset into the hot codeblock from the cold code block (why?) + size_t ocOffset1 = ipRelOffset1 - data->otherCodeBlock1; + size_t ocOffset2 = ipRelOffset2 - data->otherCodeBlock2; + if (ocOffset1 == ocOffset2) //Would be nice to check to see if it fits in the other code block + return true; + + //VSD calling case. + size_t Offset1 = (ipRelOffset1 - 8); + if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset1) != (DWORD)-1) + { + // This logging is too noisy, so disable it. + //LogVerbose("Found VSD callsite, did softer compare than ideal"); + return true; + } + + //x86 VSD calling cases. + size_t Offset1b = (size_t)offset1 - 4; + size_t Offset2b = (size_t)offset2; + if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset1b) != (DWORD)-1) + { + // This logging is too noisy, so disable it. + //LogVerbose("Found VSD callsite, did softer compare than ideal"); + return true; + } + if (data->cr->CallTargetTypes->GetIndex((DWORDLONG)Offset2b) != (DWORD)-1) + { + // This logging is too noisy, so disable it. + //LogVerbose("Found VSD callsite, did softer compare than ideal"); + return true; + } + + //Case might be a field address that we handed out to handle inlined values being loaded into + //a register as an immediate value (and where the address is encoded as an indirect immediate load) + size_t realTargetAddr = (size_t)data->cr->searchAddressMap((void*)gOffset2); + if (realTargetAddr == gOffset1) + return true; + + //Case might be a field address that we handed out to handle inlined values being loaded into + //a register as an immediate value (and where the address is encoded and loaded by immediate into a register) + realTargetAddr = (size_t)data->cr->searchAddressMap((void*)offset2); + if (realTargetAddr == offset1) + return true; + if (realTargetAddr == 0x424242)//this offset matches what we got back from a getTailCallCopyArgsThunk + return true; + + realTargetAddr = (size_t)data->cr->searchAddressMap((void*)(gOffset2)); + if (realTargetAddr != -1) //we know this was passed out as a bbloc + return true; + + return false; +} + +// +// Compares two code sections for syntactic equality. This is the core of the asm diffing logic. +// +// This mostly relies on MSDIS's decoded representation of an instruction to compare for equality. +// That is, using MSDIS's internal IR, this goes through the code stream and compares, instruction +// by instruction, op code and operand values for equality. +// +// Obviously, just blindly comparing operand values will raise a lot of false alarms. In order to +// compensate for phenomena like literal pointer addresses in the code stream changing, this applies +// some heuristics on mismatching operand values to try to normalize them a little bit. Essentially, +// if operand values don't match, they are re-interpreted as various relative deltas from known base +// addresses. For example, a common case is a pointer into the read-only data section. One of the +// heuristics subtracts both operand values from the base address of the read-only data section and +// checks to see if they are the same distance away from their respective read-only base addresses. +// +// Notes: +// - The core syntactic comparison is platform agnostic; we compare op codes and operand values +// using MSDIS's architecture-independent IR (i.e. the data structures defined in msvcdis.h). +// Only the disassembler instance itself is initialized differently based on the target arch- +// itecture. +// - That being said, the heuristics themselves are not guaranteed to be platform agnostic. For +// instance, there is a case that applies only to x86 VSD calls. When porting the near differ +// to new platforms, these special cases should be examined and ported with care. +// +// Arguments: +// mc - The method context of the method to diff. Unused. +// cr1 - The first compile result to compare. Unused. +// cr2 - The second compile result to compare. Unused. +// block1 - A pointer to the first code block to diassemble. +// blocksize1 - The size of the first code block to compare. +// datablock1 - A pointer to the first read-only data block to compare. Unused. +// datablockSize1 - The size of the first read-only data block to compare. +// originalBlock1 - The original base address of the first code block. +// originalDataBlock1 - The original base address of the first read-only data block. +// otherCodeBlock1 - The original base address of the first cold code block. Note that this is +// just an address; we don't need the cold code buffer. +// otherCodeBlockSize1- The size of the first cold code block. +// block2 - A pointer to the second code block to diassemble. +// blocksize2 - The size of the second code block to compare. +// datablock2 - A pointer to the second read-only data block to compare. +// datablockSize2 - The size of the second read-only data block to compare. +// originalBlock2 - The original base address of the second code block. +// originalDataBlock2 - The original base address of the second read-only data block. +// otherCodeBlock2 - The original base address of the second cold code block. Note that this is +// just an address; we don't need the cold code buffer. +// otherCodeBlockSize2- The size of the second cold code block. +// +// Return Value: +// True if the code sections are syntactically identical; false otherwise. +// + +bool NearDiffer::compareCodeSection( + MethodContext *mc, + CompileResult *cr1, + CompileResult *cr2, + unsigned char *block1, + ULONG blocksize1, + unsigned char *datablock1, + ULONG datablockSize1, + void *originalBlock1, + void *originalDataBlock1, + void *otherCodeBlock1, + ULONG otherCodeBlockSize1, + unsigned char *block2, + ULONG blocksize2, + unsigned char *datablock2, + ULONG datablockSize2, + void *originalBlock2, + void *originalDataBlock2, + void *otherCodeBlock2, + ULONG otherCodeBlockSize2) +{ + DiffData data = + { + cr2, + + // Details of the first block + (size_t)blocksize1, + (size_t)datablock1, + (size_t)datablockSize1, + (size_t)originalBlock1, + (size_t)originalDataBlock1, + (size_t)otherCodeBlock1, + (size_t)otherCodeBlockSize1, + + // Details of the second block + (size_t)blocksize2, + (size_t)datablock2, + (size_t)datablockSize2, + (size_t)originalBlock2, + (size_t)originalDataBlock2, + (size_t)otherCodeBlock2, + (size_t)otherCodeBlockSize2 + }; + +#ifdef USE_COREDISTOOLS + if (UseCoreDisTools) + { + bool areSame = NearDiffCodeBlocks(corAsmDiff, &data, + (const uint8_t *)originalBlock1, block1, blocksize1, + (const uint8_t *)originalBlock2, block2, blocksize2); + + if (!areSame) + { + DumpDiffBlocks(corAsmDiff, (const uint8_t *) originalBlock1, + block1, blocksize1, (const uint8_t *) originalBlock2, + block2, blocksize2); + } + + return areSame; + } +#endif // USE_COREDISTOOLS + +#ifdef USE_MSVCDIS + bool haveSeenRet = false; + DIS *disasm_1 = GetMsVcDis(); + DIS *disasm_2 = GetMsVcDis(); + + size_t offset = 0; + + if (blocksize1 != blocksize2) + { + LogVerbose("Code sizes don't match %u != %u", blocksize1, blocksize2); + goto DumpDetails; + } + + while (offset < blocksize1) + { + DIS::INSTRUCTION instr_1; + DIS::INSTRUCTION instr_2; + const int MaxOperandCount = 5; + DIS::OPERAND ops_1[MaxOperandCount]; + DIS::OPERAND ops_2[MaxOperandCount]; + + // Zero out the locals, just in case. + memset(&instr_1, 0, sizeof(instr_1)); + memset(&instr_2, 0, sizeof(instr_2)); + memset(&ops_1, 0, sizeof(ops_1)); + memset(&ops_2, 0, sizeof(ops_2)); + + size_t instrSize_1 = disasm_1->CbDisassemble((DIS::ADDR)originalBlock1 + offset, (void *)(block1 + offset), 15); + size_t instrSize_2 = disasm_2->CbDisassemble((DIS::ADDR)originalBlock2 + offset, (void *)(block2 + offset), 15); + + if (instrSize_1 != instrSize_2) + { + LogVerbose("Different instruction sizes %llu %llu", instrSize_1, instrSize_2); + goto DumpDetails; + } + if (instrSize_1 == 0) + { + if (haveSeenRet) + { + // This logging is pretty noisy, so disable it. + //LogVerbose("instruction size of zero after seeing a ret (soft issue?)."); + break; + } + LogWarning("instruction size of zero."); + goto DumpDetails; + } + + bool FDecodeError = false; + if (!disasm_1->FDecode(&instr_1, ops_1, MaxOperandCount)) + { + LogWarning("FDecode of instr_1 returned false."); + FDecodeError = true; + } + if (!disasm_2->FDecode(&instr_2, ops_2, MaxOperandCount)) + { + LogWarning("FDecode of instr_2 returned false."); + FDecodeError = true; + } + + wchar_t instrMnemonic_1[64]; // I never know how much to allocate... + disasm_1->CchFormatInstr(instrMnemonic_1, 64); + wchar_t instrMnemonic_2[64]; // I never know how much to allocate... + disasm_2->CchFormatInstr(instrMnemonic_2, 64); + if (wcscmp(instrMnemonic_1, L"ret") == 0) + haveSeenRet = true; + if (wcscmp(instrMnemonic_1, L"rep ret") == 0) + haveSeenRet = true; + + // First, check to see if these instructions are actually identical. + // This is done 1) to avoid the detailed comparison of the fields of instr_1 + // and instr_2 if they are identical, and 2) because in the event that + // there are bugs or unimplemented instructions in FDecode, we don't want + // to count them as diffs if they are bitwise identical. + + if (memcmp((block1 + offset), (block2 + offset), instrSize_1) != 0) + { + if (FDecodeError) + { + LogWarning("FDecode returned false."); + goto DumpDetails; + } + + if (instr_1.opa != instr_2.opa) + { + LogVerbose("different opa %d %d", instr_1.opa, instr_2.opa); + goto DumpDetails; + } + if (instr_1.coperand != instr_2.coperand) + { + LogVerbose("different coperand %u %u", (unsigned int)instr_1.coperand, (unsigned int)instr_2.coperand); + goto DumpDetails; + } + if (instr_1.dwModifiers != instr_2.dwModifiers) + { + LogVerbose("different dwModifiers %u %u", instr_1.dwModifiers, instr_2.dwModifiers); + goto DumpDetails; + } + + for (size_t i = 0; i < instr_1.coperand; i++) + { + if (ops_1[i].cb != ops_2[i].cb) + { + LogVerbose("different cb %llu %llu", ops_1[i].cb, ops_2[i].cb); + goto DumpDetails; + } + if (ops_1[i].imcls != ops_2[i].imcls) + { + LogVerbose("different imcls %d %d", ops_1[i].imcls, ops_2[i].imcls); + goto DumpDetails; + } + if (ops_1[i].opcls != ops_2[i].opcls) + { + LogVerbose("different opcls %d %d", ops_1[i].opcls, ops_2[i].opcls); + goto DumpDetails; + } + if (ops_1[i].rega1 != ops_2[i].rega1) + { + LogVerbose("different rega1 %d %d", ops_1[i].rega1, ops_2[i].rega1); + goto DumpDetails; + } + if (ops_1[i].rega2 != ops_2[i].rega2) + { + LogVerbose("different rega2 %d %d", ops_1[i].rega2, ops_2[i].rega2); + goto DumpDetails; + } + if (ops_1[i].rega3 != ops_2[i].rega3) + { + LogVerbose("different rega3 %d %d", ops_1[i].rega3, ops_2[i].rega3); + goto DumpDetails; + } + if (ops_1[i].wScale != ops_2[i].wScale) + { + LogVerbose("different wScale %u %u", ops_1[i].wScale, ops_2[i].wScale); + goto DumpDetails; + } + + // + // These are special.. we can often reason out exactly why these values + // are different using heuristics. + // + // Why is Instruction size passed as zero? + // Ans: Because the implementation of areOffsetsEquivalent() uses + // the instruction size to compute absolute offsets in the case of + // PC-relative addressing, and MSVCDis already reports the + // absolute offsets! For example: + // 0F 2E 05 67 00 9A FD ucomiss xmm0, dword ptr[FFFFFFFFFD9A006Eh] + // + + if (compareOffsets(&data, offset, 0, ops_1[i].dwl, ops_2[i].dwl)) + { + continue; + } + else + { + size_t gOffset1 = (size_t)originalBlock1 + offset + (size_t)ops_1[i].dwl; + size_t gOffset2 = (size_t)originalBlock2 + offset + (size_t)ops_2[i].dwl; + + LogVerbose("operand %d dwl is different", i); +#ifdef _TARGET_AMD64_ + LogVerbose("gOffset1 %016llX", gOffset1); + LogVerbose("gOffset2 %016llX", gOffset2); + LogVerbose("gOffset1 - gOffset2 %016llX", gOffset1 - gOffset2); +#elif defined(_TARGET_X86_) + LogVerbose("gOffset1 %08X", gOffset1); + LogVerbose("gOffset2 %08X", gOffset2); + LogVerbose("gOffset1 - gOffset2 %08X", gOffset1 - gOffset2); +#endif + LogVerbose("dwl1 %016llX", ops_1[i].dwl); + LogVerbose("dwl2 %016llX", ops_2[i].dwl); + goto DumpDetails; + } + } + } + offset += instrSize_1; + } + delete disasm_1; + delete disasm_2; + return true; + +DumpDetails: + LogVerbose("block1 %p", block1); + LogVerbose("block2 %p", block2); + LogVerbose("originalBlock1 [%p,%p)", originalBlock1, (const uint8_t *)originalBlock1 + blocksize1); + LogVerbose("originalBlock2 [%p,%p)", originalBlock2, (const uint8_t *)originalBlock2 + blocksize2); + LogVerbose("blocksize1 %08X", blocksize1); + LogVerbose("blocksize2 %08X", blocksize2); + LogVerbose("dataBlock1 [%p,%p)", originalDataBlock1, (const uint8_t *)originalDataBlock1 + datablockSize1); + LogVerbose("dataBlock2 [%p,%p)", originalDataBlock2, (const uint8_t *)originalDataBlock2 + datablockSize2); + LogVerbose("datablockSize1 %08X", datablockSize1); + LogVerbose("datablockSize2 %08X", datablockSize2); + LogVerbose("otherCodeBlock1 [%p,%p)", otherCodeBlock1, (const uint8_t *)otherCodeBlock1 + otherCodeBlockSize1); + LogVerbose("otherCodeBlock2 [%p,%p)", otherCodeBlock2, (const uint8_t *)otherCodeBlock2 + otherCodeBlockSize2); + LogVerbose("otherCodeBlockSize1 %08X", otherCodeBlockSize1); + LogVerbose("otherCodeBlockSize2 %08X", otherCodeBlockSize2); + +#ifdef _TARGET_AMD64_ + LogVerbose("offset %016llX", offset); + LogVerbose("addr1 %016llX", (size_t)originalBlock1 + offset); + LogVerbose("addr2 %016llX", (size_t)originalBlock2 + offset); +#elif defined(_TARGET_X86_) + LogVerbose("offset %08X", offset); + LogVerbose("addr1 %08X", (size_t)originalBlock1 + offset); + LogVerbose("addr2 %08X", (size_t)originalBlock2 + offset); +#endif + + LogVerbose("Block1:"); + DumpCodeBlock(block1, blocksize1, originalBlock1); + LogVerbose("Block2:"); + DumpCodeBlock(block2, blocksize2, originalBlock2); + + if (disasm_1 != nullptr) + delete disasm_1; + if (disasm_2 != nullptr) + delete disasm_2; + return false; +#else // !USE_MSVCDIS + return false; // No disassembler; assume there are differences +#endif // !USE_MSVCDIS +} + +// +// Compares two read-only data sections for equality. +// +// Arguments: +// mc - The method context of the method to diff. +// cr1 - The first compile result to compare. +// cr2 - The second compile result to compare. +// block1 - A pointer to the first code block to diassemble. +// blocksize1 - The size of the first code block to compare. +// originalDataBlock1 - The original base address of the first read-only data block. +// block2 - A pointer to the second code block to diassemble. +// blocksize2 - The size of the second code block to compare. +// originalDataBlock2 - The original base address of the second read-only data block. +// +// Return Value: +// True if the read-only data sections are identical; false otherwise. +// +bool NearDiffer::compareReadOnlyDataBlock(MethodContext *mc, CompileResult *cr1, CompileResult *cr2, + unsigned char *block1, ULONG blocksize1, void *originalDataBlock1, + unsigned char *block2, ULONG blocksize2, void *originalDataBlock2) +{ + //no rodata + if(blocksize1==0 && blocksize2==0) + return true; + + if(blocksize1!=blocksize2) + { + LogVerbose("compareReadOnlyDataBlock found non-matching sizes %u %u", blocksize1, blocksize2); + return false; + } + + //TODO-Cleanup: The values on the datablock seem to wobble. Need further investigation to evaluate a good near comparison for these + return true; +} + +// +// Compares two EH info blocks for equality. +// +// Arguments: +// mc - The method context of the method to diff. +// cr1 - The first compile result to compare. +// cr2 - The second compile result to compare. +// +// Return Value: +// True if the EH info blocks are identical; false otherwise. +// +bool NearDiffer::compareEHInfo(MethodContext *mc, CompileResult *cr1, CompileResult *cr2) +{ + ULONG cEHSize_1; + ULONG ehFlags_1; + ULONG tryOffset_1; + ULONG tryLength_1; + ULONG handlerOffset_1; + ULONG handlerLength_1; + ULONG classToken_1; + + ULONG cEHSize_2; + ULONG ehFlags_2; + ULONG tryOffset_2; + ULONG tryLength_2; + ULONG handlerOffset_2; + ULONG handlerLength_2; + ULONG classToken_2; + + + cEHSize_1 = cr1->repSetEHcount(); + cEHSize_2 = cr2->repSetEHcount(); + + //no exception + if(cEHSize_1==0 && cEHSize_2==0) + return true; + + if(cEHSize_1!=cEHSize_2) + { + LogVerbose("compareEHInfo found non-matching sizes %u %u", cEHSize_1, cEHSize_2); + return false; + } + + for(unsigned int i=0;i<cEHSize_1;i++) + { + cr1->repSetEHinfo(i, &ehFlags_1, &tryOffset_1, &tryLength_1, &handlerOffset_1, &handlerLength_1, &classToken_1); + cr2->repSetEHinfo(i, &ehFlags_2, &tryOffset_2, &tryLength_2, &handlerOffset_2, &handlerLength_2, &classToken_2); + if(ehFlags_1!=ehFlags_2) + { + LogVerbose("EH flags don't match %u != %u", ehFlags_1, ehFlags_2); + return false; + } + if((tryOffset_1!=tryOffset_2) || (tryLength_1!=tryLength_2)) + { + LogVerbose("EH try information don't match, offset: %u %u, length: %u %u", tryOffset_1, tryOffset_2, tryLength_1, tryLength_2); + return false; + } + if((handlerOffset_1!=handlerOffset_2) || (handlerLength_1!=handlerLength_2)) + { + LogVerbose("EH handler information don't match, offset: %u %u, length: %u %u", handlerOffset_1, handlerOffset_2, handlerLength_1, handlerLength_2); + return false; + } + if(classToken_1!=classToken_2) + { + LogVerbose("EH class tokens don't match %u!=%u", classToken_1, classToken_2); + return false; + } + } + + return true; +} + +// +// Compares two GC info blocks for equality. +// +// Arguments: +// mc - The method context of the method to diff. +// cr1 - The first compile result to compare. +// cr2 - The second compile result to compare. +// +// Return Value: +// True if the GC info blocks are identical; false otherwise. +// +bool NearDiffer::compareGCInfo(MethodContext *mc, CompileResult *cr1, CompileResult *cr2) +{ + void *gcInfo1; + size_t gcInfo1Size; + void *gcInfo2; + size_t gcInfo2Size; + + cr1->repAllocGCInfo(&gcInfo1Size, &gcInfo1); + cr2->repAllocGCInfo(&gcInfo2Size, &gcInfo2); + + if (gcInfo1Size != gcInfo2Size) + { + LogVerbose("Reported GCInfo sizes don't match: %u != %u", (unsigned int)gcInfo1Size, (unsigned int)gcInfo2Size); + return false; + } + + if (memcmp(gcInfo1, gcInfo2, gcInfo1Size) != 0) + { + LogVerbose("GCInfo doesn't match."); + return false; + } + + return true; +} + +// +// Compares two sets of native var info for equality. +// +// Arguments: +// mc - The method context of the method to diff. +// cr1 - The first compile result to compare. +// cr2 - The second compile result to compare. +// +// Return Value: +// True if the native var info is identical; false otherwise. +// +bool NearDiffer::compareVars(MethodContext *mc, CompileResult *cr1, CompileResult *cr2) +{ + CORINFO_METHOD_HANDLE ftn_1; + ULONG32 cVars_1; + ICorDebugInfo::NativeVarInfo *vars_1; + + CORINFO_METHOD_HANDLE ftn_2; + ULONG32 cVars_2; + ICorDebugInfo::NativeVarInfo *vars_2; + + CORINFO_METHOD_INFO info; + unsigned flags = 0; + mc->repCompileMethod(&info, &flags); + + bool set1 = cr1->repSetVars(&ftn_1, &cVars_1, &vars_1); + bool set2 = cr2->repSetVars(&ftn_2, &cVars_2, &vars_2); + if((set1==false)&&(set2==false)) + return true; // we don't have boundaries for either of these. + if(((set1==true)&&(set2==false))||((set1==false)&&(set2==true))) + { + LogVerbose("missing matching vars sets"); + return false; + } + + //no vars + if(cVars_1==0 && cVars_2==0) + { + return true; + } + + if(ftn_1!=ftn_2) + { + //We would like to find out this situation + __debugbreak(); + LogVerbose("compareVars found non-matching CORINFO_METHOD_HANDLE %p %p", ftn_1, ftn_2); + return false; + } + if(ftn_1!=info.ftn) + { + LogVerbose("compareVars found issues with the CORINFO_METHOD_HANDLE %p %p", ftn_1, info.ftn); + return false; + } + + if(cVars_1!=cVars_2) + { + LogVerbose("compareVars found non-matching var count %u %u", cVars_1, cVars_2); + return false; + } + + //TODO-Cleanup: The values on the NativeVarInfo array seem to wobble. Need further investigation to evaluate a good near comparison for these + //for(unsigned int i=0;i<cVars_1;i++) + //{ + // if(vars_1[i].startOffset!=vars_2[i].startOffset) + // { + // LogVerbose("compareVars found non-matching startOffsets %u %u for var: %u", vars_1[i].startOffset, vars_2[i].startOffset, i); + // return false; + // } + //} + + return true; +} + +// +// Compares two sets of native offset mappings for equality. +// +// Arguments: +// mc - The method context of the method to diff. +// cr1 - The first compile result to compare. +// cr2 - The second compile result to compare. +// +// Return Value: +// True if the native offset mappings are identical; false otherwise. +// +bool NearDiffer::compareBoundaries(MethodContext *mc, CompileResult *cr1, CompileResult *cr2) +{ + CORINFO_METHOD_HANDLE ftn_1; + ULONG32 cMap_1; + ICorDebugInfo::OffsetMapping *map_1; + + CORINFO_METHOD_HANDLE ftn_2; + ULONG32 cMap_2; + ICorDebugInfo::OffsetMapping *map_2; + + CORINFO_METHOD_INFO info; + unsigned flags = 0; + mc->repCompileMethod(&info, &flags); + + bool set1 = cr1->repSetBoundaries(&ftn_1, &cMap_1, &map_1); + bool set2 = cr2->repSetBoundaries(&ftn_2, &cMap_2, &map_2); + if((set1==false)&&(set2==false)) + return true; // we don't have boundaries for either of these. + if(((set1==true)&&(set2==false))||((set1==false)&&(set2==true))) + { + LogVerbose("missing matching boundary sets"); + return false; + } + + if(ftn_1!=ftn_2) + { + LogVerbose("compareBoundaries found non-matching CORINFO_METHOD_HANDLE %p %p", ftn_1, ftn_2); + return false; + } + + //no maps + if(cMap_1==0 && cMap_2==0) + return true; + + if(cMap_1!=cMap_2) + { + LogVerbose("compareBoundaries found non-matching var count %u %u", cMap_1, cMap_2); + return false; + } + + for(unsigned int i=0;i<cMap_1;i++) + { + if(map_1[i].ilOffset!=map_2[i].ilOffset) + { + LogVerbose("compareBoundaries found non-matching ilOffset %u %u for map: %u", map_1[i].ilOffset, map_2[i].ilOffset, i); + return false; + } + if(map_1[i].nativeOffset!=map_2[i].nativeOffset) + { + LogVerbose("compareBoundaries found non-matching nativeOffset %u %u for map: %u", map_1[i].nativeOffset, map_2[i].nativeOffset, i); + return false; + } + if(map_1[i].source!=map_2[i].source) + { + LogVerbose("compareBoundaries found non-matching source %u %u for map: %u", (unsigned int)map_1[i].source, (unsigned int)map_2[i].source, i); + return false; + } + } + + + return true; +} + +// +// Compares two compiled versions of a method for equality. This is the main driver for the various +// components of near diffing. +// +// Before starting the diffing process, this applies some fixups to the code stream based on relocations +// recorded during compilation, using the original base address that was used when compiling the method. +// +// Arguments: +// mc - The method context of the method to diff. +// cr1 - The first compile result to compare. +// cr2 - The second compile result to compare. +// +// Return Value: +// True if the compile results are identical; false otherwise. +// +bool NearDiffer::compare(MethodContext *mc, CompileResult *cr1, CompileResult *cr2) +{ + ULONG hotCodeSize_1; + ULONG coldCodeSize_1; + ULONG roDataSize_1; + ULONG xcptnsCount_1; + CorJitAllocMemFlag flag_1; + unsigned char *hotCodeBlock_1; + unsigned char *coldCodeBlock_1; + unsigned char *roDataBlock_1; + void *orig_hotCodeBlock_1; + void *orig_coldCodeBlock_1; + void *orig_roDataBlock_1; + + ULONG hotCodeSize_2; + ULONG coldCodeSize_2; + ULONG roDataSize_2; + ULONG xcptnsCount_2; + CorJitAllocMemFlag flag_2; + unsigned char *hotCodeBlock_2; + unsigned char *coldCodeBlock_2; + unsigned char *roDataBlock_2; + void *orig_hotCodeBlock_2; + void *orig_coldCodeBlock_2; + void *orig_roDataBlock_2; + + cr1->repAllocMem(&hotCodeSize_1, &coldCodeSize_1, &roDataSize_1, &xcptnsCount_1, &flag_1, + &hotCodeBlock_1, &coldCodeBlock_1, &roDataBlock_1, &orig_hotCodeBlock_1, &orig_coldCodeBlock_1, &orig_roDataBlock_1); + cr2->repAllocMem(&hotCodeSize_2, &coldCodeSize_2, &roDataSize_2, &xcptnsCount_2, &flag_2, + &hotCodeBlock_2, &coldCodeBlock_2, &roDataBlock_2, &orig_hotCodeBlock_2, &orig_coldCodeBlock_2, &orig_roDataBlock_2); + + LogDebug("HCS1 %d CCS1 %d RDS1 %d xcpnt1 %d flag1 %08X, HCB %p CCB %p RDB %p ohcb %p occb %p odb %p", + hotCodeSize_1, coldCodeSize_1, roDataSize_1, xcptnsCount_1, flag_1, + hotCodeBlock_1, coldCodeBlock_1, roDataBlock_1, + orig_hotCodeBlock_1, orig_coldCodeBlock_1, orig_roDataBlock_1); + LogDebug("HCS2 %d CCS2 %d RDS2 %d xcpnt2 %d flag2 %08X, HCB %p CCB %p RDB %p ohcb %p occb %p odb %p", + hotCodeSize_2, coldCodeSize_2, roDataSize_2, xcptnsCount_2, flag_2, + hotCodeBlock_2, coldCodeBlock_2, roDataBlock_2, + orig_hotCodeBlock_2, orig_coldCodeBlock_2, orig_roDataBlock_2); + + cr1->applyRelocs(hotCodeBlock_1, hotCodeSize_1, orig_hotCodeBlock_1); + cr2->applyRelocs(hotCodeBlock_2, hotCodeSize_2, orig_hotCodeBlock_2); + cr1->applyRelocs(coldCodeBlock_1, coldCodeSize_1, orig_coldCodeBlock_1); + cr2->applyRelocs(coldCodeBlock_2, coldCodeSize_2, orig_coldCodeBlock_2); + cr1->applyRelocs(roDataBlock_1, roDataSize_1, orig_roDataBlock_1); + cr2->applyRelocs(roDataBlock_2, roDataSize_2, orig_roDataBlock_2); + + if(!compareCodeSection(mc, cr1, cr2, + hotCodeBlock_1, hotCodeSize_1, roDataBlock_1, roDataSize_1, orig_hotCodeBlock_1, orig_roDataBlock_1, orig_coldCodeBlock_1, coldCodeSize_1, + hotCodeBlock_2, hotCodeSize_2, roDataBlock_2, roDataSize_2, orig_hotCodeBlock_2, orig_roDataBlock_2, orig_coldCodeBlock_2, coldCodeSize_2)) + return false; + + if(!compareCodeSection(mc, cr1, cr2, + coldCodeBlock_1, coldCodeSize_1, roDataBlock_1, roDataSize_1, orig_coldCodeBlock_1, orig_roDataBlock_1, orig_hotCodeBlock_1, hotCodeSize_1, + coldCodeBlock_2, coldCodeSize_2, roDataBlock_2, roDataSize_2, orig_coldCodeBlock_2, orig_roDataBlock_2, orig_hotCodeBlock_2, hotCodeSize_2)) + return false; + + if(!compareReadOnlyDataBlock(mc, cr1, cr2, + roDataBlock_1, roDataSize_1, orig_roDataBlock_1, + roDataBlock_2, roDataSize_2, orig_roDataBlock_2)) + return false; + + if(!compareEHInfo(mc, cr1, cr2)) + return false; + + if (!compareGCInfo(mc, cr1, cr2)) + return false; + + if (!compareVars(mc, cr1, cr2)) + return false; + + if(!compareBoundaries(mc, cr1, cr2)) + return false; + + return true; +} |