author    | Jiyoung Yun <jy910.yun@samsung.com> | 2016-11-23 19:09:09 +0900
committer | Jiyoung Yun <jy910.yun@samsung.com> | 2016-11-23 19:09:09 +0900
commit    | 4b4aad7217d3292650e77eec2cf4c198ea9c3b4b (patch)
tree      | 98110734c91668dfdbb126fcc0e15ddbd93738ca /src/vm/i386/gmsx86.cpp
parent    | fa45f57ed55137c75ac870356a1b8f76c84b229c (diff)
Imported Upstream version 1.1.0 (tag: upstream/1.1.0)
Diffstat (limited to 'src/vm/i386/gmsx86.cpp')
-rw-r--r-- | src/vm/i386/gmsx86.cpp | 1245 |
1 file changed, 1245 insertions(+), 0 deletions(-)
diff --git a/src/vm/i386/gmsx86.cpp b/src/vm/i386/gmsx86.cpp new file mode 100644 index 0000000000..e7e16b70ab --- /dev/null +++ b/src/vm/i386/gmsx86.cpp @@ -0,0 +1,1245 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/**************************************************************/ +/* gmsx86.cpp */ +/**************************************************************/ + +#include "common.h" +#include "gmscpu.h" + +/***************************************************************/ +/* setMachState figures out what the state of the CPU will be + when the function that calls 'setMachState' returns. It stores + this information in 'frame' + + setMachState works by simulating the execution of the + instructions starting at the instruction following the + call to 'setMachState' and continuing until a return instruction + is simulated. To avoid having to process arbitrary code, the + call to 'setMachState' should be called as follows + + if (machState.setMachState != 0) return; + + setMachState is guarnenteed to return 0 (so the return + statement will never be executed), but the expression above + insures insures that there is a 'quick' path to epilog + of the function. This insures that setMachState will only + have to parse a limited number of X86 instructions. */ + + +/***************************************************************/ +#ifndef POISONC +#define POISONC ((sizeof(int *) == 4)?0xCCCCCCCCU:UI64(0xCCCCCCCCCCCCCCCC)) +#endif + +/***************************************************************/ +/* the 'zeroFtn and 'recursiveFtn' are only here to determine + if if mscorwks itself has been instrumented by a profiler + that intercepts calls or epilogs of functions. (the + callsInstrumented and epilogInstrumented functions). */ + +#if !defined(DACCESS_COMPILE) + +#pragma optimize("gsy", on ) // optimize to insure that code generation does not have junk in it +#pragma warning(disable:4717) + +static int __stdcall zeroFtn() { + return 0; +} + +static int __stdcall recursiveFtn() { + return recursiveFtn()+1; +} + +#pragma optimize("", on ) + + +/* Has mscorwks been instrumented so that calls are morphed into push XXXX call <helper> */ +static bool callsInstrumented() { + // Does the recusive function begin with push XXXX call <helper> + PTR_BYTE ptr = PTR_BYTE(recursiveFtn); + + return (ptr[0] == 0x68 && ptr[5] == 0xe8); // PUSH XXXX, call <helper> +} + +/* Has mscorwks been instrumented so function prolog and epilogs are replaced with + jmp [XXXX] */ + +static bool epilogInstrumented() { + + PTR_BYTE ptr = PTR_BYTE(zeroFtn); + if (ptr[0] == 0xe8) // call <helper> (prolog instrumentation) + ptr += 5; + if (ptr[0] == 0x33 && ptr[1] == 0xc0) // xor eax eax + ptr += 2; + return (ptr[0] == 0xeb || ptr[0] == 0xe9); // jmp <XXXX> +} + +#else + + // Note that we have the callsInstrumeted and epilogInstrumented + // functions so that the looser heuristics used for instrumented code + // can't foul up an instrumented mscorwks. 
For simplicity sake we + // don't bother with this in the DAC, which means that the DAC could + // be misled more frequently than mscorwks itself, but I still think + // it will not be misled in any real scenario +static bool callsInstrumented() { LIMITED_METHOD_DAC_CONTRACT; return true; } +static bool epilogInstrumented() { LIMITED_METHOD_DAC_CONTRACT; return true; } + +#endif // !defined(DACCESS_COMPILE) + +/***************************************************************/ +/* returns true if a call to 'ip' should be entered by the + epilog walker. Bascically we are looking for things that look + like __SEH_epilog. In particular we look for things that + pops a register before doing a push. If we see something + that we don't recognise, we dont consider it a epilog helper + and return false. +*/ + +static bool shouldEnterCall(PTR_BYTE ip) { + SUPPORTS_DAC; + + int datasize; // helper variable for decoding of address modes + int mod; // helper variable for decoding of mod r/m + int rm; // helper variable for decoding of mod r/m + + int pushes = 0; + + // we should start unbalenced pops within 48 instrs. If not, it is not a special epilog function + // the only reason we need as many instructions as we have below is because coreclr + // gets instrumented for profiling, code coverage, BBT etc, and we want these things to + // just work. + for (int i = 0; i < 48; i++) { + switch(*ip) { + case 0xF2: // repne + case 0xF3: // repe + ip++; + break; + + case 0x68: // push 0xXXXXXXXX + ip += 5; + + // For office profiler. They morph tail calls into push TARGET; jmp helper + // so if you see + // + // push XXXX + // jmp xxxx + // + // and we notice that coreclr has been instrumented and + // xxxx starts with a JMP [] then do what you would do for jmp XXXX + if (*ip == 0xE9 && callsInstrumented()) { // jmp helper + PTR_BYTE tmpIp = ip + 5; + PTR_BYTE target = tmpIp + (__int32)*((PTR_TADDR)(PTR_TO_TADDR(tmpIp) - 4)); + if (target[0] == 0xFF && target[1] == 0x25) { // jmp [xxxx] (to external dll) + ip = PTR_BYTE(*((PTR_TADDR)(PTR_TO_TADDR(ip) - 4))); + } + } + else { + pushes++; + } + break; + + case 0x50: // push EAX + case 0x51: // push ECX + case 0x52: // push EDX + case 0x53: // push EBX + case 0x55: // push EBP + case 0x56: // push ESI + case 0x57: // push EDI + pushes++; + ip++; + break; + + case 0xE8: // call <disp32> + ip += 5; + pushes = 0; // This assumes that all of the previous pushes are arguments to this call + break; + + case 0xFF: + if (ip[1] != 0x15) // call [XXXX] is OK (prolog of epilog helper is intrumented) + return false; // but everything else is not OK. + ip += 6; + pushes = 0; // This assumes that all of the previous pushes are arguments to this call + break; + + case 0x9C: // pushfd + case 0x9D: // popfd + // a pushfd can never be an argument, so we model a pair of + // these instruction as not changing the stack so that a call + // that occurs between them does not consume the value of pushfd + ip++; + break; + + case 0x5D: // pop EBP + case 0x5E: // pop ESI + case 0x5F: // pop EDI + case 0x5B: // pop EBX + case 0x58: // pop EAX + case 0x59: // pop ECX + case 0x5A: // pop EDX + if (pushes <= 0) { + // We now have more pops than pushes. This is our indication + // that we are in an EH_epilog function so we return true. + // This is the only way to exit this method with a retval of true. 
+ return true; + } + --pushes; + ip++; + break; + + case 0xA1: // MOV EAX, [XXXX] + ip += 5; + break; + + case 0xC6: // MOV r/m8, imm8 + datasize = 1; + goto decodeRM; + + case 0x89: // MOV r/m, reg + if (ip[1] == 0xE5) // MOV EBP, ESP + return false; + if (ip[1] == 0xEC) // MOV ESP, EBP + return false; + goto move; + + case 0x8B: // MOV reg, r/m + if (ip[1] == 0xE5) // MOV ESP, EBP + return false; + if (ip[1] == 0xEC) // MOV EBP, ESP + return false; + goto move; + + case 0x88: // MOV reg, r/m (BYTE) + case 0x8A: // MOV r/m, reg (BYTE) + + case 0x31: // XOR + case 0x32: // XOR + case 0x33: // XOR + + move: + datasize = 0; + + decodeRM: + // Note that we don't want to read from ip[] after + // we do ANY incrementing of ip + + mod = (ip[1] & 0xC0) >> 6; + if (mod != 3) { + rm = (ip[1] & 0x07); + if (mod == 0) { // (mod == 0) + if (rm == 5) + ip += 4; // disp32 + else if (rm == 4) + ip += 1; // [reg*K+reg] + // otherwise [reg] + + } + else if (mod == 1) { // (mod == 1) + ip += 1; // for disp8 + if (rm == 4) + ip += 1; // [reg*K+reg+disp8] + // otherwise [reg+disp8] + } + else { // (mod == 2) + ip += 4; // for disp32 + if (rm == 4) + ip += 1; // [reg*K+reg+disp32] + // otherwise [reg+disp32] + } + } + + ip += 2; + ip += datasize; + break; + + case 0x64: // FS: prefix + ip++; + break; + + case 0xEB: // jmp <disp8> + ip += (signed __int8) ip[1] + 2; + break; + + case 0xE9: // jmp <disp32> + ip += (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) + 1) + 5; + break; + + case 0xF7: // test r/m32, imm32 + // Magellan code coverage build + if ( (ip[1] & 0x38) == 0x00) + { + datasize = 4; + goto decodeRM; + } + else + { + return false; + } + break; + + case 0x75: // jnz <target> + // Magellan code coverage build + // We always follow forward jump to avoid possible looping. + { + PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2; + if (tmpIp > ip) { + ip = tmpIp; // follow forwards jump + } + else { + return false; // backwards jump implies not EH_epilog function + } + } + break; + + case 0xC2: // ret + case 0xC3: // ret n + default: + return false; + } + } + + return false; +} + + +/***************************************************************/ +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:21000) // Suppress PREFast warning about overly large function +#endif + +/***************************************************************/ +// A fundamental requirement of managed code is that we need to be able to enumerate all GC references on the +// stack at GC time. To do this we need to be able to 'crawl' the stack. We know how to do this in JIT +// compiled code (it generates additional information like the frame size etc), but we don't know how to do +// this for unmanaged code. For PINVOKE calls, we leave a pointer to the transition boundary between managed +// and unmanaged code and we simply ignore the lower part of the stack. However setting up this transition is +// a bit expensive (1-2 dozen instructions), and while that is acceptable for PINVOKE, it is not acceptable +// for high volume calls, like NEW, CAST, WriterBarrier, Stack field fetch and others. +// +// To get around this, for transitions into the runtime (which we call FCALLS), we DEFER setting up the +// boundary variables (what we call the transition frame), until we actually need it (we will do an operation +// that might cause a GC). 
This allow us to handle the common case (where we might find the thing in a cache, +// or be service the 'new' from a allocation quantum), and only pay the cost of setting up the transition +// frame when it will actually be used. +// +// The problem is that in order to set up a transition frame we need to be able to find ALL REGISTERS AT THE +// TIME THE TRANSITION TO UNMANAGED CODE WAS MADE (because we might need to update them if they have GC +// references). Because we have executed ordinary C++ code (which might spill the registers to the stack at +// any time), we have a problem. LazyMachState is our 'solution' to this problem. We take advantage of the +// fact that the C++ code MUST RESTORE the register before returning. Thus we simulate the execution from the +// current location to the return and 'watch' where the registers got restored from. This is what +// unwindLazyState does (determine what the registers would be IF you had never executed and unmanaged C++ +// code). +// +// By design, this code does not handle all X86 instructions, but only those instructions needed in an +// epilog. If you get a failure because of a missing instruction, it MAY simply be because the compiler +// changed and now emits a new instruction in the epilog, but it MAY also be because the unwinder is +// 'confused' and is trying to follow a code path that is NOT AN EPILOG, and in this case adding +// instructions to 'fix' it is inappropriate. +// +void LazyMachState::unwindLazyState(LazyMachState* baseState, + MachState* lazyState, + DWORD threadId, + int funCallDepth /* = 1 */, + HostCallPreference hostCallPreference /* = (HostCallPreference)(-1) */) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + SO_TOLERANT; + SUPPORTS_DAC; + } CONTRACTL_END; + + lazyState->_edi = baseState->_edi; + lazyState->_esi = baseState->_esi; + lazyState->_ebx = baseState->_ebx; + lazyState->_ebp = baseState->captureEbp; +#ifndef DACCESS_COMPILE + lazyState->_pEdi = &baseState->_edi; + lazyState->_pEsi = &baseState->_esi; + lazyState->_pEbx = &baseState->_ebx; + lazyState->_pEbp = &baseState->_ebp; +#endif + + // We have captured the state of the registers as they exist in 'captureState' + // we need to simulate execution from the return address captured in 'captureState + // until we return from the caller of captureState. + + PTR_BYTE ip = PTR_BYTE(baseState->captureEip); + PTR_TADDR ESP = PTR_TADDR(baseState->captureEsp); + ESP++; // pop captureState's return address + + + // VC now has small helper calls that it uses in epilogs. We need to walk into these + // helpers if we are to decode the stack properly. After we walk the helper we need + // to return and continue walking the epiliog. This varaible remembers were to return to + PTR_BYTE epilogCallRet = PTR_BYTE((TADDR)0); + + // The very first conditional jump that we are going to encounter is + // the one testing for the return value of LazyMachStateCaptureState. + // The non-zero path is the one directly leading to a return statement. + // This variable keeps track of whether we are still looking for that + // first conditional jump. + BOOL bFirstCondJmp = TRUE; + + // The general strategy is that we always try to plough forward: + // we follow a conditional jump if and only if it is a forward jump. + // However, in fcall functions that set up a HELPER_METHOD_FRAME in + // more than one place, gcc will have both of them share the same + // epilog - and the second one may actually be a backward jump. + // This can lead us to loop in a destructor code loop. 
To protect + // against this, we remember the ip of the last conditional jump + // we followed, and if we encounter it again, we take the other branch. + PTR_BYTE lastCondJmpIp = PTR_BYTE((TADDR)0); + + int datasize; // helper variable for decoding of address modes + int mod; // helper variable for decoding of mod r/m + int rm; // helper variable for decoding of mod r/m + +#ifdef _DEBUG + int count = 0; + const DWORD cInstructions = 1000; + PTR_BYTE *instructionBytes = (PTR_BYTE*)alloca(cInstructions * sizeof(PTR_BYTE)); + memset(instructionBytes, 0, cInstructions * sizeof(PTR_BYTE)); +#endif + bool bset16bit=false; + bool b16bit=false; + for(;;) + { + _ASSERTE(count++ < 1000); // we should never walk more than 1000 instructions! + b16bit=bset16bit; + bset16bit=false; + +#ifndef DACCESS_COMPILE + again: +#endif +#ifdef _DEBUG + instructionBytes[count-1] = ip; +#endif + switch(*ip) + { + + case 0x64: // FS: prefix + bset16bit=b16bit; // In case we have just seen a 0x66 prefix + goto incIp1; + + case 0x66: + bset16bit=true; // Remember that we saw the 0x66 prefix [16-bit datasize override] + goto incIp1; + + case 0x50: // push EAX + case 0x51: // push ECX + case 0x52: // push EDX + case 0x53: // push EBX + case 0x55: // push EBP + case 0x56: // push ESI + case 0x57: // push EDI + case 0x9C: // pushfd + --ESP; + case 0x40: // inc EAX + case 0x41: // inc ECX + case 0x42: // inc EDX + case 0x43: // inc EBX + case 0x46: // inc ESI + case 0x47: // inc EDI + goto incIp1; + + case 0x58: // pop EAX + case 0x59: // pop ECX + case 0x5A: // pop EDX + case 0x9D: // popfd + ESP++; + // FALL THROUGH + + case 0x90: // nop + incIp1: + ip++; + break; + + case 0x5B: // pop EBX + lazyState->_pEbx = ESP; + lazyState->_ebx = *ESP++; + goto incIp1; + case 0x5D: // pop EBP + lazyState->_pEbp = ESP; + lazyState->_ebp = *ESP++; + goto incIp1; + case 0x5E: // pop ESI + lazyState->_pEsi = ESP; + lazyState->_esi = *ESP++; + goto incIp1; + case 0x5F: // pop EDI + lazyState->_pEdi = ESP; + lazyState->_edi = *ESP++; + goto incIp1; + + case 0xEB: // jmp <disp8> + ip += (signed __int8) ip[1] + 2; + break; + + case 0x72: // jb <disp8> for gcc. 
+ { + PTR_BYTE tmpIp = ip + (int)(signed __int8)ip[1] + 2; + if (tmpIp > ip) + ip = tmpIp; + else + ip += 2; + } + break; + + case 0xE8: // call <disp32> + ip += 5; + if (epilogCallRet == 0) + { + PTR_BYTE target = ip + (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) - 4); // calculate target + + if (shouldEnterCall(target)) + { + epilogCallRet = ip; // remember our return address + --ESP; // simulate pushing the return address + ip = target; + } + } + break; + + case 0xE9: // jmp <disp32> + { + PTR_BYTE tmpIp = ip + + ((__int32)*dac_cast<PTR_DWORD>(ip + 1) + 5); + ip = tmpIp; + } + break; + + case 0x0f: // follow non-zero jumps: + if (ip[1] >= 0x90 && ip[1] <= 0x9f) { + if ((ip[2] & 0xC0) != 0xC0) // set<cc> reg + goto badOpcode; + ip += 3; + break; + } + else if ((ip[1] & 0xf0) == 0x40) { //cmov mod/rm + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] >= 0x10 && ip[1] <= 0x17) { // movups, movlps, movhps, unpcklpd, unpckhpd + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0x1f) { // nop (multi-byte) + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0x57) { // xorps + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xb6 || ip[1] == 0xb7) { //movzx reg, r/m8 + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xbf) { //movsx reg, r/m16 + ++ip; + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xd6 || ip[1] == 0x7e) { // movq + ++ip; + datasize = 0; + goto decodeRM; + } + else if (bFirstCondJmp) { + bFirstCondJmp = FALSE; + if (ip[1] == 0x85) // jne <disp32> + ip += (__int32)*dac_cast<PTR_DWORD>(ip + 2) + 6; + else if (ip[1] >= 0x80 && ip[1] <= 0x8F) // jcc <disp32> + ip += 6; + else + goto badOpcode; + } + else { + if ((ip[1] >= 0x80) && (ip[1] <= 0x8F)) { + PTR_BYTE tmpIp = ip + (__int32)*dac_cast<PTR_DWORD>(ip + 2) + 6; + + if ((tmpIp > ip) == (lastCondJmpIp != ip)) { + lastCondJmpIp = ip; + ip = tmpIp; + } + else { + lastCondJmpIp = ip; + ip += 6; + } + } + else + goto badOpcode; + } + break; + + // This is here because VC seems to not always optimize + // away a test for a literal constant + case 0x6A: // push 0xXX + ip += 2; + --ESP; + break; + + case 0x68: // push 0xXXXXXXXX + if ((ip[5] == 0xFF) && (ip[6] == 0x15)) { + ip += 11; // + } + else { + ip += 5; + + // For office profiler. They morph calls into push TARGET; call helper + // so if you see + // + // push XXXX + // call xxxx + // + // and we notice that mscorwks has been instrumented and + // xxxx starts with a JMP [] then do what you would do for call XXXX + if ((*ip & 0xFE) == 0xE8 && callsInstrumented()) { // It is a call or a jump (E8 or E9) + PTR_BYTE tmpIp = ip + 5; + PTR_BYTE target = tmpIp + (__int32)*PTR_DWORD(PTR_TO_TADDR(tmpIp) - 4); + if (target[0] == 0xFF && target[1] == 0x25) { // jmp [xxxx] (to external dll) + target = PTR_BYTE(*PTR_TADDR(PTR_TO_TADDR(ip) - 4)); + if (*ip == 0xE9) { // Do logic for jmp + ip = target; + } + else if (shouldEnterCall(target)) { // Do logic for calls + epilogCallRet = ip; // remember our return address + --ESP; // simulate pushing the return address + ip = target; + } + } + } + } + break; + + case 0x74: // jz <target> + if (bFirstCondJmp) { + bFirstCondJmp = FALSE; + ip += 2; // follow the non-zero path + break; + } + goto condJumpDisp8; + + case 0x75: // jnz <target> + // Except the first jump, we always follow forward jump to avoid possible looping. 
+ // + if (bFirstCondJmp) { + bFirstCondJmp = FALSE; + ip += (signed __int8) ip[1] + 2; // follow the non-zero path + break; + } + goto condJumpDisp8; + + case 0x77: // ja <target> + case 0x78: // js <target> + case 0x79: // jns <target> + case 0x7d: // jge <target> + case 0x7c: // jl <target> + goto condJumpDisp8; + + condJumpDisp8: + { + PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2; + if ((tmpIp > ip) == (lastCondJmpIp != ip)) { + lastCondJmpIp = ip; + ip = tmpIp; + } + else { + lastCondJmpIp = ip; + ip += 2; + } + } + break; + + case 0x84: + case 0x85: + mod = (ip[1] & 0xC0) >> 6; + if (mod != 3) // test reg1, reg2 + goto badOpcode; + ip += 2; + break; + + case 0x31: + case 0x32: + case 0x33: +#ifdef __GNUC__ + //there are lots of special workarounds for XOR for msvc. For GnuC + //just do the normal Mod/rm stuff. + datasize = 0; + goto decodeRM; +#else + mod = (ip[1] & 0xC0) >> 6; + if (mod == 3) + { + // XOR reg1, reg2 + + // VC generates this sequence in some code: + // xor reg, reg + // test reg reg + // je <target> + // This is just an unconditional branch, so jump to it + if ((ip[1] & 7) == ((ip[1] >> 3) & 7)) { // reg1 == reg2? + if (ip[2] == 0x85 && ip[3] == ip[1]) { // TEST reg, reg + if (ip[4] == 0x74) { + ip += (signed __int8) ip[5] + 6; // follow the non-zero path + break; + } + _ASSERTE(ip[4] != 0x0f || ((ip[5] & 0xF0)!=0x80)); // If this goes off, we need the big jumps + } + else + { + if (ip[2]==0x74) + { + ip += (signed __int8) ip[3] + 4; + break; + } + _ASSERTE(ip[2] != 0x0f || ((ip[3] & 0xF0)!=0x80)); // If this goes off, we need the big jumps + } + } + ip += 2; + } + else if (mod == 1) + { + // XOR reg1, [reg+offs8] + // Used by the /GS flag for call to __security_check_cookie() + // Should only be XOR ECX,[EBP+4] + _ASSERTE((((ip[1] >> 3) & 0x7) == 0x1) && ((ip[1] & 0x7) == 0x5) && (ip[2] == 4)); + ip += 3; + } + else if (mod == 2) + { + // XOR reg1, [reg+offs32] + // Should not happen but may occur with __security_check_cookie() + _ASSERTE(!"Unexpected XOR reg1, [reg+offs32]"); + ip += 6; + } + else // (mod == 0) + { + // XOR reg1, [reg] + goto badOpcode; + } + break; +#endif + + case 0x05: + // added to handle gcc 3.3 generated code + // add %reg, constant + ip += 5; + break; + + case 0xFF: + if ( (ip[1] & 0x38) == 0x30) + { + // opcode generated by Vulcan/BBT instrumentation + // search for push dword ptr[esp]; push imm32; call disp32 and if found ignore it + if ((ip[1] == 0x34) && (ip[2] == 0x24) && // push dword ptr[esp] (length 3 bytes) + (ip[3] == 0x68) && // push imm32 (length 5 bytes) + (ip[8] == 0xe8)) // call disp32 (length 5 bytes) + { + // found the magic seq emitted by Vulcan instrumentation + ip += 13; // (3+5+5) + break; + } + + --ESP; // push r/m + datasize = 0; + goto decodeRM; + } + else if ( (ip[1] & 0x38) == 0x10) + { + // added to handle gcc 3.3 generated code + // This is a call *(%eax) generated by gcc for destructor calls. 
+ // We can safely skip over the call + datasize = 0; + goto decodeRM; + } + else if (ip[1] == 0xe0) + { + goto badOpcode; +#if 0 + // Handles jmp *%eax from gcc + datasize = 0; + goto decodeRM; +#endif + } + else if (ip[1] == 0x25 && epilogInstrumented()) // is it jmp [XXXX] + { + // this is a office profiler epilog (this jmp is acting as a return instruction) + PTR_BYTE epilogHelper = PTR_BYTE(*PTR_TADDR(*PTR_TADDR(PTR_TO_TADDR(ip) + 2))); + + ip = PTR_BYTE(*ESP); + lazyState->_pRetAddr = ESP++; + + if (epilogHelper[0] != 0x6A) // push <number of dwords to pop> + goto badOpcode; + unsigned disp = *PTR_BYTE(PTR_TO_TADDR(epilogHelper) + 1) * 4; + ESP = PTR_TADDR(PTR_TO_TADDR(ESP) + disp); // pop args + goto ret_with_epilogHelperCheck; + + } + else + { + goto badOpcode; + } + break; + + case 0x39: // comp r/m, reg + case 0x3B: // comp reg, r/m + datasize = 0; + goto decodeRM; + + case 0xA1: // MOV EAX, [XXXX] + ip += 5; + break; + + case 0x89: // MOV r/m, reg + if (ip[1] == 0xEC) // MOV ESP, EBP + goto mov_esp_ebp; + // FALL THROUGH + + case 0x18: // SBB r/m8, r8 + case 0x19: // SBB r/m[16|32], r[16|32] + case 0x1A: // SBB r8, r/m8 + case 0x1B: // SBB r[16|32], r/m[16|32] + + case 0x88: // MOV reg, r/m (BYTE) + case 0x8A: // MOV r/m, reg (BYTE) + + move: + datasize = 0; + + decodeRM: + // Note that we don't want to read from ip[] + // after we do ANY incrementing of ip + + mod = (ip[1] & 0xC0) >> 6; + if (mod != 3) { + rm = (ip[1] & 0x07); + if (mod == 0) { // (mod == 0) + if (rm == 5) // has disp32? + ip += 4; // [disp32] + else if (rm == 4) // has SIB byte? + ip += 1; // [reg*K+reg] + } + else if (mod == 1) { // (mod == 1) + if (rm == 4) // has SIB byte? + ip += 1; // [reg*K+reg+disp8] + ip += 1; // for disp8 + } + else { // (mod == 2) + if (rm == 4) // has SIB byte? 
+ ip += 1; // [reg*K+reg+disp32] + ip += 4; // for disp32 + } + } + ip += 2; // opcode and Mod R/M byte + ip += datasize; + break; + + case 0x80: // OP r/m8, <imm8> + datasize = 1; + goto decodeRM; + + case 0x81: // OP r/m32, <imm32> + if (!b16bit && ip[1] == 0xC4) { // ADD ESP, <imm32> + ESP = dac_cast<PTR_TADDR>(dac_cast<TADDR>(ESP) + + (__int32)*dac_cast<PTR_DWORD>(ip + 2)); + ip += 6; + break; + } else if (!b16bit && ip[1] == 0xC5) { // ADD EBP, <imm32> + lazyState->_ebp += (__int32)*dac_cast<PTR_DWORD>(ip + 2); + ip += 6; + break; + } + + datasize = b16bit?2:4; + goto decodeRM; + + case 0x01: // ADD mod/rm + case 0x03: + case 0x29: // SUB mod/rm + case 0x2B: + datasize = 0; + goto decodeRM; + case 0x83: // OP r/m32, <imm8> + if (ip[1] == 0xC4) { // ADD ESP, <imm8> + ESP = dac_cast<PTR_TADDR>(dac_cast<TADDR>(ESP) + (signed __int8)ip[2]); + ip += 3; + break; + } + if (ip[1] == 0xec) { // SUB ESP, <imm8> + ESP = PTR_TADDR(PTR_TO_TADDR(ESP) - (signed __int8)ip[2]); + ip += 3; + break; + } + if (ip[1] == 0xe4) { // AND ESP, <imm8> + ESP = PTR_TADDR(PTR_TO_TADDR(ESP) & (signed __int8)ip[2]); + ip += 3; + break; + } + if (ip[1] == 0xc5) { // ADD EBP, <imm8> + lazyState->_ebp += (signed __int8)ip[2]; + ip += 3; + break; + } + + datasize = 1; + goto decodeRM; + + case 0x8B: // MOV reg, r/m + if (ip[1] == 0xE5) { // MOV ESP, EBP + mov_esp_ebp: + ESP = PTR_TADDR(lazyState->_ebp); + ip += 2; + break; + } + + if ((ip[1] & 0xc7) == 0x4 && ip[2] == 0x24) // move reg, [esp] + { + if ( ip[1] == 0x1C ) { // MOV EBX, [ESP] + lazyState->_pEbx = ESP; + lazyState->_ebx = *lazyState->_pEbx; + } + else if ( ip[1] == 0x34 ) { // MOV ESI, [ESP] + lazyState->_pEsi = ESP; + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0x3C ) { // MOV EDI, [ESP] + lazyState->_pEdi = ESP; + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0x24 /*ESP*/ || ip[1] == 0x2C /*EBP*/) + goto badOpcode; + + ip += 3; + break; + } + + if ((ip[1] & 0xc7) == 0x44 && ip[2] == 0x24) // move reg, [esp+imm8] + { + if ( ip[1] == 0x5C ) { // MOV EBX, [ESP+XX] + lazyState->_pEbx = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]); + lazyState->_ebx = *lazyState->_pEbx ; + } + else if ( ip[1] == 0x74 ) { // MOV ESI, [ESP+XX] + lazyState->_pEsi = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]); + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0x7C ) { // MOV EDI, [ESP+XX] + lazyState->_pEdi = PTR_TADDR(PTR_TO_TADDR(ESP) + (signed __int8)ip[3]); + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0x64 /*ESP*/ || ip[1] == 0x6C /*EBP*/) + goto badOpcode; + + ip += 4; + break; + } + + if ((ip[1] & 0xC7) == 0x45) { // MOV reg, [EBP + imm8] + // gcc sometimes restores callee-preserved registers + // via 'mov reg, [ebp-xx]' instead of 'pop reg' + if ( ip[1] == 0x5D ) { // MOV EBX, [EBP+XX] + lazyState->_pEbx = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]); + lazyState->_ebx = *lazyState->_pEbx ; + } + else if ( ip[1] == 0x75 ) { // MOV ESI, [EBP+XX] + lazyState->_pEsi = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]); + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0x7D ) { // MOV EDI, [EBP+XX] + lazyState->_pEdi = PTR_TADDR(lazyState->_ebp + (signed __int8)ip[2]); + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0x65 /*ESP*/ || ip[1] == 0x6D /*EBP*/) + goto badOpcode; + + // We don't track the values of EAX,ECX,EDX + + ip += 3; // MOV reg, [reg + imm8] + break; + } + + if ((ip[1] & 0xC7) == 0x85) { // MOV reg, [EBP+imm32] + // gcc sometimes restores 
callee-preserved registers + // via 'mov reg, [ebp-xx]' instead of 'pop reg' + if ( ip[1] == 0xDD ) { // MOV EBX, [EBP+XXXXXXXX] + lazyState->_pEbx = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2)); + lazyState->_ebx = *lazyState->_pEbx ; + } + else if ( ip[1] == 0xF5 ) { // MOV ESI, [EBP+XXXXXXXX] + lazyState->_pEsi = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2)); + lazyState->_esi = *lazyState->_pEsi; + } + else if ( ip[1] == 0xFD ) { // MOV EDI, [EBP+XXXXXXXX] + lazyState->_pEdi = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2)); + lazyState->_edi = *lazyState->_pEdi; + } + else if ( ip[1] == 0xE5 /*ESP*/ || ip[1] == 0xED /*EBP*/) + goto badOpcode; // Add more registers + + // We don't track the values of EAX,ECX,EDX + + ip += 6; // MOV reg, [reg + imm32] + break; + } + goto move; + + case 0x8D: // LEA + if ((ip[1] & 0x38) == 0x20) { // Don't allow ESP to be updated + if (ip[1] == 0xA5) // LEA ESP, [EBP+XXXX] + ESP = PTR_TADDR(lazyState->_ebp + (__int32)*dac_cast<PTR_DWORD>(ip + 2)); + else if (ip[1] == 0x65) // LEA ESP, [EBP+XX] + ESP = PTR_TADDR(lazyState->_ebp + (signed __int8) ip[2]); + else if (ip[1] == 0x24 && ip[2] == 0x24) // LEA ESP, [ESP] + ; + else if (ip[1] == 0xa4 && ip[2] == 0x24 && *((DWORD *)(&ip[3])) == 0) // Another form of: LEA ESP, [ESP] + ; + else if (ip[1] == 0x64 && ip[2] == 0x24 && ip[3] == 0) // Yet another form of: LEA ESP, [ESP] (8 bit offset) + ; + else + { + goto badOpcode; + } + } + + datasize = 0; + goto decodeRM; + + case 0xB0: // MOV AL, imm8 + ip += 2; + break; + case 0xB8: // MOV EAX, imm32 + case 0xB9: // MOV ECX, imm32 + case 0xBA: // MOV EDX, imm32 + case 0xBB: // MOV EBX, imm32 + case 0xBE: // MOV ESI, imm32 + case 0xBF: // MOV EDI, imm32 + if(b16bit) + ip += 3; + else + ip += 5; + break; + + case 0xC2: // ret N + { + unsigned __int16 disp = *dac_cast<PTR_WORD>(ip + 1); + ip = PTR_BYTE(*ESP); + lazyState->_pRetAddr = ESP++; + _ASSERTE(disp < 64); // sanity check (although strictly speaking not impossible) + ESP = dac_cast<PTR_TADDR>(dac_cast<TADDR>(ESP) + disp); // pop args + goto ret; + } + case 0xC3: // ret + ip = PTR_BYTE(*ESP); + lazyState->_pRetAddr = ESP++; + + ret_with_epilogHelperCheck: + if (epilogCallRet != 0) { // we are returning from a special epilog helper + ip = epilogCallRet; + epilogCallRet = 0; + break; // this does not count toward funCallDepth + } + ret: + if (funCallDepth > 0) + { + --funCallDepth; + if (funCallDepth == 0) + goto done; + } + else + { + // Determine whether given IP resides in JITted code. (It returns nonzero in that case.) + // Use it now to see if we've unwound to managed code yet. + BOOL fFailedReaderLock = FALSE; + BOOL fIsManagedCode = ExecutionManager::IsManagedCode(*lazyState->pRetAddr(), hostCallPreference, &fFailedReaderLock); + if (fFailedReaderLock) + { + // We don't know if we would have been able to find a JIT + // manager, because we couldn't enter the reader lock without + // yielding (and our caller doesn't want us to yield). So abort + // now. 
+ + // Invalidate the lazyState we're returning, so the caller knows + // we aborted before we could fully unwind + lazyState->_pRetAddr = NULL; + return; + } + + if (fIsManagedCode) + goto done; + } + + bFirstCondJmp = TRUE; + break; + + case 0xC6: // MOV r/m8, imm8 + datasize = 1; + goto decodeRM; + + case 0xC7: // MOV r/m32, imm32 + datasize = b16bit?2:4; + goto decodeRM; + + case 0xC9: // leave + ESP = PTR_TADDR(lazyState->_ebp); + lazyState->_pEbp = ESP; + lazyState->_ebp = *ESP++; + ip++; + break; + +#ifndef DACCESS_COMPILE + case 0xCC: + if (IsDebuggerPresent()) + { + OutputDebugStringA("CLR: Invalid breakpoint in a helpermethod frame epilog\n"); + DebugBreak(); + goto again; + } +#ifndef _PREFIX_ + *((int*) 0) = 1; // If you get at this error, it is because yout + // set a breakpoint in a helpermethod frame epilog + // you can't do that unfortunately. Just move it + // into the interior of the method to fix it +#endif // !_PREFIX_ + goto done; +#endif //!DACCESS_COMPILE + + case 0xD0: // shl REG16, 1 + case 0xD1: // shl REG32, 1 + if (0xE4 == ip[1] || 0xE5 == ip[1]) // shl, ESP, 1 or shl EBP, 1 + goto badOpcode; // Doesn't look like valid code + ip += 2; + break; + + case 0xC1: // shl REG32, imm8 + if (0xE4 == ip[1] || 0xE5 == ip[1]) // shl, ESP, imm8 or shl EBP, imm8 + goto badOpcode; // Doesn't look like valid code + ip += 3; + break; + + case 0xD9: // single prefix + if (0xEE == ip[1]) + { + ip += 2; // FLDZ + break; + } + // + // INTENTIONAL FALL THRU + // + case 0xDD: // double prefix + if ((ip[1] & 0xC0) != 0xC0) + { + datasize = 0; // floatop r/m + goto decodeRM; + } + else + { + goto badOpcode; + } + break; + + case 0xf2: // repne prefix + case 0xF3: // rep prefix + ip += 1; + break; + + case 0xA4: // MOVS byte + case 0xA5: // MOVS word/dword + ip += 1; + break; + + case 0xA8: //test AL, imm8 + ip += 2; + break; + case 0xA9: //test EAX, imm32 + ip += 5; + break; + case 0xF6: + if ( (ip[1] & 0x38) == 0x00) // TEST r/m8, imm8 + { + datasize = 1; + goto decodeRM; + } + else + { + goto badOpcode; + } + break; + + case 0xF7: + if ( (ip[1] & 0x38) == 0x00) // TEST r/m32, imm32 + { + datasize = b16bit?2:4; + goto decodeRM; + } + else if ((ip[1] & 0xC8) == 0xC8) //neg reg + { + ip += 2; + break; + } + else if ((ip[1] & 0x30) == 0x30) //div eax by mod/rm + { + datasize = 0; + goto decodeRM; + } + else + { + goto badOpcode; + } + break; + +#ifdef __GNUC__ + case 0x2e: + // Group 2 instruction prefix. + if (ip[1] == 0x0f && ip[2] == 0x1f) + { + // Although not the recommended multi-byte sequence for 9-byte + // nops (the suggestion is to use 0x66 as the prefix), this shows + // up in GCC-optimized code. + ip += 2; + datasize = 0; + goto decodeRM; + } + else + { + goto badOpcode; + } + break; +#endif // __GNUC__ + + default: + badOpcode: + _ASSERTE(!"Bad opcode"); + // FIX what to do here? +#ifndef DACCESS_COMPILE +#ifndef _PREFIX_ + *((unsigned __int8**) 0) = ip; // cause an access violation (Free Build assert) +#endif // !_PREFIX_ +#else + DacNotImpl(); +#endif + goto done; + } + } +done: + _ASSERTE(epilogCallRet == 0); + + // At this point the fields in 'frame' coorespond exactly to the register + // state when the the helper returns to its caller. + lazyState->_esp = dac_cast<TADDR>(ESP); +} +#ifdef _PREFAST_ +#pragma warning(pop) +#endif |