diff options
Diffstat (limited to 'src/vm/arm/armsinglestepper.cpp')
-rw-r--r-- | src/vm/arm/armsinglestepper.cpp | 1212 |
1 files changed, 1212 insertions, 0 deletions
diff --git a/src/vm/arm/armsinglestepper.cpp b/src/vm/arm/armsinglestepper.cpp new file mode 100644 index 0000000000..e000959ef9 --- /dev/null +++ b/src/vm/arm/armsinglestepper.cpp @@ -0,0 +1,1212 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +// +// Emulate hardware single-step on ARM. +// + +#include "common.h" +#include "armsinglestepper.h" + +// +// ITState methods. +// + +ITState::ITState() +{ +#ifdef _DEBUG + m_fValid = false; +#endif +} + +// Must call Get() (or Init()) to initialize this instance from a specific context before calling any other +// (non-static) method. +void ITState::Get(T_CONTEXT *pCtx) +{ + m_bITState = (BYTE)((BitExtract((WORD)pCtx->Cpsr, 15, 10) << 2) | + BitExtract((WORD)(pCtx->Cpsr >> 16), 10, 9)); +#ifdef _DEBUG + m_fValid = true; +#endif +} + +// Must call Init() (or Get()) to initialize this instance from a raw byte value before calling any other +// (non-static) method. +void ITState::Init(BYTE bState) +{ + m_bITState = bState; +#ifdef _DEBUG + m_fValid = true; +#endif +} + +// Does the current IT state indicate we're executing within an IT block? +bool ITState::InITBlock() +{ + _ASSERTE(m_fValid); + return (m_bITState & 0x1f) != 0; +} + +// Only valid within an IT block. Returns the condition code which will be evaluated for the current +// instruction. +DWORD ITState::CurrentCondition() +{ + _ASSERTE(m_fValid); + _ASSERTE(InITBlock()); + return BitExtract(m_bITState, 7, 4); +} + +// Transition the IT state to that for the next instruction. +void ITState::Advance() +{ + _ASSERTE(m_fValid); + if ((m_bITState & 0x7) == 0) + m_bITState = 0; + else + m_bITState = (m_bITState & 0xe0) | ((m_bITState << 1) & 0x1f); +} + +// Write the current IT state back into the given context. +void ITState::Set(T_CONTEXT *pCtx) +{ + _ASSERTE(m_fValid); + + Clear(pCtx); + pCtx->Cpsr |= BitExtract(m_bITState, 1, 0) << 25; + pCtx->Cpsr |= BitExtract(m_bITState, 7, 2) << 10; +} + +// Clear IT state (i.e. force execution to be outside of an IT block) in the given context. +/* static */ void ITState::Clear(T_CONTEXT *pCtx) +{ + pCtx->Cpsr &= 0xf9ff03ff; +} + +// +// ArmSingleStepper methods. +// +ArmSingleStepper::ArmSingleStepper() + : m_originalPc(0), m_targetPc(0), m_rgCode(0), m_state(Disabled), + m_fEmulatedITInstruction(false), m_fRedirectedPc(false), m_fEmulate(false), m_fBypass(false), m_fSkipIT(false) +{ + m_opcodes[0] = 0; + m_opcodes[1] = 0; +} + +ArmSingleStepper::~ArmSingleStepper() +{ +#if !defined(DACCESS_COMPILE) && !defined(FEATURE_PAL) + DeleteExecutable(m_rgCode); +#endif +} + +void ArmSingleStepper::Init() +{ +#if !defined(DACCESS_COMPILE) && !defined(FEATURE_PAL) + if (m_rgCode == NULL) + { + m_rgCode = new (executable) WORD[kMaxCodeBuffer]; + } +#endif +} + +// Given the context with which a thread will be resumed, modify that context such that resuming the thread +// will execute a single instruction before raising an EXCEPTION_BREAKPOINT. The thread context must be +// cleaned up via the Fixup method below before any further exception processing can occur (at which point the +// caller can behave as though EXCEPTION_SINGLE_STEP was raised). +void ArmSingleStepper::Enable() +{ + _ASSERTE(m_state != Applied); + + if (m_state == Enabled) + { + // We allow single-stepping to be enabled multiple times before the thread is resumed, but we require + // that the thread state is the same in all cases (i.e. additional step requests are treated as + // no-ops). + _ASSERTE(!m_fBypass); + _ASSERTE(m_opcodes[0] == 0); + _ASSERTE(m_opcodes[1] == 0); + + return; + } + + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Enable\n")); + + m_fBypass = false; + m_opcodes[0] = 0; + m_opcodes[1] = 0; + m_state = Enabled; +} + +void ArmSingleStepper::Bypass(DWORD ip, WORD opcode1, WORD opcode2) +{ + _ASSERTE(m_state != Applied); + + if (m_state == Enabled) + { + // We allow single-stepping to be enabled multiple times before the thread is resumed, but we require + // that the thread state is the same in all cases (i.e. additional step requests are treated as + // no-ops). + if (m_fBypass) + { + _ASSERTE(m_opcodes[0] == opcode1); + _ASSERTE(m_opcodes[1] == opcode2); + _ASSERTE(m_originalPc == ip); + return; + } + } + + + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Bypass(pc=%x, opcode=%x %x)\n", (DWORD)ip, (DWORD)opcode1, (DWORD)opcode2)); + + m_fBypass = true; + m_originalPc = ip; + m_opcodes[0] = opcode1; + m_opcodes[1] = opcode2; + m_state = Enabled; +} + +void ArmSingleStepper::Apply(T_CONTEXT *pCtx) +{ + if (m_rgCode == NULL) + { + Init(); + + // OOM. We will simply ignore the single step. + if (m_rgCode == NULL) + return; + } + + _ASSERTE(pCtx != NULL); + + if (!m_fBypass) + { + DWORD pc = ((DWORD)pCtx->Pc) & ~THUMB_CODE; + m_opcodes[0] = *(WORD*)pc; + if (Is32BitInstruction( m_opcodes[0])) + m_opcodes[1] = *(WORD*)(pc+2); + } + + WORD opcode1 = m_opcodes[0]; + WORD opcode2 = m_opcodes[1]; + + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Apply(pc=%x, opcode=%x %x)\n", + (DWORD)pCtx->Pc, (DWORD)opcode1, (DWORD)opcode2)); + +#ifdef _DEBUG + // Make sure that we aren't trying to step through our own buffer. If this asserts, something is horribly + // wrong with the debugging layer. Likely GetManagedStoppedCtx is retrieving a Pc that points to our + // buffer, even though the single stepper is disabled. + DWORD codestart = (DWORD)(DWORD_PTR)m_rgCode; + DWORD codeend = codestart + (kMaxCodeBuffer * sizeof(WORD)); + _ASSERTE((pCtx->Pc < codestart) || (pCtx->Pc >= codeend)); +#endif + + // All stepping is simulated using a breakpoint instruction. Since other threads are not suspended while + // we step, we avoid race conditions and other complexity by redirecting the thread into a thread-local + // execution buffer. We can either copy the instruction we wish to step over into the buffer followed by a + // breakpoint or we can emulate the instruction (which is useful for instruction that depend on the value + // of the PC or that branch or call to an alternate location). Even in the emulation case we still + // redirect execution into the buffer and insert a breakpoint; this simplifies our interface since the + // rest of the runtime is not set up to expect single stepping to occur inline. Instead there is always a + // 1:1 relationship between setting the single-step mode and receiving an exception once the thread is + // restarted. + // + // There are two parts to the emulation: + // 1) In this method we either emulate the instruction (updating the input thread context as a result) or + // copy the single instruction into the execution buffer. In both cases we copy a breakpoint into the + // execution buffer as well then update the thread context to redirect execution into this buffer. + // 2) In the runtime's first chance vectored exception handler we perform the necessary fixups to make + // the exception look like the result of a single step. This includes resetting the PC to its correct + // value (either the instruction following the stepped instruction or the target PC cached in this + // object when we emulated an instruction that alters the PC). It also involves switching + // EXCEPTION_BREAKPOINT to EXCEPTION_SINGLE_STEP. + // + // If we encounter an exception while emulating an instruction (currently this can only happen if we A/V + // trying to read a value from memory) then we abandon emulation and fall back to the copy instruction + // mechanism. When we run the execution buffer the exception should be raised and handled as normal (we + // still peform context fixup in this case but we don't attempt to alter any exception code other than + // EXCEPTION_BREAKPOINT to EXCEPTION_SINGLE_STEP). There is a very small timing window here where another + // thread could alter memory protections to avoid the A/V when we run the instruction for real but the + // liklihood of this happening (in managed code no less) is judged sufficiently small that it's not worth + // the alternate solution (where we'd have to set the thread up to raise an exception with exactly the + // right thread context). + // + // Matters are complicated by the ARM IT instruction (upto four following instructions are executed + // conditionally based on a single condition or its negation). The issues are that the current instruction + // may be rendered into a no-op or that a breakpoint immediately following the current instruction may not + // be executed. To simplify matters we may modify the IT state to force our instructions to execute. We + // cache the real state and re-apply it along with the rest of our fixups when handling the breakpoint + // exception. Note that when executing general instructions we can't simply disable any IT state since + // many instructions alter their behavior depending on whether they're executing within an IT block + // (mostly it's used to determine whether these instructions set condition flags or not). + + // Cache thread's initial PC and IT state since we'll overwrite them as part of the emulation and we need + // to get back to the correct values at fixup time. We also cache a target PC (set below) since some + // instructions will set the PC directly or otherwise make it difficult for us to compute the final PC + // from the original. We still need the original PC however since this is the one we'll use if an + // exception (other than a breakpoint) occurs. + _ASSERTE(!m_fBypass || (m_originalPc == pCtx->Pc)); + + m_originalPc = pCtx->Pc; + m_originalITState.Get(pCtx); + + // By default assume the next PC is right after the current instruction. + m_targetPc = m_originalPc + (Is32BitInstruction(opcode1) ? 4 : 2); + m_fEmulate = false; + + // One more special case: if we attempt to single-step over an IT instruction it's easier to emulate this, + // set the new IT state in m_originalITState and set a special flag that lets Fixup() know we don't need + // to advance the state (this only works because we know IT will never raise an exception so we don't need + // m_originalITState to store the real original IT state, though in truth a legal IT instruction cannot be + // executed inside an IT block anyway). This flag (and m_originalITState) will be set inside TryEmulate() + // as needed. + m_fEmulatedITInstruction = false; + m_fSkipIT = false; + + // There are three different scenarios we must deal with (listed in priority order). In all cases we will + // redirect the thread to execute code from our buffer and end by raising a breakpoint exception: + // 1) We're executing in an IT block and the current instruction doesn't meet the condition requirements. + // We leave the state unchanged and in fixup will advance the PC to the next instruction slot. + // 2) The current instruction either takes the PC as an input or modifies the PC in a non-trivial manner. + // We can't easily run these instructions from the redirect buffer so we emulate their effect (i.e. + // update the current context in the same way as executing the instruction would). The breakpoint + // fixup logic will restore the PC to the real resultant PC we cache in m_targetPc. + // 3) For all other cases (including emulation cases where we aborted due to a memory fault) we copy the + // single instruction into the redirect buffer for execution followed by a breakpoint (once we regain + // control in the breakpoint fixup logic we can then reset the PC to its proper location. + + DWORD idxNextInstruction = 0; + + if (m_originalITState.InITBlock() && !ConditionHolds(pCtx, m_originalITState.CurrentCondition())) + { + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 1: ITState::Clear;\n")); + // Case 1: The current instruction is a no-op because due to the IT instruction. We've already set the + // target PC to the next instruction slot. Disable the IT block since we want our breakpoint + // to execute. We'll put the correct value back during fixup. + ITState::Clear(pCtx); + m_fSkipIT = true; + m_rgCode[idxNextInstruction++] = kBreakpointOp; + } + else if (TryEmulate(pCtx, opcode1, opcode2, false)) + { + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 2: Emulate\n")); + // Case 2: Successfully emulated an instruction that reads or writes the PC. Cache the new target PC + // so upon fixup we'll resume execution there rather than the following instruction. No need + // to mess with IT state since we know the next instruction is scheduled to execute (we dealt + // with the case where it wasn't above) and we're going to execute a breakpoint in that slot. + m_targetPc = pCtx->Pc; + m_fEmulate = true; + + // Set breakpoints to stop the execution. This will get us right back here. + m_rgCode[idxNextInstruction++] = kBreakpointOp; + m_rgCode[idxNextInstruction++] = kBreakpointOp; + } + else + { + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 3: CopyInstruction. Is32Bit=%d\n", (DWORD)Is32BitInstruction(opcode1))); + // Case 3: In all other cases copy the instruction to the buffer and we'll run it directly. If we're + // in an IT block there could be up to three instructions following this one whose execution + // is skipped. We could try to be clever here and either alter IT state to force the next + // instruction to execute or calculate the how many filler instructions we need to insert + // before we're guaranteed our breakpoint will be respected. But it's easier to just insert + // three additional breakpoints here (code below will add the fourth) and that way we'll + // guarantee one of them will be hit (we don't care which one -- the fixup code will update + // the PC and IT state to make it look as though the CPU just executed the current + // instruction). + m_rgCode[idxNextInstruction++] = opcode1; + if (Is32BitInstruction(opcode1)) + m_rgCode[idxNextInstruction++] = opcode2; + + m_rgCode[idxNextInstruction++] = kBreakpointOp; + m_rgCode[idxNextInstruction++] = kBreakpointOp; + m_rgCode[idxNextInstruction++] = kBreakpointOp; + } + + // Always terminate the redirection buffer with a breakpoint. + m_rgCode[idxNextInstruction++] = kBreakpointOp; + _ASSERTE(idxNextInstruction <= kMaxCodeBuffer); + + // Set the thread up so it will redirect to our buffer when execution resumes. + pCtx->Pc = ((DWORD)(DWORD_PTR)m_rgCode) | THUMB_CODE; + + // Make sure the CPU sees the updated contents of the buffer. + FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + + // Done, set the state. + m_state = Applied; +} + +void ArmSingleStepper::Disable() +{ + _ASSERTE(m_state != Applied); + m_state = Disabled; +} + +// When called in response to an exception (preferably in a first chance vectored handler before anyone else +// has looked at the thread context) this method will (a) determine whether this exception was raised by a +// call to Enable() above, in which case true will be returned and (b) perform final fixup of the thread +// context passed in to complete the emulation of a hardware single step. Note that this routine must be +// called even if the exception code is not EXCEPTION_BREAKPOINT since the instruction stepped might have +// raised its own exception (e.g. A/V) and we still need to fix the thread context in this case. +bool ArmSingleStepper::Fixup(T_CONTEXT *pCtx, DWORD dwExceptionCode) +{ +#ifdef _DEBUG + DWORD codestart = (DWORD)(DWORD_PTR)m_rgCode; + DWORD codeend = codestart + (kMaxCodeBuffer * sizeof(WORD)); +#endif + + // If we reach fixup, we should either be Disabled or Applied. If we reach here with Enabled it means + // that the debugging layer Enabled the single stepper, but we never applied it to a CONTEXT. + _ASSERTE(m_state != Enabled); + + // Nothing to do if the stepper is disabled on this thread. + if (m_state == Disabled) + { + // We better not be inside our internal code buffer though. + _ASSERTE((pCtx->Pc < codestart) || (pCtx->Pc >= codeend)); + return false; + } + + // Turn off the single stepper after we have executed one instruction. + m_state = Disabled; + + // We should always have a PC somewhere in our redirect buffer. +#ifdef _DEBUG + _ASSERTE((pCtx->Pc >= codestart) && (pCtx->Pc < codeend)); +#endif + + if (dwExceptionCode == EXCEPTION_BREAKPOINT) + { + // The single step went as planned. Set the PC back to its real value (either following the + // instruction we stepped or the computed destination we cached after emulating an instruction that + // modifies the PC). Advance the IT state from the value we cached before the single step (unless we + // stepped an IT instruction itself, in which case m_originalITState holds the new state and we should + // just set that). + if (!m_fEmulate) + { + if (m_rgCode[0] != kBreakpointOp) + { + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Fixup executed code, ip = %x\n", m_targetPc)); + + pCtx->Pc = m_targetPc; + if (!m_fEmulatedITInstruction) + m_originalITState.Advance(); + + m_originalITState.Set(pCtx); + } + else + { + if (m_fSkipIT) + { + // We needed to skip over an instruction due to a false condition in an IT block. + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Fixup skipped instruction due to IT\n")); + pCtx->Pc = m_targetPc; + + _ASSERTE(!m_fEmulatedITInstruction); + m_originalITState.Advance(); + m_originalITState.Set(pCtx); + } + else + { + // We've hit a breakpoint in the code stream. We will return false here (which causes us to NOT + // replace the breakpoint code with single step), and place the Pc back to the original Pc. The + // debugger patch skipping code will move past this breakpoint. + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Fixup emulated breakpoint\n")); + pCtx->Pc = m_originalPc; + + _ASSERTE(pCtx->Pc & THUMB_CODE); + return false; + } + } + } + else + { + bool res = TryEmulate(pCtx, m_opcodes[0], m_opcodes[1], true); + _ASSERTE(res); // We should always successfully emulate since we ran it through TryEmulate already. + + if (!m_fRedirectedPc) + pCtx->Pc = m_targetPc; + + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Fixup emulated, ip = %x\n", pCtx->Pc)); + } + } + else + { + // The stepped instruction caused an exception. Reset the PC and IT state to their original values we + // cached before stepping. (We should never seen this when stepping an IT instruction which overwrites + // m_originalITState). + _ASSERTE(!m_fEmulatedITInstruction); + _ASSERTE(m_fEmulate == false); + pCtx->Pc = m_originalPc; + m_originalITState.Set(pCtx); + + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::Fixup hit exception pc = %x ex = %x\n", pCtx->Pc, dwExceptionCode)); + } + + _ASSERTE(pCtx->Pc & THUMB_CODE); + return true; +} + +// Count the number of bits set in a DWORD. +DWORD ArmSingleStepper::BitCount(DWORD dwValue) +{ + // There are faster implementations but speed isn't critical here. + DWORD cBits = 0; + while (dwValue) + { + cBits += dwValue & 1; + dwValue >>= 1; + } + return cBits; +} + +// Return true if the given condition (C, N, Z or V) holds in the current context. +#define GET_FLAG(pCtx, _flag) \ + ((pCtx->Cpsr & (1 << APSR_##_flag)) != 0) + +// Returns true if the current context indicates the ARM condition specified holds. +bool ArmSingleStepper::ConditionHolds(T_CONTEXT *pCtx, DWORD cond) +{ + switch (cond) + { + case 0: // EQ (Z==1) + return GET_FLAG(pCtx, Z); + case 1: // NE (Z==0) + return !GET_FLAG(pCtx, Z); + case 2: // CS (C==1) + return GET_FLAG(pCtx, C); + case 3: // CC (C==0) + return !GET_FLAG(pCtx, C); + case 4: // MI (N==1) + return GET_FLAG(pCtx, N); + case 5: // PL (N==0) + return !GET_FLAG(pCtx, N); + case 6: // VS (V==1) + return GET_FLAG(pCtx, V); + case 7: // VC (V==0) + return !GET_FLAG(pCtx, V); + case 8: // HI (C==1 && Z==0) + return GET_FLAG(pCtx, C) && !GET_FLAG(pCtx, Z); + case 9: // LS (C==0 || Z==1) + return !GET_FLAG(pCtx, C) || GET_FLAG(pCtx, Z); + case 10: // GE (N==V) + return GET_FLAG(pCtx, N) == GET_FLAG(pCtx, V); + case 11: // LT (N!=V) + return GET_FLAG(pCtx, N) != GET_FLAG(pCtx, V); + case 12: // GT (Z==0 && N==V) + return !GET_FLAG(pCtx, Z) && (GET_FLAG(pCtx, N) == GET_FLAG(pCtx, V)); + case 13: // LE (Z==1 || N!=V) + return GET_FLAG(pCtx, Z) || (GET_FLAG(pCtx, N) != GET_FLAG(pCtx, V)); + case 14: // AL + return true; + case 15: + _ASSERTE(!"Unsupported condition code: 15"); + return false; + default: +// UNREACHABLE(); + return false; + } +} + +// Get the current value of a register. PC (register 15) is always reported as the current instruction PC + 4 +// as per the ARM architecture. +DWORD ArmSingleStepper::GetReg(T_CONTEXT *pCtx, DWORD reg) +{ + _ASSERTE(reg <= 15); + + if (reg == 15) + return (m_originalPc + 4) & ~THUMB_CODE; + + return (&pCtx->R0)[reg]; +} + +// Set the current value of a register. If the PC (register 15) is set then m_fRedirectedPc is set to true. +void ArmSingleStepper::SetReg(T_CONTEXT *pCtx, DWORD reg, DWORD value) +{ + _ASSERTE(reg <= 15); + + if (reg == 15) + { + value |= THUMB_CODE; + m_fRedirectedPc = true; + } + + (&pCtx->R0)[reg] = value; +} + +// Attempt to read a 1, 2 or 4 byte value from memory, zero or sign extend it to a 4-byte value and place that +// value into the buffer pointed at by pdwResult. Returns false if attempting to read the location caused a +// fault. +bool ArmSingleStepper::GetMem(DWORD *pdwResult, DWORD_PTR pAddress, DWORD cbSize, bool fSignExtend) +{ + struct Param + { + DWORD *pdwResult; + DWORD_PTR pAddress; + DWORD cbSize; + bool fSignExtend; + bool bReturnValue; + } param; + + param.pdwResult = pdwResult; + param.pAddress = pAddress; + param.cbSize = cbSize; + param.fSignExtend = fSignExtend; + param.bReturnValue = true; + + PAL_TRY(Param *, pParam, ¶m) + { + switch (pParam->cbSize) + { + case 1: + *pParam->pdwResult = *(BYTE*)pParam->pAddress; + if (pParam->fSignExtend && (*pParam->pdwResult & 0x00000080)) + *pParam->pdwResult |= 0xffffff00; + break; + case 2: + *pParam->pdwResult = *(WORD*)pParam->pAddress; + if (pParam->fSignExtend && (*pParam->pdwResult & 0x00008000)) + *pParam->pdwResult |= 0xffff0000; + break; + case 4: + *pParam->pdwResult = *(DWORD*)pParam->pAddress; + break; + default: + UNREACHABLE(); + pParam->bReturnValue = false; + } + } + PAL_EXCEPT(EXCEPTION_EXECUTE_HANDLER) + { + param.bReturnValue = false; + } + PAL_ENDTRY; + + return param.bReturnValue; +} + +// Wrapper around GetMem above that will automatically return from TryEmulate() indicating the instruction +// could not be emulated if we try to read memory and fail due to an exception. This logic works (i.e. we can +// simply return without worrying whether we've already updated the thread context) due to the fact that we +// either (a) read memory before updating any registers (the various LDR literal variants) or (b) update the +// register list before the base register in LDM-like operations (and this should therefore be an idempotent +// operation when we re-execute the instruction). If this ever changes we will have to store a copy of the +// original context we can use to revert changes (it gets even more complex if we ever have to emulate an +// instruction that writes memory). +#define GET_MEM(_result, _addr, _size, _signextend) \ + do { \ + if (!GetMem((_result), (_addr), (_size), (_signextend))) \ + return false; \ + } while (false) + +// Implements the various LDM-style multi-register load instructions (these include POP). +#define LDM(ctx, _base, _registerlist, _writeback, _ia) \ + do { \ + DWORD _pAddr = GetReg(ctx, _base); \ + if (!(_ia)) \ + _pAddr -= BitCount(_registerlist) * sizeof(void*); \ + DWORD _pStartAddr = _pAddr; \ + for (DWORD _i = 0; _i < 16; _i++) \ + { \ + if ((_registerlist) & (1 << _i)) \ + { \ + DWORD _tmpresult; \ + GET_MEM(&_tmpresult, _pAddr, 4, false); \ + SetReg(ctx, _i, _tmpresult); \ + _pAddr += sizeof(void*); \ + } \ + } \ + if (_writeback) \ + SetReg(ctx, _base, (_ia) ? _pAddr : _pStartAddr); \ + } while (false) + +// Parse the instruction whose first word is given in opcode1 (if the instruction is 32-bit TryEmulate will +// fetch the second word using the value of the PC stored in the current context). If the instruction reads or +// writes the PC or is the IT instruction then it will be emulated by updating the thread context +// appropriately and true will be returned. If the instruction is not one of those cases (or it is but we +// faulted trying to read memory during the emulation) no state is updated and false is returned instead. +bool ArmSingleStepper::TryEmulate(T_CONTEXT *pCtx, WORD opcode1, WORD opcode2, bool execute) +{ + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::TryEmulate(opcode=%x %x, execute=%s)\n", (DWORD)opcode1, (DWORD)opcode2, execute ? "true" : "false")); + + // Track whether instruction emulation wrote a modified PC. + m_fRedirectedPc = false; + + // Track whether we successfully emulated an instruction. If we did and we didn't modify the PC (e.g. a + // ADR instruction or a conditional branch not taken) then we'll need to explicitly set the PC to the next + // instruction (since our caller expects that whenever we return true m_pCtx->Pc holds the next + // instruction address). + bool fEmulated = false; + + if (Is32BitInstruction(opcode1)) + { + if (((opcode1 & 0xfbff) == 0xf2af) && + ((opcode2 & 0x8000) == 0x0000)) + { + // ADR.W : T2 + if (execute) + { + DWORD Rd = BitExtract(opcode2, 11, 8); + DWORD i = BitExtract(opcode1, 10, 10); + DWORD imm3 = BitExtract(opcode2, 14, 12); + DWORD imm8 = BitExtract(opcode2, 7, 0); + + SetReg(pCtx, Rd, (GetReg(pCtx, 15) & ~3) - ((i << 11) | (imm3 << 8) | imm8)); + } + + fEmulated = true; + } + else if (((opcode1 & 0xfbff) == 0xf20f) && + ((opcode2 & 0x8000) == 0x0000)) + { + // ADR.W : T3 + if (execute) + { + DWORD Rd = BitExtract(opcode2, 11, 8); + DWORD i = BitExtract(opcode1, 10, 10); + DWORD imm3 = BitExtract(opcode2, 14, 12); + DWORD imm8 = BitExtract(opcode2, 7, 0); + + SetReg(pCtx, Rd, (GetReg(pCtx, 15) & ~3) + ((i << 11) | (imm3 << 8) | imm8)); + } + + fEmulated = true; + } + else if (((opcode1 & 0xf800) == 0xf000) && + ((opcode2 & 0xd000) == 0x8000) && + ((opcode1 & 0x0380) != 0x0380)) + { + // B.W : T3 + if (execute) + { + DWORD S = BitExtract(opcode1, 10, 10); + DWORD cond = BitExtract(opcode1, 9, 6); + DWORD imm6 = BitExtract(opcode1, 5, 0); + DWORD J1 = BitExtract(opcode2, 13, 13); + DWORD J2 = BitExtract(opcode2, 11, 11); + DWORD imm11 = BitExtract(opcode2, 10, 0); + + if (ConditionHolds(pCtx, cond) && execute) + { + DWORD disp = (S ? 0xfff00000 : 0) | (J2 << 19) | (J1 << 18) | (imm6 << 12) | (imm11 << 1); + SetReg(pCtx, 15, GetReg(pCtx, 15) + disp); + } + } + + fEmulated = true; + } + else if (((opcode1 & 0xf800) == 0xf000) && + ((opcode2 & 0xd000) == 0x9000)) + { + // B.W : T4 + if (execute) + { + DWORD S = BitExtract(opcode1, 10, 10); + DWORD imm10 = BitExtract(opcode1, 9, 0); + DWORD J1 = BitExtract(opcode2, 13, 13); + DWORD J2 = BitExtract(opcode2, 11, 11); + DWORD imm11 = BitExtract(opcode2, 10, 0); + + DWORD I1 = (J1 ^ S) ^ 1; + DWORD I2 = (J2 ^ S) ^ 1; + + DWORD disp = (S ? 0xff000000 : 0) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1); + SetReg(pCtx, 15, GetReg(pCtx, 15) + disp); + } + + fEmulated = true; + } + else if (((opcode1 & 0xf800) == 0xf000) && + ((opcode2 & 0xd000) == 0xd000)) + { + // BL (immediate) : T1 + if (execute) + { + DWORD S = BitExtract(opcode1, 10, 10); + DWORD imm10 = BitExtract(opcode1, 9, 0); + DWORD J1 = BitExtract(opcode2, 13, 13); + DWORD J2 = BitExtract(opcode2, 11, 11); + DWORD imm11 = BitExtract(opcode2, 10, 0); + + DWORD I1 = (J1 ^ S) ^ 1; + DWORD I2 = (J2 ^ S) ^ 1; + + SetReg(pCtx, 14, GetReg(pCtx, 15) | 1); + + DWORD disp = (S ? 0xff000000 : 0) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1); + SetReg(pCtx, 15, GetReg(pCtx, 15) + disp); + } + + fEmulated = true; + } + else if (((opcode1 & 0xffd0) == 0xe890) && + ((opcode2 & 0x2000) == 0x0000)) + { + // LDM.W : T2, POP.W : T2 + if (execute) + { + DWORD W = BitExtract(opcode1, 5, 5); + DWORD Rn = BitExtract(opcode1, 3, 0); + DWORD registerList = opcode2; + + LDM(pCtx, Rn, registerList, W, true); + fEmulated = true; + } + else + { + // We should only emulate this instruction if Pc is set + if (opcode2 & (1<<15)) + fEmulated = true; + } + } + else if (((opcode1 & 0xffd0) == 0xe410) && + ((opcode2 & 0x2000) == 0x0000)) + { + // LDMDB : T1 + if (execute) + { + DWORD W = BitExtract(opcode1, 5, 5); + DWORD Rn = BitExtract(opcode1, 3, 0); + DWORD registerList = opcode2; + + LDM(pCtx, Rn, registerList, W, false); + fEmulated = true; + } + else + { + // We should only emulate this instruction if Pc is set + if (opcode2 & (1<<15)) + fEmulated = true; + } + } + else if (((opcode1 & 0xfff0) == 0xf8d0) && + ((opcode1 & 0x000f) != 0x000f)) + { + // LDR.W (immediate): T3 + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD Rn = BitExtract(opcode1, 3, 0); + if (execute) + { + DWORD imm12 = BitExtract(opcode2, 11, 0); + + DWORD value; + GET_MEM(&value, GetReg(pCtx, Rn) + imm12, 4, false); + + SetReg(pCtx, Rt, value); + fEmulated = true; + } + else + { + // We should only emulate this instruction if Pc is used + if (Rt == 15 || Rn == 15) + fEmulated = true; + } + } + else if (((opcode1 & 0xfff0) == 0xf850) && + ((opcode2 & 0x0800) == 0x0800) && + ((opcode1 & 0x000f) != 0x000f)) + { + // LDR (immediate) : T4, POP : T3 + DWORD Rn = BitExtract(opcode1, 3, 0); + DWORD Rt = BitExtract(opcode2, 15, 12); + if (execute) + { + DWORD P = BitExtract(opcode2, 10, 10); + DWORD U = BitExtract(opcode2, 9, 9); + DWORD W = BitExtract(opcode2, 8, 8); + DWORD imm8 = BitExtract(opcode2, 7, 0); + + DWORD offset_addr = U ? GetReg(pCtx, Rn) + imm8 : GetReg(pCtx, Rn) - imm8; + DWORD addr = P ? offset_addr : GetReg(pCtx, Rn); + + DWORD value; + GET_MEM(&value, addr, 4, false); + + if (W) + SetReg(pCtx, Rn, offset_addr); + + SetReg(pCtx, Rt, value); + fEmulated = true; + } + else + { + // We should only emulate this instruction if Pc is used + if (Rt == 15 || Rn == 15) + fEmulated = true; + } + } + else if (((opcode1 & 0xff7f) == 0xf85f)) + { + // LDR.W (literal) : T2 + DWORD Rt = BitExtract(opcode2, 15, 12); + if (execute) + { + DWORD U = BitExtract(opcode1, 7, 7); + DWORD imm12 = BitExtract(opcode2, 11, 0); + + // This instruction always reads relative to R15/PC + DWORD addr = GetReg(pCtx, 15) & ~3; + addr = U ? addr + imm12 : addr - imm12; + + DWORD value; + GET_MEM(&value, addr, 4, false); + + SetReg(pCtx, Rt, value); + } + + // We should ALWAYS emulate this instruction, because this instruction + // always reads the memory relative to PC + fEmulated = true; + } + else if (((opcode1 & 0xfff0) == 0xf850) && + ((opcode2 & 0x0fc0) == 0x0000) && + ((opcode1 & 0x000f) != 0x000f)) + { + // LDR.W : T2 + DWORD Rn = BitExtract(opcode1, 3, 0); + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD Rm = BitExtract(opcode2, 3, 0); + if (execute) + { + DWORD imm2 = BitExtract(opcode2, 5, 4); + DWORD addr = GetReg(pCtx, Rn) + (GetReg(pCtx, Rm) << imm2); + + DWORD value; + GET_MEM(&value, addr, 4, false); + + SetReg(pCtx, Rt, value); + fEmulated = true; + } + else + { + // We should only emulate this instruction if Pc is used + if (Rt == 15 || Rn == 15 || Rm == 15) + fEmulated = true; + } + } + else if (((opcode1 & 0xff7f) == 0xf81f) && + ((opcode2 & 0xf000) != 0xf000)) + { + // LDRB (literal) : T2 + if (execute) + { + DWORD U = BitExtract(opcode1, 7, 7); + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD imm12 = BitExtract(opcode2, 11, 0); + + DWORD addr = (GetReg(pCtx, 15) & ~3); + addr = U ? addr + imm12 : addr - imm12; + + DWORD value; + GET_MEM(&value, addr, 1, false); + + SetReg(pCtx, Rt, value); + } + + fEmulated = true; + } + else if (((opcode1 & 0xfe5f) == 0xe85f) && + ((opcode1 & 0x0120) != 0x0000)) + { + // LDRD (literal) : T1 + if (execute) + { + DWORD U = BitExtract(opcode1, 7, 7); + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD Rt2 = BitExtract(opcode2, 11, 8); + DWORD imm8 = BitExtract(opcode2, 7, 0); + + DWORD addr = (GetReg(pCtx, 15) & ~3); + addr = U ? addr + (imm8 << 2) : addr - (imm8 << 2); + + DWORD value1; + GET_MEM(&value1, addr, 4, false); + + DWORD value2; + GET_MEM(&value2, addr + 4, 4, false); + + SetReg(pCtx, Rt, value1); + SetReg(pCtx, Rt2, value2); + } + + fEmulated = true; + } + else if (((opcode1 & 0xff7f) == 0xf83f) && + ((opcode2 & 0xf000) != 0xf000)) + { + // LDRH (literal) : T1 + if (execute) + { + DWORD U = BitExtract(opcode1, 7, 7); + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD imm12 = BitExtract(opcode2, 11, 0); + + DWORD addr = (GetReg(pCtx, 15) & ~3); + addr = U ? addr + imm12 : addr - imm12; + + DWORD value; + GET_MEM(&value, addr, 2, false); + + SetReg(pCtx, Rt, value); + } + + fEmulated = true; + } + else if (((opcode1 & 0xff7f) == 0xf91f) && + ((opcode2 & 0xf000) != 0xf000)) + { + // LDRSB (literal) : T1 + if (execute) + { + DWORD U = BitExtract(opcode1, 7, 7); + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD imm12 = BitExtract(opcode2, 11, 0); + + DWORD addr = (GetReg(pCtx, 15) & ~3); + addr = U ? addr + imm12 : addr - imm12; + + DWORD value; + GET_MEM(&value, addr, 1, true); + + SetReg(pCtx, Rt, value); + } + + fEmulated = true; + } + else if (((opcode1 & 0xff7f) == 0xf53f) && + ((opcode2 & 0xf000) != 0xf000)) + { + // LDRSH (literal) : T1 + if (execute) + { + DWORD U = BitExtract(opcode1, 7, 7); + DWORD Rt = BitExtract(opcode2, 15, 12); + DWORD imm12 = BitExtract(opcode2, 11, 0); + + DWORD addr = (GetReg(pCtx, 15) & ~3); + addr = U ? addr + imm12 : addr - imm12; + + DWORD value; + GET_MEM(&value, addr, 2, true); + + SetReg(pCtx, Rt, value); + } + + fEmulated = true; + } + else if (((opcode1 & 0xfff0) == 0xe8d0) && + ((opcode2 & 0xffe0) == 0xf000)) + { + // TBB/TBH : T1 + if (execute) + { + DWORD Rn = BitExtract(opcode1, 3, 0); + DWORD H = BitExtract(opcode2, 4, 4); + DWORD Rm = BitExtract(opcode2, 3, 0); + + DWORD addr = GetReg(pCtx, Rn); + + DWORD value; + if (H) + GET_MEM(&value, addr + (GetReg(pCtx, Rm) << 1), 2, false); + else + GET_MEM(&value, addr + GetReg(pCtx, Rm), 1, false); + + SetReg(pCtx, 15, GetReg(pCtx, 15) + (value << 1)); + } + + fEmulated = true; + } + + // If we emulated an instruction but didn't set the PC explicitly we have to do so now (in such cases + // the next PC will always point directly after the instruction we just emulated). + if (fEmulated && !m_fRedirectedPc) + SetReg(pCtx, 15, GetReg(pCtx, 15)); + } + else + { + // Handle 16-bit instructions. + + if ((opcode1 & 0xf800) == 0xa000) + { + // ADR : T1 + if (execute) + { + DWORD Rd = BitExtract(opcode1, 10, 8); + DWORD imm8 = BitExtract(opcode1, 7, 0); + + SetReg(pCtx, Rd, (GetReg(pCtx, 15) & 3) + (imm8 << 2)); + } + + fEmulated = true; + } + else if (((opcode1 & 0xf000) == 0xd000) && ((opcode1 & 0x0f00) != 0x0e00)) + { + // B : T1 + + // We only emulate this instruction if we take the conditional + // jump. If not we'll pass right over the jump and set the + // target IP as normal. + DWORD cond = BitExtract(opcode1, 11, 8); + if (execute) + { + _ASSERTE(ConditionHolds(pCtx, cond)); + + DWORD imm8 = BitExtract(opcode1, 7, 0); + DWORD disp = (imm8 << 1) | ((imm8 & 0x80) ? 0xffffff00 : 0); + + SetReg(pCtx, 15, GetReg(pCtx, 15) + disp); + fEmulated = true; + } + else + { + if (ConditionHolds(pCtx, cond)) + { + fEmulated = true; + } + } + } + else if ((opcode1 & 0xf800) == 0xe000) + { + if (execute) + { + // B : T2 + DWORD imm11 = BitExtract(opcode1, 10, 0); + DWORD disp = (imm11 << 1) | ((imm11 & 0x400) ? 0xfffff000 : 0); + + SetReg(pCtx, 15, GetReg(pCtx, 15) + disp); + } + + fEmulated = true; + } + else if ((opcode1 & 0xff87) == 0x4780) + { + // BLX (register) : T1 + if (execute) + { + DWORD Rm = BitExtract(opcode1, 6, 3); + DWORD addr = GetReg(pCtx, Rm); + + SetReg(pCtx, 14, (GetReg(pCtx, 15) - 2) | 1); + SetReg(pCtx, 15, addr); + } + + fEmulated = true; + } + else if ((opcode1 & 0xff87) == 0x4700) + { + // BX : T1 + if (execute) + { + DWORD Rm = BitExtract(opcode1, 6, 3); + SetReg(pCtx, 15, GetReg(pCtx, Rm)); + } + + fEmulated = true; + } + else if ((opcode1 & 0xf500) == 0xb100) + { + // CBNZ/CBZ : T1 + if (execute) + { + DWORD op = BitExtract(opcode1, 11, 11); + DWORD i = BitExtract(opcode1, 9, 9); + DWORD imm5 = BitExtract(opcode1, 7, 3); + DWORD Rn = BitExtract(opcode1, 2, 0); + + if ((op && (GetReg(pCtx, Rn) != 0)) || + (!op && (GetReg(pCtx, Rn) == 0))) + { + SetReg(pCtx, 15, GetReg(pCtx, 15) + ((i << 6) | (imm5 << 1))); + } + } + + fEmulated = true; + } + else if (((opcode1 & 0xff00) == 0xbf00) && + ((opcode1 & 0x000f) != 0x0000)) + { + // IT : T1 + if (execute) + { + DWORD firstcond = BitExtract(opcode1, 7, 4); + DWORD mask = BitExtract(opcode1, 3, 0); + + // The IT instruction is special. We compute the IT state bits for the CPSR and cache them in + // m_originalITState. We then set m_fEmulatedITInstruction so that Fixup() knows not to advance + // this state (simply write it as-is back into the CPSR). + m_originalITState.Init((BYTE)((firstcond << 4) | mask)); + m_originalITState.Set(pCtx); + m_fEmulatedITInstruction = true; + } + + fEmulated = true; + } + else if ((opcode1 & 0xf800) == 0x4800) + { + // LDR (literal) : T1 + if (execute) + { + DWORD Rt = BitExtract(opcode1, 10, 8); + DWORD imm8 = BitExtract(opcode1, 7, 0); + + DWORD addr = (GetReg(pCtx, 15) & ~3) + (imm8 << 2); + + DWORD value = 0; + GET_MEM(&value, addr, 4, false); + + SetReg(pCtx, Rt, value); + } + + fEmulated = true; + } + else if ((opcode1 & 0xff00) == 0x4600) + { + // MOV (register) : T1 + DWORD D = BitExtract(opcode1, 7, 7); + DWORD Rm = BitExtract(opcode1, 6, 3); + DWORD Rd = (D << 3) | BitExtract(opcode1, 2, 0); + + if (execute) + { + SetReg(pCtx, Rd, GetReg(pCtx, Rm)); + fEmulated = true; + } + else + { + // Only emulate if we change Pc + if (Rm == 15 || Rd == 15) + fEmulated = true; + } + } + else if ((opcode1 & 0xfe00) == 0xbc00) + { + // POP : T1 + DWORD P = BitExtract(opcode1, 8, 8); + DWORD registerList = (P << 15) | BitExtract(opcode1, 7, 0); + if (execute) + { + LDM(pCtx, 13, registerList, true, true); + fEmulated = true; + } + else + { + // Only emulate if Pc is in the register list + if (registerList & (1<<15)) + fEmulated = true; + } + } + + // If we emulated an instruction but didn't set the PC explicitly we have to do so now (in such cases + // the next PC will always point directly after the instruction we just emulated). + if (execute && fEmulated && !m_fRedirectedPc) + SetReg(pCtx, 15, GetReg(pCtx, 15) - 2); + } + + LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper::TryEmulate(opcode=%x %x) emulated=%s redirectedPc=%s\n", + (DWORD)opcode1, (DWORD)opcode2, fEmulated ? "true" : "false", m_fRedirectedPc ? "true" : "false")); + return fEmulated; +} |