diff options
author | Steve MacLean <Steve.MacLean@microsoft.com> | 2019-07-03 22:29:07 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-03 22:29:07 -0400 |
commit | 613f9f233abde4159a85aa8130b7fcb57dc7a4ef (patch) | |
tree | 09aed307f82583475ef52a7e2448c46da8c640db /src/vm | |
parent | ae3430d8ce50c9e954192e89871aff783da375e2 (diff) | |
download | coreclr-613f9f233abde4159a85aa8130b7fcb57dc7a4ef.tar.gz coreclr-613f9f233abde4159a85aa8130b7fcb57dc7a4ef.tar.bz2 coreclr-613f9f233abde4159a85aa8130b7fcb57dc7a4ef.zip |
arm64singlestepper (#25512)
Add single step emulation for arm64 Linux
Add a copy and rework armsinglestepper to arm64singlestepper
Add arm64 emulation of all armv8 user space instructions which read or write PC.
- ADR, ADRP
- Branch instructions: B, BL, B.cond, BR, BLR, RET
- LDR (literal)
* Add FEATURE_EMULATE_SINGLESTEP
* Enable for ARM64 linux
* Debugging fixes
Fix IsSSFlagEnabled bug
Fix opcode type
Fix code buffer asserts
Fix CBZ typo
Fix BitExtract
Fix m_targetPc
Minimize written instructions
Fix comments
Fix Bypass address truncation
Fix false assert
Add additional logging
Use %lx to log addresses
Remove stray LOG
Remove stray assert
Diffstat (limited to 'src/vm')
-rw-r--r-- | src/vm/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/vm/arm64/arm64singlestepper.cpp | 696 | ||||
-rw-r--r-- | src/vm/arm64singlestepper.h | 112 | ||||
-rw-r--r-- | src/vm/excep.cpp | 4 | ||||
-rw-r--r-- | src/vm/exceptionhandling.cpp | 10 | ||||
-rw-r--r-- | src/vm/threads.h | 26 | ||||
-rw-r--r-- | src/vm/threadsuspend.cpp | 2 |
7 files changed, 841 insertions, 15 deletions
diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt index f341566df0..3a556aff2f 100644 --- a/src/vm/CMakeLists.txt +++ b/src/vm/CMakeLists.txt @@ -852,6 +852,12 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/virtualcallstubcpu.hpp exceptionhandling.h ) + + if(CLR_CMAKE_PLATFORM_UNIX) + list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH + ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp + ) + endif(CLR_CMAKE_PLATFORM_UNIX) endif() if(CLR_CMAKE_PLATFORM_UNIX) diff --git a/src/vm/arm64/arm64singlestepper.cpp b/src/vm/arm64/arm64singlestepper.cpp new file mode 100644 index 0000000000..832e63cb21 --- /dev/null +++ b/src/vm/arm64/arm64singlestepper.cpp @@ -0,0 +1,696 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//

//
// Emulate hardware single-step on ARM64.
//

#include "common.h"
#include "arm64singlestepper.h"

// Sign-extend 'value' from bit position 'signbit' (0-based) up to bit 63.
// Bits above 'signbit' are set when that bit is 1; otherwise 'value' is returned unchanged.
// 'signbit' must be less than 64.
inline uint64_t SignExtend(uint64_t value, unsigned int signbit)
{
    _ASSERTE(signbit < 64);

    // Nothing above bit 63 to fill in.
    if (signbit == 63)
        return value;

    uint64_t sign = value & (1ull << signbit);

    if (sign)
        return value | (~0ull << signbit);
    else
        return value;
}

// Extract the bit field value[highbit:lowbit] (both inclusive) and return it right-aligned.
// When 'signExtend' is true the top bit of the field is treated as a sign bit and replicated
// up to bit 63 of the result.
inline uint64_t BitExtract(uint64_t value, unsigned int highbit, unsigned int lowbit, bool signExtend = false)
{
    _ASSERTE((highbit < 64) && (lowbit < 64) && (highbit >= lowbit));

    // A full-width field (63:0) would need a shift by 64, which is undefined; use an all-ones mask.
    const unsigned int width = (highbit - lowbit) + 1;
    const uint64_t mask = (width == 64) ? ~0ull : ((1ull << width) - 1);
    uint64_t extractedValue = (value >> lowbit) & mask;

    return signExtend ? SignExtend(extractedValue, highbit - lowbit) : extractedValue;
}


//
// Arm64SingleStepper methods.
+// +Arm64SingleStepper::Arm64SingleStepper() + : m_originalPc(0), m_targetPc(0), m_rgCode(0), m_state(Disabled), + m_fEmulate(false), m_fBypass(false) +{ + m_opcodes[0] = 0; +} + +Arm64SingleStepper::~Arm64SingleStepper() +{ +#if !defined(DACCESS_COMPILE) +#ifdef FEATURE_PAL + SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(uint32_t)); +#else + DeleteExecutable(m_rgCode); +#endif +#endif +} + +void Arm64SingleStepper::Init() +{ +#if !defined(DACCESS_COMPILE) + if (m_rgCode == NULL) + { +#ifdef FEATURE_PAL + m_rgCode = (uint32_t *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(uint32_t))); +#else + m_rgCode = new (executable) uint32_t[kMaxCodeBuffer]; +#endif + } +#endif +} + +// Given the context with which a thread will be resumed, modify that context such that resuming the thread +// will execute a single instruction before raising an EXCEPTION_BREAKPOINT. The thread context must be +// cleaned up via the Fixup method below before any further exception processing can occur (at which point the +// caller can behave as though EXCEPTION_SINGLE_STEP was raised). +void Arm64SingleStepper::Enable() +{ + _ASSERTE(m_state != Applied); + + if (m_state == Enabled) + { + // We allow single-stepping to be enabled multiple times before the thread is resumed, but we require + // that the thread state is the same in all cases (i.e. additional step requests are treated as + // no-ops). 
+ _ASSERTE(!m_fBypass); + _ASSERTE(m_opcodes[0] == 0); + + return; + } + + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Enable\n")); + + m_fBypass = false; + m_opcodes[0] = 0; + m_state = Enabled; +} + +void Arm64SingleStepper::Bypass(uint64_t ip, uint32_t opcode) +{ + _ASSERTE(m_state != Applied); + + if (m_state == Enabled) + { + // We allow single-stepping to be enabled multiple times before the thread is resumed, but we require + // that the thread state is the same in all cases (i.e. additional step requests are treated as + // no-ops). + if (m_fBypass) + { + _ASSERTE(m_opcodes[0] == opcode); + _ASSERTE(m_originalPc == ip); + return; + } + } + + + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Bypass(pc=%lx, opcode=%x)\n", ip, opcode)); + + m_fBypass = true; + m_originalPc = ip; + m_opcodes[0] = opcode; + m_state = Enabled; +} + +void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) +{ + if (m_rgCode == NULL) + { + Init(); + + // OOM. We will simply ignore the single step. + if (m_rgCode == NULL) + return; + } + + _ASSERTE(pCtx != NULL); + + if (!m_fBypass) + { + uint64_t pc = pCtx->Pc; + m_opcodes[0] = *(uint32_t*)pc; // Opcodes are always in little endian, we only support little endian execution mode + } + + uint32_t opcode = m_opcodes[0]; + + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Apply(pc=%lx, opcode=%x)\n", (uint64_t)pCtx->Pc, opcode)); + +#ifdef _DEBUG + // Make sure that we aren't trying to step through our own buffer. If this asserts, something is horribly + // wrong with the debugging layer. Likely GetManagedStoppedCtx is retrieving a Pc that points to our + // buffer, even though the single stepper is disabled. + uint64_t codestart = (uint64_t)m_rgCode; + uint64_t codeend = codestart + (kMaxCodeBuffer * sizeof(uint32_t)); + _ASSERTE((pCtx->Pc < codestart) || (pCtx->Pc >= codeend)); +#endif + + // All stepping is simulated using a breakpoint instruction. 
Since other threads are not suspended while + // we step, we avoid race conditions and other complexity by redirecting the thread into a thread-local + // execution buffer. We can either copy the instruction we wish to step over into the buffer followed by a + // breakpoint or we can emulate the instruction (which is useful for instruction that depend on the value + // of the PC or that branch or call to an alternate location). Even in the emulation case we still + // redirect execution into the buffer and insert a breakpoint; this simplifies our interface since the + // rest of the runtime is not set up to expect single stepping to occur inline. Instead there is always a + // 1:1 relationship between setting the single-step mode and receiving an exception once the thread is + // restarted. + // + // There are two parts to the emulation: + // 1) In this method we either emulate the instruction (updating the input thread context as a result) or + // copy the single instruction into the execution buffer. In both cases we copy a breakpoint into the + // execution buffer as well then update the thread context to redirect execution into this buffer. + // 2) In the runtime's first chance vectored exception handler we perform the necessary fixups to make + // the exception look like the result of a single step. This includes resetting the PC to its correct + // value (either the instruction following the stepped instruction or the target PC cached in this + // object when we emulated an instruction that alters the PC). It also involves switching + // EXCEPTION_BREAKPOINT to EXCEPTION_SINGLE_STEP. + // + // If we encounter an exception while emulating an instruction (currently this can only happen if we A/V + // trying to read a value from memory) then we abandon emulation and fall back to the copy instruction + // mechanism. 
When we run the execution buffer the exception should be raised and handled as normal (we + // still peform context fixup in this case but we don't attempt to alter any exception code other than + // EXCEPTION_BREAKPOINT to EXCEPTION_SINGLE_STEP). There is a very small timing window here where another + // thread could alter memory protections to avoid the A/V when we run the instruction for real but the + // liklihood of this happening (in managed code no less) is judged sufficiently small that it's not worth + // the alternate solution (where we'd have to set the thread up to raise an exception with exactly the + // right thread context). + + // Cache thread's initial PC since we'll overwrite them as part of the emulation and we need + // to get back to the correct values at fixup time. We also cache a target PC (set below) since some + // instructions will set the PC directly or otherwise make it difficult for us to compute the final PC + // from the original. We still need the original PC however since this is the one we'll use if an + // exception (other than a breakpoint) occurs. + _ASSERTE((!m_fBypass || (m_originalPc == pCtx->Pc))); + + m_originalPc = pCtx->Pc; + + // By default assume the next PC is right after the current instruction. + m_targetPc = m_originalPc + sizeof(uint32_t); + m_fEmulate = false; + + // There are two different scenarios we must deal with (listed in priority order). In all cases we will + // redirect the thread to execute code from our buffer and end by raising a breakpoint exception: + // 1) The current instruction either takes the PC as an input or modifies the PC. + // We can't easily run these instructions from the redirect buffer so we emulate their effect (i.e. + // update the current context in the same way as executing the instruction would). The breakpoint + // fixup logic will restore the PC to the real resultant PC we cache in m_targetPc. 
+ // 2) For all other cases (including emulation cases where we aborted due to a memory fault) we copy the + // single instruction into the redirect buffer for execution followed by a breakpoint (once we regain + // control in the breakpoint fixup logic we can then reset the PC to its proper location. + + int idxNextInstruction = 0; + + if (TryEmulate(pCtx, opcode, false)) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper: Case 1: Emulate\n")); + // Case 1: Emulate an instruction that reads or writes the PC. + m_fEmulate = true; + } + else + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper: Case 2: CopyInstruction.\n")); + // Case 2: In all other cases copy the instruction to the buffer and we'll run it directly. + m_rgCode[idxNextInstruction++] = opcode; + } + + // Always terminate the redirection buffer with a breakpoint. + m_rgCode[idxNextInstruction++] = kBreakpointOp; + _ASSERTE(idxNextInstruction <= kMaxCodeBuffer); + + // Set the thread up so it will redirect to our buffer when execution resumes. + pCtx->Pc = (uint64_t)m_rgCode; + + // Make sure the CPU sees the updated contents of the buffer. + FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + + // Done, set the state. + m_state = Applied; +} + +void Arm64SingleStepper::Disable() +{ + _ASSERTE(m_state != Applied); + m_state = Disabled; +} + +// When called in response to an exception (preferably in a first chance vectored handler before anyone else +// has looked at the thread context) this method will (a) determine whether this exception was raised by a +// call to Enable() above, in which case true will be returned and (b) perform final fixup of the thread +// context passed in to complete the emulation of a hardware single step. Note that this routine must be +// called even if the exception code is not EXCEPTION_BREAKPOINT since the instruction stepped might have +// raised its own exception (e.g. A/V) and we still need to fix the thread context in this case. 
+bool Arm64SingleStepper::Fixup(T_CONTEXT *pCtx, DWORD dwExceptionCode) +{ +#ifdef _DEBUG + uint64_t codestart = (uint64_t)m_rgCode; + uint64_t codeend = codestart + (kMaxCodeBuffer * sizeof(uint32_t)); +#endif + + // If we reach fixup, we should either be Disabled or Applied. If we reach here with Enabled it means + // that the debugging layer Enabled the single stepper, but we never applied it to a CONTEXT. + _ASSERTE(m_state != Enabled); + + // Nothing to do if the stepper is disabled on this thread. + if (m_state == Disabled) + { + // We better not be inside our internal code buffer though. + _ASSERTE((pCtx->Pc < codestart) || (pCtx->Pc > codeend)); + return false; + } + + // Turn off the single stepper after we have executed one instruction. + m_state = Disabled; + + // We should always have a PC somewhere in our redirect buffer. +#ifdef _DEBUG + _ASSERTE((pCtx->Pc >= codestart) && (pCtx->Pc <= codeend)); +#endif + + if (dwExceptionCode == EXCEPTION_BREAKPOINT) + { + // The single step went as planned. Set the PC back to its real value (either following the + // instruction we stepped or the computed destination we cached after emulating an instruction that + // modifies the PC). + if (!m_fEmulate) + { + if (m_rgCode[0] != kBreakpointOp) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Fixup executed code, ip = %lx\n", m_targetPc)); + + pCtx->Pc = m_targetPc; + } + else + { + // We've hit a breakpoint in the code stream. We will return false here (which causes us to NOT + // replace the breakpoint code with single step), and place the Pc back to the original Pc. The + // debugger patch skipping code will move past this breakpoint. 
+ LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Fixup emulated breakpoint\n")); + pCtx->Pc = m_originalPc; + + _ASSERTE((pCtx->Pc & 0x3) == 0); + return false; + } + } + else + { + bool res = TryEmulate(pCtx, m_opcodes[0], true); + _ASSERTE(res); // We should always successfully emulate since we ran it through TryEmulate already. + + pCtx->Pc = m_targetPc; + + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Fixup emulated, ip = %lx\n", pCtx->Pc)); + } + } + else + { + // The stepped instruction caused an exception. Reset the PC to its original values we + // cached before stepping. + _ASSERTE(m_fEmulate == false); + pCtx->Pc = m_originalPc; + + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::Fixup hit exception pc = %lx ex = %x\n", pCtx->Pc, dwExceptionCode)); + } + + _ASSERTE((pCtx->Pc & 0x3) == 0); + return true; +} + +// Return true if the given condition (C, N, Z or V) holds in the current context. +#define GET_FLAG(pCtx, _flag) \ + ((pCtx->Cpsr & (1 << APSR_##_flag)) != 0) + +// Returns true if the current context indicates the ARM condition specified holds. 
+bool Arm64SingleStepper::ConditionHolds(T_CONTEXT *pCtx, uint64_t cond) +{ + switch (cond) + { + case 0: // EQ (Z==1) + return GET_FLAG(pCtx, Z); + case 1: // NE (Z==0) + return !GET_FLAG(pCtx, Z); + case 2: // CS (C==1) + return GET_FLAG(pCtx, C); + case 3: // CC (C==0) + return !GET_FLAG(pCtx, C); + case 4: // MI (N==1) + return GET_FLAG(pCtx, N); + case 5: // PL (N==0) + return !GET_FLAG(pCtx, N); + case 6: // VS (V==1) + return GET_FLAG(pCtx, V); + case 7: // VC (V==0) + return !GET_FLAG(pCtx, V); + case 8: // HI (C==1 && Z==0) + return GET_FLAG(pCtx, C) && !GET_FLAG(pCtx, Z); + case 9: // LS (C==0 || Z==1) + return !GET_FLAG(pCtx, C) || GET_FLAG(pCtx, Z); + case 10: // GE (N==V) + return GET_FLAG(pCtx, N) == GET_FLAG(pCtx, V); + case 11: // LT (N!=V) + return GET_FLAG(pCtx, N) != GET_FLAG(pCtx, V); + case 12: // GT (Z==0 && N==V) + return !GET_FLAG(pCtx, Z) && (GET_FLAG(pCtx, N) == GET_FLAG(pCtx, V)); + case 13: // LE (Z==1 || N!=V) + return GET_FLAG(pCtx, Z) || (GET_FLAG(pCtx, N) != GET_FLAG(pCtx, V)); + case 14: // AL + return true; + case 15: + return false; + default: + UNREACHABLE(); + return false; + } +} + +// Get the current value of a register. +uint64_t Arm64SingleStepper::GetReg(T_CONTEXT *pCtx, uint64_t reg) +{ + _ASSERTE(reg <= 31); + + return (&pCtx->X0)[reg]; +} + +// Set the current value of a register. +void Arm64SingleStepper::SetReg(T_CONTEXT *pCtx, uint64_t reg, uint64_t value) +{ + _ASSERTE(reg <= 31); + + (&pCtx->X0)[reg] = value; +} + +// Set the current value of a register. +void Arm64SingleStepper::SetFPReg(T_CONTEXT *pCtx, uint64_t reg, uint64_t valueLo, uint64_t valueHi) +{ + _ASSERTE(reg <= 31); + + pCtx->V[reg].Low = valueLo; + pCtx->V[reg].High = valueHi; +} + +// Attempt to read a 4, or 8 byte value from memory, zero or sign extend it to a 8-byte value and place +// that value into the buffer pointed at by pdwResult. Returns false if attempting to read the location +// caused a fault. 
+bool Arm64SingleStepper::GetMem(uint64_t *pdwResult, uint8_t* pAddress, int cbSize, bool fSignExtend) +{ + struct Param + { + uint64_t *pdwResult; + uint8_t *pAddress; + int cbSize; + bool fSignExtend; + bool bReturnValue; + } param; + + param.pdwResult = pdwResult; + param.pAddress = pAddress; + param.cbSize = cbSize; + param.fSignExtend = fSignExtend; + param.bReturnValue = true; + + PAL_TRY(Param *, pParam, ¶m) + { + switch (pParam->cbSize) + { + case 4: + *pParam->pdwResult = *(uint32_t*)pParam->pAddress; + if (pParam->fSignExtend && (*pParam->pdwResult & 0x80000000)) + *pParam->pdwResult |= 0xffffffff00000000; + break; + case 8: + *pParam->pdwResult = *(uint64_t*)pParam->pAddress; + break; + default: + UNREACHABLE(); + pParam->bReturnValue = false; + } + } + PAL_EXCEPT(EXCEPTION_EXECUTE_HANDLER) + { + param.bReturnValue = false; + } + PAL_ENDTRY; + + return param.bReturnValue; +} + +// Wrapper around GetMem above that will automatically return from TryEmulate() indicating the instruction +// could not be emulated if we try to read memory and fail due to an exception. This logic works (i.e. we can +// simply return without worrying whether we've already updated the thread context) due to the fact that we +// either (a) read memory before updating any registers (the various LDR literal variants) or (b) update the +// register list before the base register in LDM-like operations (and this should therefore be an idempotent +// operation when we re-execute the instruction). If this ever changes we will have to store a copy of the +// original context we can use to revert changes (it gets even more complex if we ever have to emulate an +// instruction that writes memory). +#define GET_MEM(_result, _addr, _size, _signextend) \ + do { \ + if (!GetMem((_result), (_addr), (_size), (_signextend))) \ + return false; \ + } while (false) + +// Parse the instruction opcode. 
If the instruction reads or writes the PC it will be emulated by updating +// the thread context appropriately and true will be returned. If the instruction is not one of those cases +// (or it is but we faulted trying to read memory during the emulation) no state is updated and false is +// returned instead. +bool Arm64SingleStepper::TryEmulate(T_CONTEXT *pCtx, uint32_t opcode, bool execute) +{ + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate(opcode=%x, execute=%s)\n", opcode, execute ? "true" : "false")); + + // Track whether instruction emulation wrote a modified PC. + bool fRedirectedPc = false; + + // Track whether we successfully emulated an instruction. If we did and we didn't modify the PC (e.g. a + // ADR instruction or a conditional branch not taken) then we'll need to explicitly set the PC to the next + // instruction (since our caller expects that whenever we return true m_pCtx->Pc holds the next + // instruction address). + bool fEmulated = false; + + if ((opcode & 0x1f000000) == 0x10000000) // PC-Rel addressing (ADR & ADRP) + { + fEmulated = true; + if (execute) + { + uint64_t P = BitExtract(opcode, 31, 31); + uint64_t immlo = BitExtract(opcode, 30, 29); + uint64_t immhi = BitExtract(opcode, 23, 5, true); + uint64_t Rd = BitExtract(opcode, 4, 0); + + if (P) // ADRP + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate ADRP\n")); + uint64_t imm = (immhi << 14) | (immlo << 12); + uint64_t value = (m_originalPc & ~0xfffull) + imm; + SetReg(pCtx, Rd, value); + } + else // ADR + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate ADR\n")); + uint64_t imm = (immhi << 2) | (immlo); + uint64_t value = m_originalPc + imm; + SetReg(pCtx, Rd, value); + } + } + } + else if ((opcode & 0xff000010) == 0x54000000) // Conditional branch immediate (B.cond) + { + fEmulated = true; + if (execute) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate B.cond\n")); + uint64_t imm19 = BitExtract(opcode, 23, 5, true); + 
uint64_t cond = BitExtract(opcode, 3, 0); + + if (ConditionHolds(pCtx, cond)) + { + uint64_t imm = (imm19 << 2); + + fRedirectedPc = true; + m_targetPc = m_originalPc + imm; + } + } + } + else if ((opcode & 0xf7000000) == 0xd6000000) // Unconditional branch register + { + if (((opcode & 0xfffffc1f) == 0xd61f0000) // BR + ||((opcode & 0xfffffc1f) == 0xd63f0000) // BLR + ||((opcode & 0xfffffc1f) == 0xd65f0000)) // RET + { + fEmulated = true; + if (execute) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate Unconditional branch register\n")); + uint64_t Rn = BitExtract(opcode, 9, 5); + uint64_t target = GetReg(pCtx, Rn); + + // arm64 supports tagged addresses (bit 55 is treated as a sign bit and extended when tagged addresses are enabled). + // assumes we don't need to emulate tagged addresses + _ASSERTE(target == BitExtract(target, 55, 0, true)); + + fRedirectedPc = true; + m_targetPc = target; + + if ((opcode & 0xfffffc1f) == 0xd63f0000) // BLR + SetReg(pCtx, 30, m_originalPc + 4); + } + } + else + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate unexpected\n")); + // These are either: + // - Unallocated instructions + // - Unallocated on armv8 in EL0 (ERET, DRPS) + // - Several armv8.3 pointer authentication branch related instructions + // Note: We do not use armv8.3 pointer authentication forms in JIT or generate arm64 native code with v8.3 enabled + } + } + else if ((opcode & 0x7c000000) == 0x14000000) // Unconditional branch immediate (B & BL) + { + fEmulated = true; + if (execute) + { + uint64_t L = BitExtract(opcode, 31, 31); + uint64_t imm26 = BitExtract(opcode, 25, 0, true); + + uint64_t imm = (imm26 << 2); + + fRedirectedPc = true; + m_targetPc = m_originalPc + imm; + + if (L) // BL + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate BL\n")); + SetReg(pCtx, 30, m_originalPc + 4); + } + else + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate B\n")); + } + } + } + else if ((opcode & 
0x7e000000) == 0x34000000) // Compare and branch CBZ & CBNZ + { + fEmulated = true; + if (execute) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate CBZ/CBNZ\n")); + + uint64_t sf = BitExtract(opcode, 31, 31); + uint64_t NZ = BitExtract(opcode, 24, 24); + uint64_t imm19 = BitExtract(opcode, 23, 5, true); + uint64_t Rt = BitExtract(opcode, 4, 0); + + uint64_t regValue = GetReg(pCtx, Rt); + + if (sf == 0) + { + // 32-bit instruction form + regValue = BitExtract(regValue, 31, 0); + } + + if ((regValue == 0) == (NZ == 0)) + { + uint64_t imm = (imm19 << 2); + + fRedirectedPc = true; + m_targetPc = m_originalPc + imm; + } + } + } + else if ((opcode & 0x7e000000) == 0x36000000) // Test and branch (TBZ & TBNZ) + { + fEmulated = true; + if (execute) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate TBZ/TBNZ\n")); + + uint64_t b5 = BitExtract(opcode, 31, 31); + uint64_t NZ = BitExtract(opcode, 24, 24); + uint64_t b40 = BitExtract(opcode, 23, 19); + uint64_t imm14 = BitExtract(opcode, 18, 5, true); + uint64_t Rt = BitExtract(opcode, 4, 0); + + uint64_t regValue = GetReg(pCtx, Rt); + + uint64_t bit = (b5 << 5) | b40; + uint64_t bitValue = BitExtract(regValue, bit, bit); + + if (bitValue == NZ) + { + uint64_t imm = (imm14 << 2); + + fRedirectedPc = true; + m_targetPc = m_originalPc + imm; + } + } + } + else if ((opcode & 0x3b000000) == 0x18000000) // Load register (literal) + { + uint64_t opc = BitExtract(opcode, 31, 30); + + fEmulated = (opc != 3); + if (execute) + { + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate Load register (literal)\n")); + + uint64_t V = BitExtract(opcode, 26, 26); + uint64_t imm19 = BitExtract(opcode, 23, 5, true); + uint64_t Rt = BitExtract(opcode, 4, 0); + + uint64_t imm = (imm19 << 2); + + uint8_t* address = (uint8_t*)(m_originalPc + imm); + + uint64_t value = 0; + uint64_t valueHi; + + switch (opc) + { + case 0: // 32-bit + GET_MEM(&value, address, 4, false); + + if (V == 0) + SetReg(pCtx, Rt, 
value); + else + SetFPReg(pCtx, Rt, value); + break; + case 1: // 64-bit GPR + GET_MEM(&value, address, 8, false); + + if (V == 0) + SetReg(pCtx, Rt, value); + else + SetFPReg(pCtx, Rt, value); + break; + case 2: + if (V == 0) + { + // 32-bit GPR Sign extended + GET_MEM(&value, address, 4, true); + SetReg(pCtx, Rt, value); + } + else + { + // 128-bit FP & SIMD + GET_MEM(&value, address, 8, false); + GET_MEM(&valueHi, address + 8, 8, false); + SetFPReg(pCtx, Rt, value, valueHi); + } + break; + default: + _ASSERTE(FALSE); + } + } + } + + LOG((LF_CORDB, LL_INFO100000, "Arm64SingleStepper::TryEmulate(opcode=%x) emulated=%s redirectedPc=%s\n", + opcode, fEmulated ? "true" : "false", fRedirectedPc ? "true" : "false")); + + return fEmulated; +} diff --git a/src/vm/arm64singlestepper.h b/src/vm/arm64singlestepper.h new file mode 100644 index 0000000000..387500eb1f --- /dev/null +++ b/src/vm/arm64singlestepper.h @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +// +// Emulate hardware single-step on ARM64. +// + +#ifndef __ARM64_SINGLE_STEPPER_H__ +#define __ARM64_SINGLE_STEPPER_H__ + +// Class that encapsulates the context needed to single step one thread. +class Arm64SingleStepper +{ +public: + Arm64SingleStepper(); + ~Arm64SingleStepper(); + + // Given the context with which a thread will be resumed, modify that context such that resuming the + // thread will execute a single instruction before raising an EXCEPTION_BREAKPOINT. The thread context + // must be cleaned up via the Fixup method below before any further exception processing can occur (at + // which point the caller can behave as though EXCEPTION_SINGLE_STEP was raised). + void Enable(); + + void Bypass(uint64_t ip, uint32_t opcode); + + void Apply(T_CONTEXT *pCtx); + + // Disables the single stepper. 
+ void Disable(); + + // Returns whether or not the stepper is enabled. + inline bool IsEnabled() const + { + return m_state == Enabled || m_state == Applied; + } + + // When called in response to an exception (preferably in a first chance vectored handler before anyone + // else has looked at the thread context) this method will (a) determine whether this exception was raised + // by a call to Enable() above, in which case true will be returned and (b) perform final fixup of the + // thread context passed in to complete the emulation of a hardware single step. Note that this routine + // must be called even if the exception code is not EXCEPTION_BREAKPOINT since the instruction stepped + // might have raised its own exception (e.g. A/V) and we still need to fix the thread context in this + // case. + bool Fixup(T_CONTEXT *pCtx, DWORD dwExceptionCode); + +private: + enum + { + kMaxCodeBuffer = 2, // max slots in our redirect buffer + // 1 for current instruction + // 1 for final breakpoint +#ifdef __linux__ + kBreakpointOp = 0xD4200000 + (0x11E1 << 5), // Opcode for the breakpoint instruction used on ARM64 Linux +#else +#error Arm64SingleStepper is only expected to be used for linux +#endif + }; + + // Bit numbers of the condition flags in the CPSR. + enum APSRBits + { + APSR_N = 31, + APSR_Z = 30, + APSR_C = 29, + APSR_V = 28, + }; + + enum StepperState + { + Disabled, + Enabled, + Applied + }; + + uint64_t m_originalPc; // PC value before stepping + uint64_t m_targetPc; // Final PC value after stepping if no exception is raised + uint32_t *m_rgCode; // Buffer execution is redirected to during the step + StepperState m_state; // Tracks whether the stepper is Enabled, Disabled, or enabled and applied to a context + uint32_t m_opcodes[1]; + bool m_fEmulate; + bool m_fBypass; + + // Initializes m_rgCode. Not thread safe. + void Init(); + + // Returns true if the current context indicates the ARM condition specified holds. 
+ bool ConditionHolds(T_CONTEXT *pCtx, uint64_t cond); + + // Get the current value of a register. + uint64_t GetReg(T_CONTEXT *pCtx, uint64_t reg); + + // Set the current value of a register. + void SetReg(T_CONTEXT *pCtx, uint64_t reg, uint64_t value); + + // Set the current value of a FP register. + void SetFPReg(T_CONTEXT *pCtx, uint64_t reg, uint64_t valueLo, uint64_t valueHi = 0); + + // Attempt to read a 4, or 8 byte value from memory, zero or sign extend it to a 8-byte value and place + // that value into the buffer pointed at by pdwResult. Returns false if attempting to read the location + // caused a fault. + bool GetMem(uint64_t *pdwResult, uint8_t* pAddress, int cbSize, bool fSignExtend); + + // Parse the instruction opcode. If the instruction reads or writes the PC it will be emulated by updating + // the thread context appropriately and true will be returned. If the instruction is not one of those cases + // (or it is but we faulted trying to read memory during the emulation) no state is updated and false is + // returned instead. + bool TryEmulate(T_CONTEXT *pCtx, uint32_t opcode, bool execute); +}; + +#endif // !__ARM64_SINGLE_STEPPER_H__ diff --git a/src/vm/excep.cpp b/src/vm/excep.cpp index 16e247dce4..95767636ac 100644 --- a/src/vm/excep.cpp +++ b/src/vm/excep.cpp @@ -6653,7 +6653,7 @@ IsDebuggerFault(EXCEPTION_RECORD *pExceptionRecord, #ifdef DEBUGGING_SUPPORTED -#ifdef _TARGET_ARM_ +#ifdef FEATURE_EMULATE_SINGLESTEP // On ARM we don't have any reliable hardware support for single stepping so it is emulated in software. // The implementation will end up throwing an EXCEPTION_BREAKPOINT rather than an EXCEPTION_SINGLE_STEP // and leaves other aspects of the thread context in an invalid state. 
Therefore we use this opportunity @@ -6671,7 +6671,7 @@ IsDebuggerFault(EXCEPTION_RECORD *pExceptionRecord, pExceptionRecord->ExceptionCode = EXCEPTION_SINGLE_STEP; pExceptionRecord->ExceptionAddress = (PVOID)pContext->Pc; } -#endif // _TARGET_ARM_ +#endif // FEATURE_EMULATE_SINGLESTEP // Is this exception really meant for the COM+ Debugger? Note: we will let the debugger have a chance if there // is a debugger attached to any part of the process. It is incorrect to consider whether or not the debugger diff --git a/src/vm/exceptionhandling.cpp b/src/vm/exceptionhandling.cpp index 87220c345f..4de884fb74 100644 --- a/src/vm/exceptionhandling.cpp +++ b/src/vm/exceptionhandling.cpp @@ -5190,8 +5190,8 @@ BOOL IsSafeToHandleHardwareException(PCONTEXT contextRecord, PEXCEPTION_RECORD e IsIPInMarkedJitHelper(controlPc)); } -#ifdef _TARGET_ARM_ -static inline BOOL HandleArmSingleStep(PCONTEXT pContext, PEXCEPTION_RECORD pExceptionRecord, Thread *pThread) +#ifdef FEATURE_EMULATE_SINGLESTEP +static inline BOOL HandleSingleStep(PCONTEXT pContext, PEXCEPTION_RECORD pExceptionRecord, Thread *pThread) { // On ARM we don't have any reliable hardware support for single stepping so it is emulated in software. 
// The implementation will end up throwing an EXCEPTION_BREAKPOINT rather than an EXCEPTION_SINGLE_STEP @@ -5211,7 +5211,7 @@ static inline BOOL HandleArmSingleStep(PCONTEXT pContext, PEXCEPTION_RECORD pExc } return FALSE; } -#endif // _TARGET_ARM_ +#endif // FEATURE_EMULATE_SINGLESTEP BOOL HandleHardwareException(PAL_SEHException* ex) { @@ -5288,8 +5288,8 @@ BOOL HandleHardwareException(PAL_SEHException* ex) } #endif -#ifdef _TARGET_ARM_ - HandleArmSingleStep(ex->GetContextRecord(), ex->GetExceptionRecord(), pThread); +#ifdef FEATURE_EMULATE_SINGLESTEP + HandleSingleStep(ex->GetContextRecord(), ex->GetExceptionRecord(), pThread); #endif if (ex->GetExceptionRecord()->ExceptionCode == STATUS_BREAKPOINT) { diff --git a/src/vm/threads.h b/src/vm/threads.h index a5666900d8..5b2e3985d1 100644 --- a/src/vm/threads.h +++ b/src/vm/threads.h @@ -494,9 +494,12 @@ typedef Thread::ForbidSuspendThreadHolder ForbidSuspendThreadHolder; #else // CROSSGEN_COMPILE -#ifdef _TARGET_ARM_ +#if (defined(_TARGET_ARM_) && defined(FEATURE_EMULATE_SINGLESTEP)) #include "armsinglestepper.h" #endif +#if (defined(_TARGET_ARM64_) && defined(FEATURE_EMULATE_SINGLESTEP)) +#include "arm64singlestepper.h" +#endif #if !defined(PLATFORM_SUPPORTS_SAFE_THREADSUSPEND) // DISABLE_THREADSUSPEND controls whether Thread::SuspendThread will be used at all. @@ -2910,11 +2913,16 @@ public: ResetThreadStateNC(Thread::TSNC_DebuggerIsStepping); } -#ifdef _TARGET_ARM_ - // ARM doesn't currently support any reliable hardware mechanism for single-stepping. Instead we emulate - // this in software. This support is used only by the debugger. +#ifdef FEATURE_EMULATE_SINGLESTEP + // ARM doesn't currently support any reliable hardware mechanism for single-stepping. + // ARM64 unix doesn't currently support any reliable hardware mechanism for single-stepping. + // For each we emulate single step in software. This support is used only by the debugger. 
private: +#if defined(_TARGET_ARM_) ArmSingleStepper m_singleStepper; +#else + Arm64SingleStepper m_singleStepper; +#endif public: #ifndef DACCESS_COMPILE // Given the context with which this thread shall be resumed and the first WORD of the instruction that @@ -2927,9 +2935,13 @@ public: m_singleStepper.Enable(); } - void BypassWithSingleStep(DWORD ip, WORD opcode1, WORD opcode2) + void BypassWithSingleStep(const void* ip ARM_ARG(WORD opcode1) ARM_ARG(WORD opcode2) ARM64_ARG(uint32_t opcode)) { - m_singleStepper.Bypass(ip, opcode1, opcode2); +#if defined(_TARGET_ARM_) + m_singleStepper.Bypass((DWORD)ip, opcode1, opcode2); +#else + m_singleStepper.Bypass((uint64_t)ip, opcode); +#endif } void DisableSingleStep() @@ -2955,7 +2967,7 @@ public: return m_singleStepper.Fixup(pCtx, dwExceptionCode); } #endif // !DACCESS_COMPILE -#endif // _TARGET_ARM_ +#endif // FEATURE_EMULATE_SINGLESTEP private: diff --git a/src/vm/threadsuspend.cpp b/src/vm/threadsuspend.cpp index debf11af52..9c0af1f913 100644 --- a/src/vm/threadsuspend.cpp +++ b/src/vm/threadsuspend.cpp @@ -5234,7 +5234,7 @@ void Thread::SysResumeFromDebug(AppDomain *pAppDomain) "[0x%x] RESUME: TS_DebugSuspendPending was set, but will be removed\n", thread->GetThreadId())); -#ifdef _TARGET_ARM_ +#ifdef FEATURE_EMULATE_SINGLESTEP if (thread->IsSingleStepEnabled()) { if (ISREDIRECTEDTHREAD(thread)) |