diff options
author | dotnet-bot <dotnet-bot@microsoft.com> | 2015-01-30 14:14:42 -0800 |
---|---|---|
committer | dotnet-bot <dotnet-bot@microsoft.com> | 2015-01-30 14:14:42 -0800 |
commit | ef1e2ab328087c61a6878c1e84f4fc5d710aebce (patch) | |
tree | dee1bbb89e9d722e16b0d1485e3cdd1b6c8e2cfa /src/pal/src/arch | |
download | coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.gz coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.tar.bz2 coreclr-ef1e2ab328087c61a6878c1e84f4fc5d710aebce.zip |
Initial commit to populate CoreCLR repo
[tfs-changeset: 1407945]
Diffstat (limited to 'src/pal/src/arch')
-rw-r--r-- | src/pal/src/arch/i386/context.cpp | 1352 | ||||
-rw-r--r-- | src/pal/src/arch/i386/context2.s | 212 | ||||
-rw-r--r-- | src/pal/src/arch/i386/dispatchexceptionwrapper.s | 133 | ||||
-rw-r--r-- | src/pal/src/arch/i386/optimizedtls.cpp | 238 | ||||
-rw-r--r-- | src/pal/src/arch/i386/processor.cpp | 45 | ||||
-rw-r--r-- | src/pal/src/arch/i386/runfilter.s | 158 | ||||
-rw-r--r-- | src/pal/src/arch/i386/tryexcept.s | 207 |
7 files changed, 2345 insertions, 0 deletions
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
//

/*++



Module Name:

    context.c

Abstract:

    Implementation of GetThreadContext/SetThreadContext/DebugBreak functions for
    the Intel x86 platform. These functions are processor dependent.



--*/

#include "pal/palinternal.h"
#include "pal/dbgmsg.h"
#include "pal/context.h"
#include "pal/debug.h"

#include <sys/ptrace.h>
#include <errno.h>
#include <unistd.h>

SET_DEFAULT_DEBUG_CHANNEL(DEBUG);


// Capture the current thread's register context into lpContext.
// Not implemented yet on this platform; see context2.s for the
// assembly implementation this is expected to mirror.
void CONTEXT_CaptureContext(LPCONTEXT lpContext)
{
    // TODO: this needs to be implemented. See context2.s
    _ASSERT(FALSE);
}

// CONTEXT_ALL_FLOATING: the set of ContextFlags bits that cover every
// floating-point register group this architecture exposes.  On x86 the
// xmm registers live in the "extended registers" area; on AMD64 they are
// part of CONTEXT_FLOATING_POINT itself.
#ifdef _X86_
#define CONTEXT_ALL_FLOATING (CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS)
#elif defined(_AMD64_)
#define CONTEXT_ALL_FLOATING CONTEXT_FLOATING_POINT
#else
#error Unexpected architecture.
#endif

#if !HAVE_MACH_EXCEPTIONS

#if HAVE_BSD_REGS_T
#include <machine/reg.h>
#include <machine/npx.h>
#endif  // HAVE_BSD_REGS_T
#if HAVE_PT_REGS
#include <asm/ptrace.h>
#endif  // HAVE_PT_REGS

// Accessor macros that hide the per-platform layout of the machine
// context.  Three families are defined below:
//   MCREG_*  - ucontext_t's mcontext (gregset-based or BSD mc_* fields)
//   PTREG_*  - struct pt_regs as returned by ptrace(PT_GETREGS) on Linux
//   BSDREG_* - struct reg as used by BSD ptrace / /proc
#if HAVE_GREGSET_T

#ifdef BIT64
#define MCREG_Rbx(mc)       ((mc).gregs[REG_RBX])
#define MCREG_Rcx(mc)       ((mc).gregs[REG_RCX])
#define MCREG_Rdx(mc)       ((mc).gregs[REG_RDX])
#define MCREG_Rsi(mc)       ((mc).gregs[REG_RSI])
#define MCREG_Rdi(mc)       ((mc).gregs[REG_RDI])
#define MCREG_Rbp(mc)       ((mc).gregs[REG_RBP])
#define MCREG_Rax(mc)       ((mc).gregs[REG_RAX])
#define MCREG_Rip(mc)       ((mc).gregs[REG_RIP])
#define MCREG_Rsp(mc)       ((mc).gregs[REG_RSP])
#define MCREG_SegCs(mc)     ((mc).gregs[REG_CSGSFS])
#define MCREG_R8(mc)        ((mc).gregs[REG_R8])
#define MCREG_R9(mc)        ((mc).gregs[REG_R9])
#define MCREG_R10(mc)       ((mc).gregs[REG_R10])
#define MCREG_R11(mc)       ((mc).gregs[REG_R11])
#define MCREG_R12(mc)       ((mc).gregs[REG_R12])
#define MCREG_R13(mc)       ((mc).gregs[REG_R13])
#define MCREG_R14(mc)       ((mc).gregs[REG_R14])
#define MCREG_R15(mc)       ((mc).gregs[REG_R15])

#else // BIT64

#define MCREG_Ebx(mc)       ((mc).gregs[REG_EBX])
#define MCREG_Ecx(mc)       ((mc).gregs[REG_ECX])
#define MCREG_Edx(mc)       ((mc).gregs[REG_EDX])
#define MCREG_Esi(mc)       ((mc).gregs[REG_ESI])
#define MCREG_Edi(mc)       ((mc).gregs[REG_EDI])
#define MCREG_Ebp(mc)       ((mc).gregs[REG_EBP])
#define MCREG_Eax(mc)       ((mc).gregs[REG_EAX])
#define MCREG_Eip(mc)       ((mc).gregs[REG_EIP])
#define MCREG_Esp(mc)       ((mc).gregs[REG_ESP])
#define MCREG_SegCs(mc)     ((mc).gregs[REG_CS])
#define MCREG_SegSs(mc)     ((mc).gregs[REG_SS])

#endif // BIT64

#define MCREG_EFlags(mc)    ((mc).gregs[REG_EFL])

#else // HAVE_GREGSET_T

#define MCREG_Ebx(mc)       ((mc).mc_ebx)
#define MCREG_Ecx(mc)       ((mc).mc_ecx)
#define MCREG_Edx(mc)       ((mc).mc_edx)
#define MCREG_Esi(mc)       ((mc).mc_esi)
#define MCREG_Edi(mc)       ((mc).mc_edi)
#define MCREG_Ebp(mc)       ((mc).mc_ebp)
#define MCREG_Eax(mc)       ((mc).mc_eax)
#define MCREG_Eip(mc)       ((mc).mc_eip)
#define MCREG_SegCs(mc)     ((mc).mc_cs)
#define MCREG_EFlags(mc)    ((mc).mc_eflags)
#define MCREG_Esp(mc)       ((mc).mc_esp)
#define MCREG_SegSs(mc)     ((mc).mc_ss)

#endif // HAVE_GREGSET_T


#if HAVE_PT_REGS

#ifdef BIT64
#define PTREG_Rbx(ptreg)    ((ptreg).rbx)
#define PTREG_Rcx(ptreg)    ((ptreg).rcx)
#define PTREG_Rdx(ptreg)    ((ptreg).rdx)
#define PTREG_Rsi(ptreg)    ((ptreg).rsi)
#define PTREG_Rdi(ptreg)    ((ptreg).rdi)
#define PTREG_Rbp(ptreg)    ((ptreg).rbp)
#define PTREG_Rax(ptreg)    ((ptreg).rax)
#define PTREG_Rip(ptreg)    ((ptreg).rip)
#define PTREG_SegCs(ptreg)  ((ptreg).cs)
#define PTREG_SegSs(ptreg)  ((ptreg).ss)
#define PTREG_Rsp(ptreg)    ((ptreg).rsp)
#define PTREG_R8(ptreg)     ((ptreg).r8)
#define PTREG_R9(ptreg)     ((ptreg).r9)
#define PTREG_R10(ptreg)    ((ptreg).r10)
#define PTREG_R11(ptreg)    ((ptreg).r11)
#define PTREG_R12(ptreg)    ((ptreg).r12)
#define PTREG_R13(ptreg)    ((ptreg).r13)
#define PTREG_R14(ptreg)    ((ptreg).r14)
#define PTREG_R15(ptreg)    ((ptreg).r15)

#else // BIT64

#define PTREG_Ebx(ptreg)    ((ptreg).ebx)
#define PTREG_Ecx(ptreg)    ((ptreg).ecx)
#define PTREG_Edx(ptreg)    ((ptreg).edx)
#define PTREG_Esi(ptreg)    ((ptreg).esi)
#define PTREG_Edi(ptreg)    ((ptreg).edi)
#define PTREG_Ebp(ptreg)    ((ptreg).ebp)
#define PTREG_Eax(ptreg)    ((ptreg).eax)
#define PTREG_Eip(ptreg)    ((ptreg).eip)
#define PTREG_SegCs(ptreg)  ((ptreg).xcs)
#define PTREG_SegSs(ptreg)  ((ptreg).xss)
#define PTREG_Esp(ptreg)    ((ptreg).esp)

#endif // BIT64


#define PTREG_EFlags(ptreg) ((ptreg).eflags)

#endif // HAVE_PT_REGS


#if HAVE_BSD_REGS_T

#define BSDREG_Ebx(reg)     ((reg).r_ebx)
#define BSDREG_Ecx(reg)     ((reg).r_ecx)
#define BSDREG_Edx(reg)     ((reg).r_edx)
#define BSDREG_Esi(reg)     ((reg).r_esi)
#define BSDREG_Edi(reg)     ((reg).r_edi)
#define BSDREG_Ebp(reg)     ((reg).r_ebp)
#define BSDREG_Eax(reg)     ((reg).r_eax)
#define BSDREG_Eip(reg)     ((reg).r_eip)
#define BSDREG_SegCs(reg)   ((reg).r_cs)
#define BSDREG_EFlags(reg)  ((reg).r_eflags)
#define BSDREG_Esp(reg)     ((reg).r_esp)
#define BSDREG_SegSs(reg)   ((reg).r_ss)

#endif // HAVE_BSD_REGS_T

// ASSIGN_CONTROL_REGS / ASSIGN_INTEGER_REGS expand an ASSIGN_REG(reg)
// macro (defined by the caller just before use) once per register in the
// corresponding CONTEXT group.  Note the 64-bit control group has no SegSs
// entry, matching the gregset accessors above.
#ifdef BIT64
#define ASSIGN_CONTROL_REGS \
    ASSIGN_REG(Rbp)     \
    ASSIGN_REG(Rip)     \
    ASSIGN_REG(SegCs)   \
    ASSIGN_REG(EFlags)  \
    ASSIGN_REG(Rsp)     \

#define ASSIGN_INTEGER_REGS \
    ASSIGN_REG(Rdi)     \
    ASSIGN_REG(Rsi)     \
    ASSIGN_REG(Rbx)     \
    ASSIGN_REG(Rdx)     \
    ASSIGN_REG(Rcx)     \
    ASSIGN_REG(Rax)     \
    ASSIGN_REG(R8)      \
    ASSIGN_REG(R9)      \
    ASSIGN_REG(R10)     \
    ASSIGN_REG(R11)     \
    ASSIGN_REG(R12)     \
    ASSIGN_REG(R13)     \
    ASSIGN_REG(R14)     \
    ASSIGN_REG(R15)     \

#else // BIT64
#define ASSIGN_CONTROL_REGS \
    ASSIGN_REG(Ebp)     \
    ASSIGN_REG(Eip)     \
    ASSIGN_REG(SegCs)   \
    ASSIGN_REG(EFlags)  \
    ASSIGN_REG(Esp)     \
    ASSIGN_REG(SegSs)   \

#define ASSIGN_INTEGER_REGS \
    ASSIGN_REG(Edi)     \
    ASSIGN_REG(Esi)     \
    ASSIGN_REG(Ebx)     \
    ASSIGN_REG(Edx)     \
    ASSIGN_REG(Ecx)     \
    ASSIGN_REG(Eax)     \

#endif //BIT64

#define ASSIGN_ALL_REGS     \
        ASSIGN_CONTROL_REGS \
        ASSIGN_INTEGER_REGS \

/*++
Function:
  CONTEXT_GetRegisters

Abstract
  retrieve the machine registers value of the indicated process.

Parameter
  processId: process ID
  registers: reg structure in which the machine registers value will be returned.
Return
 returns TRUE if it succeeds, FALSE otherwise
--*/
BOOL CONTEXT_GetRegisters(DWORD processId, ucontext_t *registers)
{
#if HAVE_BSD_REGS_T
    int regFd = -1;
#endif  // HAVE_BSD_REGS_T
    BOOL bRet = FALSE;

    if (processId == GetCurrentProcessId())
    {
        // Same process: read our own context directly.
#if HAVE_GETCONTEXT
        if (getcontext(registers) != 0)
        {
            ASSERT("getcontext() failed %d (%s)\n", errno, strerror(errno));
            return FALSE;
        }
#elif HAVE_BSD_REGS_T
        // No getcontext(): read the register file exposed under /proc.
        char buf[MAX_PATH];
        struct reg bsd_registers;

        sprintf_s(buf, sizeof(buf), "/proc/%d/regs", processId);

        if ((regFd = PAL__open(buf, O_RDONLY)) == -1)
        {
            ASSERT("PAL__open() failed %d (%s) \n", errno, strerror(errno));
            return FALSE;
        }

        if (lseek(regFd, 0, 0) == -1)
        {
            ASSERT("lseek() failed %d (%s)\n", errno, strerror(errno));
            goto EXIT;
        }

        if (read(regFd, &bsd_registers, sizeof(bsd_registers)) != sizeof(bsd_registers))
        {
            ASSERT("read() failed %d (%s)\n", errno, strerror(errno));
            goto EXIT;
        }

        // Translate the BSD reg structure into the ucontext mcontext.
#define ASSIGN_REG(reg) MCREG_##reg(registers->uc_mcontext) = BSDREG_##reg(bsd_registers);
        ASSIGN_ALL_REGS
#undef ASSIGN_REG

#else
#error "Don't know how to get current context on this platform!"
#endif
    }
    else
    {
        // Cross-process: fetch the registers via ptrace.
#if HAVE_PT_REGS
        struct pt_regs ptrace_registers;
#elif HAVE_BSD_REGS_T
        struct reg ptrace_registers;
#endif

        if (ptrace((__ptrace_request)PT_GETREGS, processId, (caddr_t) &ptrace_registers, 0) == -1)
        {
            ASSERT("Failed ptrace(PT_GETREGS, processId:%d) errno:%d (%s)\n",
                   processId, errno, strerror(errno));
        }

#if HAVE_PT_REGS
#define ASSIGN_REG(reg) MCREG_##reg(registers->uc_mcontext) = PTREG_##reg(ptrace_registers);
#elif HAVE_BSD_REGS_T
#define ASSIGN_REG(reg) MCREG_##reg(registers->uc_mcontext) = BSDREG_##reg(ptrace_registers);
#endif
        ASSIGN_ALL_REGS
#undef ASSIGN_REG
    }

    bRet = TRUE;
#if HAVE_BSD_REGS_T
EXIT :
    if (regFd != -1)
    {
        close(regFd);
    }
#endif  // HAVE_BSD_REGS_T
    return bRet;
}

/*++
Function:
  GetThreadContext

See MSDN doc.
--*/
BOOL
CONTEXT_GetThreadContext(
         DWORD dwProcessId,
#if !defined(_AMD64_)
         DWORD dwThreadId,
#else // defined(_AMD64_)
         DWORD64 dwThreadId,
#endif // !defined(_AMD64_)
         DWORD dwLwpId,
         LPCONTEXT lpContext)
{
    BOOL ret = FALSE;
    ucontext_t registers;

    if (lpContext == NULL)
    {
        ERROR("Invalid lpContext parameter value\n");
        SetLastError(ERROR_NOACCESS);
        goto EXIT;
    }

    /* How to consider the case when dwThreadId is different from the current
       thread of its owner process. Machine registers values could be retreived
       by a ptrace(pid, ...) call or from the "/proc/%pid/reg" file content.
       Unfortunately, these two methods only depend on process ID, not on
       thread ID. */

    if (dwProcessId == GetCurrentProcessId())
    {
        if (dwThreadId != GetCurrentThreadId())
        {
            DWORD flags;
            // There aren't any APIs for this. We can potentially get the
            // context of another thread by using per-thread signals, but
            // on FreeBSD signal handlers that are called as a result
            // of signals raised via pthread_kill don't get a valid
            // sigcontext or ucontext_t. But we need this to return TRUE
            // to avoid an assertion in the CLR in code that manages to
            // cope reasonably well without a valid thread context.
            // Given that, we'll zero out our structure and return TRUE.
            ERROR("GetThreadContext on a thread other than the current "
                  "thread is returning TRUE\n");
            flags = lpContext->ContextFlags;
            memset(lpContext, 0, sizeof(*lpContext));
            lpContext->ContextFlags = flags;
            ret = TRUE;
            goto EXIT;
        }

    }

    if (lpContext->ContextFlags &
        (CONTEXT_CONTROL | CONTEXT_INTEGER))
    {
        if (CONTEXT_GetRegisters(dwProcessId, &registers) == FALSE)
        {
            SetLastError(ERROR_INTERNAL_ERROR);
            goto EXIT;
        }

        // Copy only the register groups the caller asked for out of the
        // fetched ucontext into the Win32-style CONTEXT record.
#define ASSIGN_REG(reg) lpContext->reg = MCREG_##reg(registers.uc_mcontext);
        if (lpContext->ContextFlags & CONTEXT_CONTROL)
        {
            ASSIGN_CONTROL_REGS
        }
        if (lpContext->ContextFlags & CONTEXT_INTEGER)
        {
            ASSIGN_INTEGER_REGS
        }
#undef ASSIGN_REG
    }

    ret = TRUE;

EXIT:
    return ret;
}

/*++
Function:
  SetThreadContext

See MSDN doc.
--*/
BOOL
CONTEXT_SetThreadContext(
           DWORD dwProcessId,
#if !defined(_AMD64_)
           DWORD dwThreadId,
#else // defined(_AMD64_)
           DWORD64 dwThreadId,
#endif // !defined(_AMD64_)
           DWORD dwLwpId,
           CONST CONTEXT *lpContext)
{
    BOOL ret = FALSE;

#if HAVE_PT_REGS
    struct pt_regs ptrace_registers;
#elif HAVE_BSD_REGS_T
    struct reg ptrace_registers;
#endif

    if (lpContext == NULL)
    {
        ERROR("Invalid lpContext parameter value\n");
        SetLastError(ERROR_NOACCESS);
        goto EXIT;
    }

    /* How to consider the case when dwThreadId is different from the current
       thread of its owner process. Machine registers values could be retreived
       by a ptrace(pid, ...) call or from the "/proc/%pid/reg" file content.
       Unfortunately, these two methods only depend on process ID, not on
       thread ID. */

    if (dwProcessId == GetCurrentProcessId())
    {
#ifdef FEATURE_PAL_SXS
        // Need to implement SetThreadContext(current thread) for the IX architecture; look at common_signal_handler.
        _ASSERT(FALSE);
#endif // FEATURE_PAL_SXS
        ASSERT("SetThreadContext should be called for cross-process only.\n");
        SetLastError(ERROR_INVALID_PARAMETER);
        goto EXIT;
    }

    if (lpContext->ContextFlags &
        (CONTEXT_CONTROL | CONTEXT_INTEGER))
    {
        // Read-modify-write: fetch the target's current registers, overlay
        // the requested groups from lpContext, then write them back.
        if (ptrace((__ptrace_request)PT_GETREGS, dwProcessId, (caddr_t)&ptrace_registers, 0) == -1)
        {
            ASSERT("Failed ptrace(PT_GETREGS, processId:%d) errno:%d (%s)\n",
                   dwProcessId, errno, strerror(errno));
            SetLastError(ERROR_INTERNAL_ERROR);
            goto EXIT;
        }

#if HAVE_PT_REGS
#define ASSIGN_REG(reg) PTREG_##reg(ptrace_registers) = lpContext->reg;
#elif HAVE_BSD_REGS_T
#define ASSIGN_REG(reg) BSDREG_##reg(ptrace_registers) = lpContext->reg;
#endif
        if (lpContext->ContextFlags & CONTEXT_CONTROL)
        {
            ASSIGN_CONTROL_REGS
        }
        if (lpContext->ContextFlags & CONTEXT_INTEGER)
        {
            ASSIGN_INTEGER_REGS
        }
#undef ASSIGN_REG

        if (ptrace((__ptrace_request)PT_SETREGS, dwProcessId, (caddr_t)&ptrace_registers, 0) == -1)
        {
            ASSERT("Failed ptrace(PT_SETREGS, processId:%d) errno:%d (%s)\n",
                   dwProcessId, errno, strerror(errno));
            SetLastError(ERROR_INTERNAL_ERROR);
            goto EXIT;
        }
    }

    ret = TRUE;
   EXIT:
     return ret;
}

/*++
Function :
    CONTEXTToNativeContext

    Converts a CONTEXT record to a native context.

Parameters :
    CONST CONTEXT *lpContext : CONTEXT to convert
    native_context_t *native : native context to fill in
    ULONG contextFlags : flags that determine which registers are valid in
                         lpContext and which ones to set in native

Return value :
    None

--*/
void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native,
                            ULONG contextFlags)
{
    // Only the full CONTROL|INTEGER combination is supported here.
    if (contextFlags != (CONTEXT_CONTROL | CONTEXT_INTEGER))
    {
        ASSERT("Invalid contextFlags in CONTEXTToNativeContext!");
    }

#define ASSIGN_REG(reg) MCREG_##reg(native->uc_mcontext) = lpContext->reg;
    ASSIGN_ALL_REGS
#undef ASSIGN_REG
}

/*++
Function :
    CONTEXTFromNativeContext

    Converts a native context to a CONTEXT record.

Parameters :
    const native_context_t *native : native context to convert
    LPCONTEXT lpContext : CONTEXT to fill in
    ULONG contextFlags : flags that determine which registers are valid in
                         native and which ones to set in lpContext

Return value :
    None

--*/
void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContext,
                              ULONG contextFlags)
{
    // Only the full CONTROL|INTEGER combination is supported here.
    if (contextFlags != (CONTEXT_CONTROL | CONTEXT_INTEGER))
    {
        ASSERT("Invalid contextFlags in CONTEXTFromNativeContext!");
    }
    lpContext->ContextFlags = contextFlags;

#define ASSIGN_REG(reg) lpContext->reg = MCREG_##reg(native->uc_mcontext);
    ASSIGN_ALL_REGS
#undef ASSIGN_REG
}

/*++
Function :
    CONTEXTGetPC

    Returns the program counter from the native context.

Parameters :
    const native_context_t *native : native context

Return value :
    The program counter from the native context.

--*/
LPVOID CONTEXTGetPC(const native_context_t *context)
{
#ifdef BIT64
    return (LPVOID)MCREG_Rip(context->uc_mcontext);
#else
    return (LPVOID) MCREG_Eip(context->uc_mcontext);
#endif // BIT64
}

/*++
Function :
    CONTEXTGetExceptionCodeForSignal

    Translates signal and context information to a Win32 exception code.
+ +Parameters : + const siginfo_t *siginfo : signal information from a signal handler + const native_context_t *context : context information + +Return value : + The Win32 exception code that corresponds to the signal and context + information. + +--*/ +#ifdef ILL_ILLOPC +// If si_code values are available for all signals, use those. +DWORD CONTEXTGetExceptionCodeForSignal(const siginfo_t *siginfo, + const native_context_t *context) +{ + switch (siginfo->si_signo) + { + case SIGILL: + switch (siginfo->si_code) + { + case ILL_ILLOPC: // Illegal opcode + case ILL_ILLOPN: // Illegal operand + case ILL_ILLADR: // Illegal addressing mode + case ILL_ILLTRP: // Illegal trap + case ILL_COPROC: // Co-processor error + return EXCEPTION_ILLEGAL_INSTRUCTION; + case ILL_PRVOPC: // Privileged opcode + case ILL_PRVREG: // Privileged register + return EXCEPTION_PRIV_INSTRUCTION; + case ILL_BADSTK: // Internal stack error + return EXCEPTION_STACK_OVERFLOW; + default: + break; + } + break; + case SIGFPE: + switch (siginfo->si_code) + { + case FPE_INTDIV: + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case FPE_INTOVF: + return EXCEPTION_INT_OVERFLOW; + case FPE_FLTDIV: + return EXCEPTION_FLT_DIVIDE_BY_ZERO; + case FPE_FLTOVF: + return EXCEPTION_FLT_OVERFLOW; + case FPE_FLTUND: + return EXCEPTION_FLT_UNDERFLOW; + case FPE_FLTRES: + return EXCEPTION_FLT_INEXACT_RESULT; + case FPE_FLTINV: + return EXCEPTION_FLT_INVALID_OPERATION; + case FPE_FLTSUB: + return EXCEPTION_FLT_INVALID_OPERATION; + default: + break; + } + break; + case SIGSEGV: + switch (siginfo->si_code) + { + case SI_USER: // User-generated signal, sometimes sent + // for SIGSEGV under normal circumstances + case SEGV_MAPERR: // Address not mapped to object + case SEGV_ACCERR: // Invalid permissions for mapped object + return EXCEPTION_ACCESS_VIOLATION; + default: + break; + } + break; + case SIGBUS: + switch (siginfo->si_code) + { + case BUS_ADRALN: // Invalid address alignment + return EXCEPTION_DATATYPE_MISALIGNMENT; + case 
BUS_ADRERR: // Non-existent physical address + return EXCEPTION_ACCESS_VIOLATION; + case BUS_OBJERR: // Object-specific hardware error + default: + break; + } + case SIGTRAP: + switch (siginfo->si_code) + { + case SI_KERNEL: + case SI_USER: + case TRAP_BRKPT: // Process breakpoint + return EXCEPTION_BREAKPOINT; + case TRAP_TRACE: // Process trace trap + return EXCEPTION_SINGLE_STEP; + default: + // We don't want to use ASSERT here since it raises SIGTRAP and we + // might again end up here resulting in an infinite loop! + // so, we print out an error message and return + DBG_PRINTF(DLI_ASSERT, defdbgchan, TRUE) + ("Got unknown SIGTRAP signal (%d) with code %d\n", SIGTRAP, siginfo->si_code); + + return EXCEPTION_ILLEGAL_INSTRUCTION; + } + default: + break; + } + ASSERT("Got unknown signal number %d with code %d\n", + siginfo->si_signo, siginfo->si_code); + return EXCEPTION_ILLEGAL_INSTRUCTION; +} +#else // ILL_ILLOPC +DWORD CONTEXTGetExceptionCodeForSignal(const siginfo_t *siginfo, + const native_context_t *context) +{ + int trap; + + if (siginfo->si_signo == SIGFPE) + { + // Floating point exceptions are mapped by their si_code. 
+ switch (siginfo->si_code) + { + case FPE_INTDIV : + TRACE("Got signal SIGFPE:FPE_INTDIV; raising " + "EXCEPTION_INT_DIVIDE_BY_ZERO\n"); + return EXCEPTION_INT_DIVIDE_BY_ZERO; + break; + case FPE_INTOVF : + TRACE("Got signal SIGFPE:FPE_INTOVF; raising " + "EXCEPTION_INT_OVERFLOW\n"); + return EXCEPTION_INT_OVERFLOW; + break; + case FPE_FLTDIV : + TRACE("Got signal SIGFPE:FPE_FLTDIV; raising " + "EXCEPTION_FLT_DIVIDE_BY_ZERO\n"); + return EXCEPTION_FLT_DIVIDE_BY_ZERO; + break; + case FPE_FLTOVF : + TRACE("Got signal SIGFPE:FPE_FLTOVF; raising " + "EXCEPTION_FLT_OVERFLOW\n"); + return EXCEPTION_FLT_OVERFLOW; + break; + case FPE_FLTUND : + TRACE("Got signal SIGFPE:FPE_FLTUND; raising " + "EXCEPTION_FLT_UNDERFLOW\n"); + return EXCEPTION_FLT_UNDERFLOW; + break; + case FPE_FLTRES : + TRACE("Got signal SIGFPE:FPE_FLTRES; raising " + "EXCEPTION_FLT_INEXACT_RESULT\n"); + return EXCEPTION_FLT_INEXACT_RESULT; + break; + case FPE_FLTINV : + TRACE("Got signal SIGFPE:FPE_FLTINV; raising " + "EXCEPTION_FLT_INVALID_OPERATION\n"); + return EXCEPTION_FLT_INVALID_OPERATION; + break; + case FPE_FLTSUB :/* subscript out of range */ + TRACE("Got signal SIGFPE:FPE_FLTSUB; raising " + "EXCEPTION_FLT_INVALID_OPERATION\n"); + return EXCEPTION_FLT_INVALID_OPERATION; + break; + default: + ASSERT("Got unknown signal code %d\n", siginfo->si_code); + break; + } + } + + trap = context->uc_mcontext.mc_trapno; + switch (trap) + { + case T_PRIVINFLT : /* privileged instruction */ + TRACE("Trap code T_PRIVINFLT mapped to EXCEPTION_PRIV_INSTRUCTION\n"); + return EXCEPTION_PRIV_INSTRUCTION; + case T_BPTFLT : /* breakpoint instruction */ + TRACE("Trap code T_BPTFLT mapped to EXCEPTION_BREAKPOINT\n"); + return EXCEPTION_BREAKPOINT; + case T_ARITHTRAP : /* arithmetic trap */ + TRACE("Trap code T_ARITHTRAP maps to floating point exception...\n"); + return 0; /* let the caller pick an exception code */ +#ifdef T_ASTFLT + case T_ASTFLT : /* system forced exception : ^C, ^\. 
SIGINT signal + handler shouldn't be calling this function, since + it doesn't need an exception code */ + ASSERT("Trap code T_ASTFLT received, shouldn't get here\n"); + return 0; +#endif // T_ASTFLT + case T_PROTFLT : /* protection fault */ + TRACE("Trap code T_PROTFLT mapped to EXCEPTION_ACCESS_VIOLATION\n"); + return EXCEPTION_ACCESS_VIOLATION; + case T_TRCTRAP : /* debug exception (sic) */ + TRACE("Trap code T_TRCTRAP mapped to EXCEPTION_SINGLE_STEP\n"); + return EXCEPTION_SINGLE_STEP; + case T_PAGEFLT : /* page fault */ + TRACE("Trap code T_PAGEFLT mapped to EXCEPTION_ACCESS_VIOLATION\n"); + return EXCEPTION_ACCESS_VIOLATION; + case T_ALIGNFLT : /* alignment fault */ + TRACE("Trap code T_ALIGNFLT mapped to EXCEPTION_DATATYPE_MISALIGNMENT\n"); + return EXCEPTION_DATATYPE_MISALIGNMENT; + case T_DIVIDE : + TRACE("Trap code T_DIVIDE mapped to EXCEPTION_INT_DIVIDE_BY_ZERO\n"); + return EXCEPTION_INT_DIVIDE_BY_ZERO; + case T_NMI : /* non-maskable trap */ + TRACE("Trap code T_NMI mapped to EXCEPTION_ILLEGAL_INSTRUCTION\n"); + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_OFLOW : + TRACE("Trap code T_OFLOW mapped to EXCEPTION_INT_OVERFLOW\n"); + return EXCEPTION_INT_OVERFLOW; + case T_BOUND : /* bound instruction fault */ + TRACE("Trap code T_BOUND mapped to EXCEPTION_ARRAY_BOUNDS_EXCEEDED\n"); + return EXCEPTION_ARRAY_BOUNDS_EXCEEDED; + case T_DNA : /* device not available fault */ + TRACE("Trap code T_DNA mapped to EXCEPTION_ILLEGAL_INSTRUCTION\n"); + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_DOUBLEFLT : /* double fault */ + TRACE("Trap code T_DOUBLEFLT mapped to EXCEPTION_ILLEGAL_INSTRUCTION\n"); + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_FPOPFLT : /* fp coprocessor operand fetch fault */ + TRACE("Trap code T_FPOPFLT mapped to EXCEPTION_FLT_INVALID_OPERATION\n"); + return EXCEPTION_FLT_INVALID_OPERATION; + case T_TSSFLT : /* invalid tss fault */ + TRACE("Trap code T_TSSFLT mapped to EXCEPTION_ILLEGAL_INSTRUCTION\n"); + return 
EXCEPTION_ILLEGAL_INSTRUCTION; + case T_SEGNPFLT : /* segment not present fault */ + TRACE("Trap code T_SEGNPFLT mapped to EXCEPTION_ACCESS_VIOLATION\n"); + return EXCEPTION_ACCESS_VIOLATION; + case T_STKFLT : /* stack fault */ + TRACE("Trap code T_STKFLT mapped to EXCEPTION_STACK_OVERFLOW\n"); + return EXCEPTION_STACK_OVERFLOW; + case T_MCHK : /* machine check trap */ + TRACE("Trap code T_MCHK mapped to EXCEPTION_ILLEGAL_INSTRUCTION\n"); + return EXCEPTION_ILLEGAL_INSTRUCTION; + case T_RESERVED : /* reserved (unknown) */ + TRACE("Trap code T_RESERVED mapped to EXCEPTION_ILLEGAL_INSTRUCTION\n"); + return EXCEPTION_ILLEGAL_INSTRUCTION; + default: + ASSERT("Got unknown trap code %d\n", trap); + break; + } + return EXCEPTION_ILLEGAL_INSTRUCTION; +} +#endif // ILL_ILLOPC + +#else // !HAVE_MACH_EXCEPTIONS + +#include <mach/message.h> +#include <mach/thread_act.h> +#include "../../exception/machexception.h" + +/*++ +Function: + CONTEXT_GetThreadContextFromPort + + Helper for GetThreadContext that uses a mach_port +--*/ +kern_return_t +CONTEXT_GetThreadContextFromPort( + mach_port_t Port, + LPCONTEXT lpContext) +{ + // Extract the CONTEXT from the Mach thread. + + kern_return_t MachRet = KERN_SUCCESS; + mach_msg_type_number_t StateCount; + thread_state_flavor_t StateFlavor; + + if (lpContext->ContextFlags & (CONTEXT_CONTROL|CONTEXT_INTEGER)) + { +#ifdef _X86_ + x86_thread_state32_t State; + StateFlavor = x86_THREAD_STATE32; +#elif defined(_AMD64_) + x86_thread_state64_t State; + StateFlavor = x86_THREAD_STATE64; +#else +#error Unexpected architecture. 
+#endif + + StateCount = sizeof(State) / sizeof(natural_t); + + MachRet = thread_get_state(Port, + StateFlavor, + (thread_state_t)&State, + &StateCount); + if (MachRet != KERN_SUCCESS) + { + ASSERT("thread_get_state(THREAD_STATE) failed: %d\n", MachRet); + goto EXIT; + } + + // Copy in the GPRs and the various other control registers +#ifdef _X86_ + lpContext->Eax = State.eax; + lpContext->Ebx = State.ebx; + lpContext->Ecx = State.ecx; + lpContext->Edx = State.edx; + lpContext->Edi = State.edi; + lpContext->Esi = State.esi; + lpContext->Ebp = State.ebp; + lpContext->Esp = State.esp; + lpContext->SegSs = State.ss; + lpContext->EFlags = State.eflags; + lpContext->Eip = State.eip; + lpContext->SegCs = State.cs; + lpContext->SegDs_PAL_Undefined = State.ds; + lpContext->SegEs_PAL_Undefined = State.es; + lpContext->SegFs_PAL_Undefined = State.fs; + lpContext->SegGs_PAL_Undefined = State.gs; +#elif defined(_AMD64_) + lpContext->Rax = State.__rax; + lpContext->Rbx = State.__rbx; + lpContext->Rcx = State.__rcx; + lpContext->Rdx = State.__rdx; + lpContext->Rdi = State.__rdi; + lpContext->Rsi = State.__rsi; + lpContext->Rbp = State.__rbp; + lpContext->Rsp = State.__rsp; + lpContext->R8 = State.__r8; + lpContext->R9 = State.__r9; + lpContext->R10 = State.__r10; + lpContext->R11 = State.__r11; + lpContext->R12 = State.__r12; + lpContext->R13 = State.__r13; + lpContext->R14 = State.__r14; + lpContext->R15 = State.__r15; +// lpContext->SegSs = State.ss; // no such state? + lpContext->EFlags = State.__rflags; + lpContext->Rip = State.__rip; + lpContext->SegCs = State.__cs; +// lpContext->SegDs_PAL_Undefined = State.ds; // no such state? +// lpContext->SegEs_PAL_Undefined = State.es; // no such state? + lpContext->SegFs = State.__fs; + lpContext->SegGs = State.__gs; +#else +#error Unexpected architecture. 
+#endif + } + + if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING) { +#ifdef _X86_ + x86_float_state32_t State; + StateFlavor = x86_FLOAT_STATE32; +#elif defined(_AMD64_) + x86_float_state64_t State; + StateFlavor = x86_FLOAT_STATE64; +#else +#error Unexpected architecture. +#endif + StateCount = sizeof(State) / sizeof(natural_t); + + MachRet = thread_get_state(Port, + StateFlavor, + (thread_state_t)&State, + &StateCount); + if (MachRet != KERN_SUCCESS) + { + ASSERT("thread_get_state(FLOAT_STATE) failed: %d\n", MachRet); + goto EXIT; + } + + if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT) + { + // Copy the FPRs +#ifdef _X86_ + lpContext->FloatSave.ControlWord = *(DWORD*)&State.fpu_fcw; + lpContext->FloatSave.StatusWord = *(DWORD*)&State.fpu_fsw; + lpContext->FloatSave.TagWord = State.fpu_ftw; + lpContext->FloatSave.ErrorOffset = State.fpu_ip; + lpContext->FloatSave.ErrorSelector = State.fpu_cs; + lpContext->FloatSave.DataOffset = State.fpu_dp; + lpContext->FloatSave.DataSelector = State.fpu_ds; + lpContext->FloatSave.Cr0NpxState = State.fpu_mxcsr; + + // Windows stores the floating point registers in a packed layout (each 10-byte register end to end + // for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably for + // alignment purposes). So we can't just memcpy the registers over in a single block, we need to copy + // them individually. 
+ for (int i = 0; i < 8; i++) + memcpy(&lpContext->FloatSave.RegisterArea[i * 10], (&State.fpu_stmm0)[i].mmst_reg, 10); +#elif defined(_AMD64_) + lpContext->FltSave.ControlWord = *(DWORD*)&State.__fpu_fcw; + lpContext->FltSave.StatusWord = *(DWORD*)&State.__fpu_fsw; + lpContext->FltSave.TagWord = State.__fpu_ftw; + lpContext->FltSave.ErrorOffset = State.__fpu_ip; + lpContext->FltSave.ErrorSelector = State.__fpu_cs; + lpContext->FltSave.DataOffset = State.__fpu_dp; + lpContext->FltSave.DataSelector = State.__fpu_ds; + lpContext->FltSave.MxCsr = State.__fpu_mxcsr; + lpContext->FltSave.MxCsr_Mask = State.__fpu_mxcsrmask; // note: we don't save the mask for x86 + + // Windows stores the floating point registers in a packed layout (each 10-byte register end to end + // for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably for + // alignment purposes). So we can't just memcpy the registers over in a single block, we need to copy + // them individually. + for (int i = 0; i < 8; i++) + memcpy(&lpContext->FltSave.FloatRegisters[i], (&State.__fpu_stmm0)[i].__mmst_reg, 10); + + // AMD64's FLOATING_POINT includes the xmm registers. + memcpy(&lpContext->Xmm0, &State.__fpu_xmm0, 8 * 16); +#else +#error Unexpected architecture. +#endif + } + +#ifdef _X86_ + if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS) { + // The only extended register information that Mach will tell us about are the xmm register values. + // Both Windows and Mach store the registers in a packed layout (each of the 8 registers is 16 bytes) + // so we can simply memcpy them across. + memcpy(lpContext->ExtendedRegisters + CONTEXT_EXREG_XMM_OFFSET, &State.fpu_xmm0, 8 * 16); + } +#endif _X86_ + } + +EXIT: + return MachRet; +} + +/*++ +Function: + GetThreadContext + +See MSDN doc. 
+--*/ +BOOL +CONTEXT_GetThreadContext( + DWORD dwProcessId, +#if !defined(_AMD64_) + DWORD dwThreadId, +#else // defined(_AMD64_) + DWORD64 dwThreadId, +#endif // !defined(_AMD64_) + DWORD dwLwpId, + LPCONTEXT lpContext) +{ + BOOL ret = FALSE; + + if (lpContext == NULL) + { + ERROR("Invalid lpContext parameter value\n"); + SetLastError(ERROR_NOACCESS); + goto EXIT; + } + + if (GetCurrentProcessId() == dwProcessId) + { + if (dwThreadId != GetCurrentThreadId()) + { + // the target thread is in the current process, but isn't + // the current one: extract the CONTEXT from the Mach thread. + mach_port_t mptPort; + mptPort = pthread_mach_thread_np((pthread_t)dwThreadId); + + ret = (CONTEXT_GetThreadContextFromPort(mptPort, lpContext) == KERN_SUCCESS); + } + else + { + CONTEXT_CaptureContext(lpContext); + ret = TRUE; + } + } + else + { + ASSERT("Cross-process GetThreadContext() is not supported on this platform\n"); + SetLastError(ERROR_NOACCESS); + } + +EXIT: + return ret; +} + +/*++ +Function: + SetThreadContextOnPort + + Helper for CONTEXT_SetThreadContext +--*/ +kern_return_t +CONTEXT_SetThreadContextOnPort( + mach_port_t Port, + IN CONST CONTEXT *lpContext) +{ + kern_return_t MachRet = KERN_SUCCESS; + mach_msg_type_number_t StateCount; + thread_state_flavor_t StateFlavor; + + if (lpContext->ContextFlags & (CONTEXT_CONTROL|CONTEXT_INTEGER)) + { +#ifdef _X86_ + x86_thread_state32_t State; + StateFlavor = x86_THREAD_STATE32; + + State.eax = lpContext->Eax; + State.ebx = lpContext->Ebx; + State.ecx = lpContext->Ecx; + State.edx = lpContext->Edx; + State.edi = lpContext->Edi; + State.esi = lpContext->Esi; + State.ebp = lpContext->Ebp; + State.esp = lpContext->Esp; + State.ss = lpContext->SegSs; + State.eflags = lpContext->EFlags; + State.eip = lpContext->Eip; + State.cs = lpContext->SegCs; + State.ds = lpContext->SegDs_PAL_Undefined; + State.es = lpContext->SegEs_PAL_Undefined; + State.fs = lpContext->SegFs_PAL_Undefined; + State.gs = lpContext->SegGs_PAL_Undefined; 
+#elif defined(_AMD64_) + x86_thread_state64_t State; + StateFlavor = x86_THREAD_STATE64; + + State.__rax = lpContext->Rax; + State.__rbx = lpContext->Rbx; + State.__rcx = lpContext->Rcx; + State.__rdx = lpContext->Rdx; + State.__rdi = lpContext->Rdi; + State.__rsi = lpContext->Rsi; + State.__rbp = lpContext->Rbp; + State.__rsp = lpContext->Rsp; + State.__r8 = lpContext->R8; + State.__r9 = lpContext->R9; + State.__r10 = lpContext->R10; + State.__r11 = lpContext->R11; + State.__r12 = lpContext->R12; + State.__r13 = lpContext->R13; + State.__r14 = lpContext->R14; + State.__r15 = lpContext->R15; +// State.ss = lpContext->SegSs; + State.__rflags = lpContext->EFlags; + State.__rip = lpContext->Rip; + State.__cs = lpContext->SegCs; +// State.ds = lpContext->SegDs_PAL_Undefined; +// State.es = lpContext->SegEs_PAL_Undefined; + State.__fs = lpContext->SegFs; + State.__gs = lpContext->SegGs; +#else +#error Unexpected architecture. +#endif + + StateCount = sizeof(State) / sizeof(natural_t); + + MachRet = thread_set_state(Port, + StateFlavor, + (thread_state_t)&State, + StateCount); + if (MachRet != KERN_SUCCESS) + { + ASSERT("thread_set_state(THREAD_STATE) failed: %d\n", MachRet); + goto EXIT; + } + } + + if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING) + { + +#ifdef _X86_ + x86_float_state32_t State; + StateFlavor = x86_FLOAT_STATE32; +#elif defined(_AMD64_) + x86_float_state64_t State; + StateFlavor = x86_FLOAT_STATE64; +#else +#error Unexpected architecture. +#endif + + StateCount = sizeof(State) / sizeof(natural_t); + + // If we're setting only one of the floating point or extended registers (of which Mach supports only + // the xmm values) then we don't have values for the other set. This is a problem since Mach only + // supports setting both groups as a single unit. So in this case we'll need to fetch the current + // values first. 
+ if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING != + CONTEXT_ALL_FLOATING) + { + mach_msg_type_number_t StateCountGet = StateCount; + MachRet = thread_get_state(Port, + StateFlavor, + (thread_state_t)&State, + &StateCountGet); + if (MachRet != KERN_SUCCESS) + { + ASSERT("thread_get_state(FLOAT_STATE) failed: %d\n", MachRet); + goto EXIT; + } + _ASSERTE(StateCountGet == StateCount); + } + + if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT) + { +#ifdef _X86_ + *(DWORD*)&State.fpu_fcw = lpContext->FloatSave.ControlWord; + *(DWORD*)&State.fpu_fsw = lpContext->FloatSave.StatusWord; + State.fpu_ftw = lpContext->FloatSave.TagWord; + State.fpu_ip = lpContext->FloatSave.ErrorOffset; + State.fpu_cs = lpContext->FloatSave.ErrorSelector; + State.fpu_dp = lpContext->FloatSave.DataOffset; + State.fpu_ds = lpContext->FloatSave.DataSelector; + State.fpu_mxcsr = lpContext->FloatSave.Cr0NpxState; + + // Windows stores the floating point registers in a packed layout (each 10-byte register end to + // end for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably + // for alignment purposes). So we can't just memcpy the registers over in a single block, we need + // to copy them individually. 
+ for (int i = 0; i < 8; i++) + memcpy((&State.fpu_stmm0)[i].mmst_reg, &lpContext->FloatSave.RegisterArea[i * 10], 10); +#elif defined(_AMD64_) + *(DWORD*)&State.__fpu_fcw = lpContext->FltSave.ControlWord; + *(DWORD*)&State.__fpu_fsw = lpContext->FltSave.StatusWord; + State.__fpu_ftw = lpContext->FltSave.TagWord; + State.__fpu_ip = lpContext->FltSave.ErrorOffset; + State.__fpu_cs = lpContext->FltSave.ErrorSelector; + State.__fpu_dp = lpContext->FltSave.DataOffset; + State.__fpu_ds = lpContext->FltSave.DataSelector; + State.__fpu_mxcsr = lpContext->FltSave.MxCsr; + State.__fpu_mxcsrmask = lpContext->FltSave.MxCsr_Mask; // note: we don't save the mask for x86 + + // Windows stores the floating point registers in a packed layout (each 10-byte register end to + // end for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably + // for alignment purposes). So we can't just memcpy the registers over in a single block, we need + // to copy them individually. + for (int i = 0; i < 8; i++) + memcpy((&State.__fpu_stmm0)[i].__mmst_reg, &lpContext->FltSave.FloatRegisters[i], 10); + + memcpy(&State.__fpu_xmm0, &lpContext->Xmm0, 8 * 16); +#else +#error Unexpected architecture. +#endif + } + +#ifdef _X86_ + if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS) + { + // The only extended register information that Mach will tell us about are the xmm register + // values. Both Windows and Mach store the registers in a packed layout (each of the 8 registers + // is 16 bytes) so we can simply memcpy them across. + memcpy(&State.fpu_xmm0, lpContext->ExtendedRegisters + CONTEXT_EXREG_XMM_OFFSET, 8 * 16); + } +#endif // _X86_ + + MachRet = thread_set_state(Port, + StateFlavor, + (thread_state_t)&State, + StateCount); + if (MachRet != KERN_SUCCESS) + { + ASSERT("thread_set_state(FLOAT_STATE) failed: %d\n", MachRet); + goto EXIT; + } + } + +EXIT: + return MachRet; +} + +/*++ +Function: + SetThreadContext + +See MSDN doc. 
+--*/ +BOOL +CONTEXT_SetThreadContext( + DWORD dwProcessId, +#if !defined(_AMD64_) + DWORD dwThreadId, +#else // defined(_AMD64_) + DWORD64 dwThreadId, +#endif // !defined(_AMD64_) + DWORD dwLwpId, + CONST CONTEXT *lpContext) +{ + BOOL ret = FALSE; + + if (lpContext == NULL) + { + ERROR("Invalid lpContext parameter value\n"); + SetLastError(ERROR_NOACCESS); + goto EXIT; + } + + if (dwProcessId != GetCurrentProcessId()) + { + // GetThreadContext() of a thread in another process + ASSERT("Cross-process GetThreadContext() is not supported\n"); + SetLastError(ERROR_NOACCESS); + goto EXIT; + } + + if (dwThreadId != GetCurrentThreadId()) + { + // hThread is in the current process, but isn't the current + // thread. Extract the CONTEXT from the Mach thread. + + mach_port_t mptPort; + + mptPort = pthread_mach_thread_np((pthread_t)dwThreadId); + + ret = (CONTEXT_SetThreadContextOnPort(mptPort, lpContext) == KERN_SUCCESS); + } + else + { + MachSetThreadContext(const_cast<CONTEXT *>(lpContext)); + ASSERT("MachSetThreadContext should never return\n"); + } + +EXIT: + return ret; +} + +#endif // !HAVE_MACH_EXCEPTIONS + +/*++ +Function: + DBG_DebugBreak: same as DebugBreak + +See MSDN doc. +--*/ +VOID +DBG_DebugBreak() +{ + __asm__ __volatile__("int $3"); +} + + +/*++ +Function: + DBG_FlushInstructionCache: processor-specific portion of + FlushInstructionCache + +See MSDN doc. +--*/ +BOOL +DBG_FlushInstructionCache( + IN LPCVOID lpBaseAddress, + IN SIZE_T dwSize) +{ + // Intel x86 hardware has cache coherency, so nothing needs to be done. + return TRUE; +} + +#if _DEBUG && defined(__APPLE__) +/*++ +Function: + DBG_CheckStackAlignment + + The Apple ABI requires 16-byte alignment on the stack pointer. + This function interrupts otherwise. 
+--*/ +// Bullseye has parsing problems if "asm" comes after VOID +asm VOID +DBG_CheckStackAlignment() +{ +#ifndef __llvm__ +#ifdef _X86_ + // Prolog - at this point we are at aligned - 4 (for the call) + push ebp // aligned - 8 + mov ebp, esp + sub esp,STACK_ALIGN_REQ-8 // aligned (or should be) +#elif defined(_AMD64_) + // Prolog - at this point we are at aligned - 8 (for the call) + push rbp // aligned -16 + mov rbp, rsp +#else +#error Unexpected architecture. +#endif +#endif // !__llvm__ + + test esp,STACK_ALIGN_REQ-1 // can get away with esp even on AMD64. + jz .+3 + int 3 + +#ifndef __llvm__ + // Epilog + leave +#endif // !__llvm__ +} +#endif // DEBUG && APPLE + diff --git a/src/pal/src/arch/i386/context2.s b/src/pal/src/arch/i386/context2.s new file mode 100644 index 0000000000..535506cf00 --- /dev/null +++ b/src/pal/src/arch/i386/context2.s @@ -0,0 +1,212 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// +// Implementation of _CONTEXT_CaptureContext for the Intel x86 platform. +// This function is processor dependent. It is used by exception handling, +// and is always apply to the current thread. 
+// + +#ifdef BIT64 + +#define CONTEXT_CONTROL 1 // SegSs, Rsp, SegCs, Rip, and EFlags +#define CONTEXT_INTEGER 2 // Rax, Rcx, Rdx, Rbx, Rbp, Rsi, Rdi, R8-R15 +#define CONTEXT_SEGMENTS 4 // SegDs, SegEs, SegFs, SegGs +#define CONTEXT_FLOATING_POINT 8 +#define CONTEXT_DEBUG_REGISTERS 16 // Dr0-Dr3 and Dr6-Dr7 + +#define CONTEXT_ContextFlags 6*8 +#define CONTEXT_SegCs CONTEXT_ContextFlags+8 +#define CONTEXT_SegDs CONTEXT_SegCs+2 +#define CONTEXT_SegEs CONTEXT_SegDs+2 +#define CONTEXT_SegFs CONTEXT_SegEs+2 +#define CONTEXT_SegGs CONTEXT_SegFs+2 +#define CONTEXT_SegSs CONTEXT_SegGs+2 +#define CONTEXT_EFlags CONTEXT_SegSs+2 +#define CONTEXT_Dr0 CONTEXT_EFlags+4 +#define CONTEXT_Dr1 CONTEXT_Dr0+8 +#define CONTEXT_Dr2 CONTEXT_Dr1+8 +#define CONTEXT_Dr3 CONTEXT_Dr2+8 +#define CONTEXT_Dr6 CONTEXT_Dr3+8 +#define CONTEXT_Dr7 CONTEXT_Dr6+8 +#define CONTEXT_Rax CONTEXT_Dr7+8 +#define CONTEXT_Rcx CONTEXT_Rax+8 +#define CONTEXT_Rdx CONTEXT_Rcx+8 +#define CONTEXT_Rbx CONTEXT_Rdx+8 +#define CONTEXT_Rsp CONTEXT_Rbx+8 +#define CONTEXT_Rbp CONTEXT_Rsp+8 +#define CONTEXT_Rsi CONTEXT_Rbp+8 +#define CONTEXT_Rdi CONTEXT_Rsi+8 +#define CONTEXT_R8 CONTEXT_Rdi+8 +#define CONTEXT_R9 CONTEXT_R8+8 +#define CONTEXT_R10 CONTEXT_R9+8 +#define CONTEXT_R11 CONTEXT_R10+8 +#define CONTEXT_R12 CONTEXT_R11+8 +#define CONTEXT_R13 CONTEXT_R12+8 +#define CONTEXT_R14 CONTEXT_R13+8 +#define CONTEXT_R15 CONTEXT_R14+8 +#define CONTEXT_Rip CONTEXT_R15+8 +#define CONTEXT_FltSave CONTEXT_Rip+8 +#define FLOATING_SAVE_AREA_SIZE 4*8+24*16+96 +#define CONTEXT_Xmm0 CONTEXT_FltSave+10*16 +#define CONTEXT_Xmm1 CONTEXT_Xmm0+16 +#define CONTEXT_Xmm2 CONTEXT_Xmm1+16 +#define CONTEXT_Xmm3 CONTEXT_Xmm2+16 +#define CONTEXT_Xmm4 CONTEXT_Xmm3+16 +#define CONTEXT_Xmm5 CONTEXT_Xmm4+16 +#define CONTEXT_Xmm6 CONTEXT_Xmm5+16 +#define CONTEXT_Xmm7 CONTEXT_Xmm6+16 +#define CONTEXT_Xmm8 CONTEXT_Xmm7+16 +#define CONTEXT_Xmm9 CONTEXT_Xmm8+16 +#define CONTEXT_Xmm10 CONTEXT_Xmm9+16 +#define CONTEXT_Xmm11 CONTEXT_Xmm10+16 +#define 
CONTEXT_Xmm12 CONTEXT_Xmm11+16 +#define CONTEXT_Xmm13 CONTEXT_Xmm12+16 +#define CONTEXT_Xmm14 CONTEXT_Xmm13+16 +#define CONTEXT_Xmm15 CONTEXT_Xmm14+16 +#define CONTEXT_VectorRegister CONTEXT_Xmm15+16 +#define CONTEXT_VectorControl CONTEXT_VectorRegister+16*26 +#define CONTEXT_DebugControl CONTEXT_VectorControl+8 +#define CONTEXT_LastBranchToRip CONTEXT_DebugControl+8 +#define CONTEXT_LastBranchFromRip CONTEXT_LastBranchToRip+8 +#define CONTEXT_LastExceptionToRip CONTEXT_LastBranchFromRip+8 +#define CONTEXT_LastExceptionFromRip CONTEXT_LastExceptionToRip+8 + +// Incoming: +// RDI: Context* +// + .globl _CONTEXT_CaptureContext +_CONTEXT_CaptureContext: + testb $CONTEXT_INTEGER, CONTEXT_ContextFlags(%rdi) + je 0f + mov %rdi, CONTEXT_Rdi(%rdi) + mov %rsi, CONTEXT_Rsi(%rdi) + mov %rbx, CONTEXT_Rbx(%rdi) + mov %rdx, CONTEXT_Rdx(%rdi) + mov %rcx, CONTEXT_Rcx(%rdi) + mov %rax, CONTEXT_Rax(%rdi) + mov %rbp, CONTEXT_Rbp(%rdi) + mov %r8, CONTEXT_R8(%rdi) + mov %r9, CONTEXT_R9(%rdi) + mov %r10, CONTEXT_R10(%rdi) + mov %r11, CONTEXT_R11(%rdi) + mov %r12, CONTEXT_R12(%rdi) + mov %r13, CONTEXT_R13(%rdi) + mov %r14, CONTEXT_R14(%rdi) + mov %r15, CONTEXT_R15(%rdi) + jmp 1f +0: + nop +1: + testb $CONTEXT_CONTROL, CONTEXT_ContextFlags(%rdi) + je 2f + + // Return address is @ RSP + mov (%rsp), %rdx + mov %rdx, CONTEXT_Rip(%rdi) + mov %cs, CONTEXT_SegCs(%rdi) + pushfq + pop %rdx + mov %edx, CONTEXT_EFlags(%rdi) + lea 8(%rsp), %rdx + mov %rdx, CONTEXT_Rsp(%rdi) + mov %ss, CONTEXT_SegSs(%rdi) +2: + // Need to double check this is producing the right result + // also that FFSXR (fast save/restore) is not turned on + // otherwise it omits the xmm registers. 
+ testb $CONTEXT_FLOATING_POINT, CONTEXT_ContextFlags(%rdi) + je 3f + fxsave CONTEXT_FltSave(%rdi) +3: + testb $CONTEXT_DEBUG_REGISTERS, CONTEXT_ContextFlags(%rdi) + je 4f + mov %dr0, %rdx + mov %rdx, CONTEXT_Dr0(%rdi) + mov %dr1, %rdx + mov %rdx, CONTEXT_Dr1(%rdi) + mov %dr2, %rdx + mov %rdx, CONTEXT_Dr2(%rdi) + mov %dr3, %rdx + mov %rdx, CONTEXT_Dr3(%rdi) + mov %dr6, %rdx + mov %rdx, CONTEXT_Dr6(%rdi) + mov %dr7, %rdx + mov %rdx, CONTEXT_Dr7(%rdi) +4: + ret + +#else + +#define CONTEXT_ContextFlags 0 +#define CONTEXT_FLOATING_POINT 8 +#define CONTEXT_FloatSave 7*4 +#define FLOATING_SAVE_AREA_SIZE 8*4+80 +#define CONTEXT_Edi CONTEXT_FloatSave + FLOATING_SAVE_AREA_SIZE + 4*4 +#define CONTEXT_Esi CONTEXT_Edi+4 +#define CONTEXT_Ebx CONTEXT_Esi+4 +#define CONTEXT_Edx CONTEXT_Ebx+4 +#define CONTEXT_Ecx CONTEXT_Edx+4 +#define CONTEXT_Eax CONTEXT_Ecx+4 +#define CONTEXT_Ebp CONTEXT_Eax+4 +#define CONTEXT_Eip CONTEXT_Ebp+4 +#define CONTEXT_SegCs CONTEXT_Eip+4 +#define CONTEXT_EFlags CONTEXT_SegCs+4 +#define CONTEXT_Esp CONTEXT_EFlags+4 +#define CONTEXT_SegSs CONTEXT_Esp+4 +#define CONTEXT_EXTENDED_REGISTERS 32 +#define CONTEXT_ExtendedRegisters CONTEXT_SegSs+4 +#define CONTEXT_Xmm0 CONTEXT_ExtendedRegisters+160 +#define CONTEXT_Xmm1 CONTEXT_Xmm0+16 +#define CONTEXT_Xmm2 CONTEXT_Xmm1+16 +#define CONTEXT_Xmm3 CONTEXT_Xmm2+16 +#define CONTEXT_Xmm4 CONTEXT_Xmm3+16 +#define CONTEXT_Xmm5 CONTEXT_Xmm4+16 +#define CONTEXT_Xmm6 CONTEXT_Xmm5+16 +#define CONTEXT_Xmm7 CONTEXT_Xmm6+16 + + .globl _CONTEXT_CaptureContext +_CONTEXT_CaptureContext: + push %eax + mov 8(%esp), %eax + mov %edi, CONTEXT_Edi(%eax) + mov %esi, CONTEXT_Esi(%eax) + mov %ebx, CONTEXT_Ebx(%eax) + mov %edx, CONTEXT_Edx(%eax) + mov %ecx, CONTEXT_Ecx(%eax) + pop %ecx + mov %ecx, CONTEXT_Eax(%eax) + mov %ebp, CONTEXT_Ebp(%eax) + mov (%esp), %edx + mov %edx, CONTEXT_Eip(%eax) + push %cs + pop %edx + mov %edx, CONTEXT_SegCs(%eax) + pushf + pop %edx + mov %edx, CONTEXT_EFlags(%eax) + lea 4(%esp), %edx + mov %edx, 
CONTEXT_Esp(%eax) + push %ss + pop %edx + mov %edx, CONTEXT_SegSs(%eax) + testb $CONTEXT_FLOATING_POINT, CONTEXT_ContextFlags(%eax) + je 0f + fnsave CONTEXT_FloatSave(%eax) + frstor CONTEXT_FloatSave(%eax) +0: + testb $CONTEXT_EXTENDED_REGISTERS, CONTEXT_ContextFlags(%eax) + je 2f + movdqu %xmm0, CONTEXT_Xmm0(%eax) + movdqu %xmm1, CONTEXT_Xmm1(%eax) + movdqu %xmm2, CONTEXT_Xmm2(%eax) + movdqu %xmm3, CONTEXT_Xmm3(%eax) + movdqu %xmm4, CONTEXT_Xmm4(%eax) + movdqu %xmm5, CONTEXT_Xmm5(%eax) + movdqu %xmm6, CONTEXT_Xmm6(%eax) + movdqu %xmm7, CONTEXT_Xmm7(%eax) +2: + ret + +#endif diff --git a/src/pal/src/arch/i386/dispatchexceptionwrapper.s b/src/pal/src/arch/i386/dispatchexceptionwrapper.s new file mode 100644 index 0000000000..831ce93f54 --- /dev/null +++ b/src/pal/src/arch/i386/dispatchexceptionwrapper.s @@ -0,0 +1,133 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +// ==++== +// + +// ==--== +// +// Implementation of the PAL_DispatchExceptionWrapper that is +// interposed between a function that caused a hardware fault +// and PAL_DispatchException that throws an SEH exception for +// the fault, to make the stack unwindable. +// +// On Mac OS X 10.6, the unwinder fails to operate correctly +// on our original int3; int3 body. The workaround is to +// increase the size of the function to include a call statement, +// even though it will never be executed. 
+ +#if defined(_AMD64_) +#define PAL_DISPATCHEXCEPTION __Z21PAL_DispatchExceptionmmmmmmP8_CONTEXTP17_EXCEPTION_RECORD +#else //!defined(_AMD64_) +#define PAL_DISPATCHEXCEPTION __Z21PAL_DispatchExceptionP8_CONTEXTP17_EXCEPTION_RECORD +#endif // defined(_AMD64_) + + .text + .globl __Z21PAL_DispatchExceptionP8_CONTEXTP17_EXCEPTION_RECORD + .globl _PAL_DispatchExceptionWrapper +_PAL_DispatchExceptionWrapper: +LBegin: + int3 + call PAL_DISPATCHEXCEPTION + int3 +LEnd: + +// +// PAL_DispatchExceptionWrapper will never be called; it only serves +// to be referenced from a stack frame on the faulting thread. Its +// unwinding behavior is equivalent to any standard function having +// an ebp frame. The FDE below is analogous to the one generated +// by "g++ -S" for the following source file. +// +// --- snip --- +// struct CONTEXT +// { +// char reserved[716]; +// }; +// +// struct EXCEPTION_RECORD +// { +// char reserved[80]; +// }; +// +// void PAL_DispatchException(CONTEXT *pContext, EXCEPTION_RECORD *pExceptionRecord); +// +// extern "C" void PAL_DispatchExceptionWrapper() +// { +// CONTEXT Context; +// EXCEPTION_RECORD ExceptionRecord; +// PAL_DispatchException(&Context, &ExceptionRecord); +// } +// --- snip --- +// + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +CIE_DispatchExceptionPersonality: + .long LECIE1-LSCIE1 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zLR\0" + .byte 0x1 +#ifdef BIT64 + .byte 0x78 // data_align: -8 + .byte 16 // return address register: rip +#else // BIT64 + .byte 0x7c // data_align: -4 + .byte 0x8 // return address register: eip +#endif // BIT64 else + .byte 0x2 + .byte 0x10 + .byte 0x10 + .byte 0xc // DW_CFA_def_cfa +#ifdef BIT64 + .byte 0x7 // operand1 = rsp + .byte 0x8 // operand2 = offset 8 + .byte 0x80 | 16 // DW_CFA_offset of return address register +#else // BIT64 + .byte 0x5 // operand1 = esp + .byte 0x4 // operand2 = offset 4 + .byte 0x80 | 8 // DW_CFA_offset of return address register +#endif // BIT64 
else + .byte 0x1 // operand1 = 1 word + .align 2 +LECIE1: + + .globl _PAL_DispatchExceptionWrapper.eh +_PAL_DispatchExceptionWrapper.eh: +LSFDE1: + .set LLFDE1,LEFDE1-LASFDE1 + .set LLength,LEnd-LBegin + .long LLFDE1 +LASFDE1: + .long LASFDE1-CIE_DispatchExceptionPersonality +#ifdef BIT64 + .quad LBegin-. + .quad LLength + .byte 0x8 + .quad 0x0 +#else // BIT64 + .long LBegin-. + .long LLength + .byte 0x4 + .long 0x0 +#endif // BIT64 else + .byte 0xe // DW_CFA_def_cfa_offset +#ifdef BIT64 + .byte 0x10 + .byte 0x80 | 6 // DW_CFA_offset rbp +#else // BIT64 + .byte 0x8 + .byte 0x80 | 4 // DW_CFA_offset ebp +#endif // BIT64 else + .byte 0x2 + .byte 0xd // DW_CFA_def_cfa_register +#ifdef BIT64 + .byte 6 // operand1 = rbp +#else // BIT64 + .byte 4 // operand1 = ebp +#endif // BIT64 + .align 2 +LEFDE1: diff --git a/src/pal/src/arch/i386/optimizedtls.cpp b/src/pal/src/arch/i386/optimizedtls.cpp new file mode 100644 index 0000000000..fe9b3a939d --- /dev/null +++ b/src/pal/src/arch/i386/optimizedtls.cpp @@ -0,0 +1,238 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +/*++ + + + +Module Name: + + optimizedtls.cpp + +Abstract: + + Implementation of platform-specific Thread local storage functions. + + + +--*/ + +#include "pal/thread.hpp" +#include "pal/malloc.hpp" + +#include <pthread.h> + +#include "pal/dbgmsg.h" +#include "pal/misc.h" +#include "pal/debug.h" + +#include <stddef.h> + +using namespace CorUnix; + +SET_DEFAULT_DEBUG_CHANNEL(THREAD); + +#if defined(USE_OPTIMIZEDTLSGETTER) + +#define PAL_safe_offsetof(s,m) ((size_t)((ptrdiff_t)&(char&)(((s *)64)->m))-64) + +/*++ +Function: + CorUnix::TLSMakeOptimizedGetter + + Creates a platform-optimized version of TlsGetValue compiled + for a particular index. + + Generates the hot part of CorUnix::InternalGetCurrentThread + as a chunk of highly optimized machine-specific code at runtime. 
+ + Check the difference between CorUnix::InternalGetCurrentThread and + CorUnix::InternalGetCurrentThreadSlow to see the C/C++ code that matches + the code generated by this function. +--*/ +PAL_POPTIMIZEDTLSGETTER +CorUnix::TLSMakeOptimizedGetter( + IN CPalThread* pThread, + IN DWORD dwTlsIndex) +{ +#ifdef BIT64 +#pragma unused(pThread, dwTlsIndex) + ERROR("TLSMakeOptimizedGetter not rewritten for amd64 yet."); + return NULL; +#else + PAL_POPTIMIZEDTLSGETTER Ret = NULL; + BYTE* p; + int i = 0; + +#ifdef __APPLE__ +#define TLS_OPTIMIZED_GETTER_SIZE 118 +#else +#define TLS_OPTIMIZED_GETTER_SIZE 115 +#endif + + p = (BYTE*)InternalMalloc(pThread, TLS_OPTIMIZED_GETTER_SIZE * sizeof(BYTE)); + + if (p == NULL) + { + return Ret; + } + + // Need to preserve %ecx, %edx, and %esi registers as specified in + // GetThreadGeneric(void) in vm/i386/asmhelpers.s + p[i++] = 0x51; // push %ecx + p[i++] = 0x52; // push %edx + p[i++] = 0x89; // mov %esp,%eax // %eax = sp; + p[i++] = 0xe0; + p[i++] = 0xc1; // shr $0x11,%eax // sp >> 17; + p[i++] = 0xe8; + p[i++] = 0x11; + p[i++] = 0x89; // mov %eax,%edx // key = sp >> 17; + p[i++] = 0xc2; + p[i++] = 0xc1; // sar $0x7,%edx // key >> 7; + p[i++] = 0xfa; + p[i++] = 0x07; + p[i++] = 0x29; // sub %edx,%eax // key -= key >> 7; + p[i++] = 0xd0; + p[i++] = 0x89; // mov %eax,%edx + p[i++] = 0xc2; + p[i++] = 0xc1; // sar $0x5,%edx // key >> 5; + p[i++] = 0xfa; + p[i++] = 0x05; + p[i++] = 0x29; // sub %edx,%eax // key -= key >> 5; + p[i++] = 0xd0; + p[i++] = 0x89; // mov %eax,%edx + p[i++] = 0xc2; + p[i++] = 0xc1; // sar $0x3,%edx // key >> 3; + p[i++] = 0xfa; + p[i++] = 0x03; + p[i++] = 0x29; // sub %edx,%eax // key -= key >> 3; + p[i++] = 0xd0; + p[i++] = 0x25; // and $0xff,%eax // key &= 0xFF; + p[i++] = 0xff; + p[i++] = 0x00; + p[i++] = 0x00; + p[i++] = 0x00; + p[i++] = 0x8b; // mov (flush_counter),%ecx // %ecx = counter = flush_counter; + p[i++] = 0x0d; + *((DWORD*) &p[i]) = (DWORD)&flush_counter; + i += sizeof(DWORD); + p[i++] = 0x8b; // 
mov (thread_hints,%eax,4),%eax // %edx = pThread = thread_hints[key]; + p[i++] = 0x14; + p[i++] = 0x85; + *((DWORD*) &p[i]) = (DWORD)&thread_hints; + i += sizeof(DWORD); + p[i++] = 0x39; // cmp %esp,offsetof(CPalThread,tlsInfo)+offsetof(CThreadTLSInfo,minStack)(%edx) + // if ((size_t)pThread->tlsInfo.minStack <= sp) + p[i++] = 0xa2; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,minStack)); + i += sizeof(DWORD); + p[i++] = 0x77; // ja CallInternalGetCurrentThreadSlow: + p[i++] = 0x19; + p[i++] = 0x3b; // cmp offsetof(CPalThread,tlsInfo)+offsetof(CThreadTLSInfo,maxStack)(%edx),%esp + // if (sp < (size_t)pThread->tlsInfo.maxStack) + p[i++] = 0xa2; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,maxStack)); + i += sizeof(DWORD); + p[i++] = 0x73; // jae CallInternalGetCurrentThreadSlow: + p[i++] = 0x11; + p[i++] = 0x39; // cmp (flush_counter),%ecx // if (counter == flush_counter) + p[i++] = 0x0d; + *((DWORD*) &p[i]) = (DWORD)&flush_counter; + i += sizeof(DWORD); + p[i++] = 0x75; // jne CallInternalGetCurrentThreadSlow: + p[i++] = 0x09; + if (dwTlsIndex != THREAD_OBJECT_TLS_INDEX) + { + p[i++] = 0x8b; // mov offsetof(pThread->tlsSlots[dwTlsIndex])(%edx),%eax // %eax = pThread->tlsSlots[dwTlsIndex]; + p[i++] = 0x82; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,tlsSlots[dwTlsIndex])); + i += sizeof(DWORD); + } + else + { + p[i++] = 0x89; // mov %edx,%eax // %eax = pThread; + p[i++] = 0xd0; + p[i++] = 0x90; // nop + p[i++] = 0x90; // nop + p[i++] = 0x90; // nop + p[i++] = 0x90; // nop + } + p[i++] = 0x5a; // pop %edx + p[i++] = 0x59; // pop %ecx + p[i++] = 0xc3; // ret + // CallInternalGetCurrentThreadSlow: + p[i++] = 0x5a; // pop %edx + p[i++] = 0x59; // pop %ecx + p[i++] = 0x8d; // lea (thread_hints,%eax,4),%eax // %eax = &thread_hints[key]; + p[i++] = 0x04; + p[i++] = 0x85; + *((DWORD*) &p[i]) = 
(DWORD)&thread_hints; + i += sizeof(DWORD); + p[i++] = 0x55; // push %ebp + p[i++] = 0x89; // mov %esp,%ebp + p[i++] = 0xe5; + p[i++] = 0x51; // push %ecx + p[i++] = 0x89; // mov %esp,%ecx // this is the reference esp - need to match the reference esp used in the fast path. + p[i++] = 0xe1; + p[i++] = 0x52; // push %edx +#ifdef __APPLE__ + // establish 16-byte stack alignment + p[i++] = 0x83; // subl $8,%esp + p[i++] = 0xec; + p[i++] = 0x08; +#endif + p[i++] = 0x50; // push %eax // store &thread_hints[key] on stack as 2nd argument; + p[i++] = 0x51; // push %ecx // reference esp - The 1st argument for call to InternalGetCurrentThreadSlow. + p[i++] = 0xe8; // call InternalGetCurrentThreadSlow + *((DWORD*) &p[i]) = (DWORD)&InternalGetCurrentThreadSlow - (DWORD)(&p[i+sizeof(DWORD)]); + i += sizeof(DWORD); +#ifdef __APPLE__ + p[i++] = 0x83; // addl $16,%esp + p[i++] = 0xc4; + p[i++] = 0x10; +#else + p[i++] = 0x83; // addl $8,%esp + p[i++] = 0xc4; + p[i++] = 0x08; +#endif + if (dwTlsIndex != THREAD_OBJECT_TLS_INDEX) + { + p[i++] = 0x8b; // mov offsetof(pThread->tlsSlots[dwTlsIndex])(%eax),%eax // %eax = pThread->tlsSlots[dwTlsIndex]; + p[i++] = 0x80; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,tlsSlots[dwTlsIndex])); + i += sizeof(DWORD); + } + p[i++] = 0x5a; // pop %edx + p[i++] = 0x59; // pop %ecx + p[i++] = 0xc9; // leave + p[i++] = 0xc3; // ret + + if (i > TLS_OPTIMIZED_GETTER_SIZE) + { + ASSERT("Invalid TLS_OPTIMIZED_GETTER_SIZE %d\n", i); + } + + DBG_FlushInstructionCache(p, TLS_OPTIMIZED_GETTER_SIZE * sizeof(BYTE)); + + Ret = (PAL_POPTIMIZEDTLSGETTER)p; + + return Ret; +#endif // BIT64 else +} + +/*++ +Function: + TLSFreeOptimizedGetter + + Frees a function created by MakeOptimizedTlsGetter(). 
+--*/ +VOID +CorUnix::TLSFreeOptimizedGetter( + IN PAL_POPTIMIZEDTLSGETTER pOptimizedTlsGetter) +{ + InternalFree(InternalGetCurrentThread(), (void *)pOptimizedTlsGetter); +} + +#endif // USE_OPTIMIZEDTLSGETTER diff --git a/src/pal/src/arch/i386/processor.cpp b/src/pal/src/arch/i386/processor.cpp new file mode 100644 index 0000000000..ef0f161f3d --- /dev/null +++ b/src/pal/src/arch/i386/processor.cpp @@ -0,0 +1,45 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +/*++ + + + +Module Name: + + processor.cpp + +Abstract: + + Implementation of processor related functions for the Intel x86/x64 + platforms. These functions are processor dependent. + + + +--*/ + +#include "pal/palinternal.h" + +/*++ +Function: +YieldProcessor + +The YieldProcessor function signals to the processor to give resources +to threads that are waiting for them. This macro is only effective on +processors that support technology allowing multiple threads running +on a single processor, such as Intel's Hyper-Threading technology. + +--*/ +void +PALAPI +YieldProcessor( + VOID) +{ + __asm__ __volatile__ ( + "rep\n" + "nop" + ); +} + diff --git a/src/pal/src/arch/i386/runfilter.s b/src/pal/src/arch/i386/runfilter.s new file mode 100644 index 0000000000..1bf2f7438b --- /dev/null +++ b/src/pal/src/arch/i386/runfilter.s @@ -0,0 +1,158 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// +// Implementation of the PAL_RunFilter primitive that allows +// to run a filter guarded by a personality routine that can +// deal with nested exceptions. 
+// + +#define ALIGN_UP(x) ((x + 15) & ~15) + +#ifdef BIT64 + +#define SIZEOF_ARG_REGISTERS 32 +#define FRAME_SIZE ALIGN_UP(SIZEOF_ARG_REGISTERS) + + .text + .globl _PAL_RunFilter +_PAL_RunFilter: +LFB7: + push %rbp +LCFI0: + mov %rsp, %rbp +LCFI1: + sub $FRAME_SIZE, %rsp + mov %rdi, (%rsp) // ExceptionPointers + mov %rsi, 8(%rsp) // DispatcherContext + mov %rdx, 16(%rsp) // pvParam + mov %rcx, 24(%rsp) // pfnFilter + + // Filters need to be passed ExceptionPointers and pvParam arguments, in that order. + // ExceptionPointers is already in the right register (RDI), so setup pvParam to be + // in RSI + mov %rdx, %rsi +LEHB0: + call *%rcx // Invoke the filter +LEHE0: + leave + ret +LFE7: + +#else // BIT64 + +#define SIZEOF_ARG_REGISTERS 12 +#define FRAME_SIZE ALIGN_UP(8 + SIZEOF_ARG_REGISTERS) - 8 + + .text + .globl _PAL_RunFilter +_PAL_RunFilter: +LFB7: + pushl %ebp +LCFI0: + movl %esp, %ebp +LCFI1: + subl $FRAME_SIZE, %esp + movl 8(%ebp), %eax // exception pointers + movl %eax, (%esp) + movl 12(%ebp), %eax // dispatcher context + movl %eax, 4(%esp) + movl 16(%ebp), %eax // param + movl %eax, 8(%esp) +LEHB0: + call *20(%ebp) // filter +LEHE0: + leave + ret +LFE7: + +#endif // BIT64 else + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +CIE_SEHFilterPersonality: + .long LECIE1-LSCIE1 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zPLR\0" + .byte 0x1 +#ifdef BIT64 + .byte 0x78 // data_align: -8 + .byte 16 // return address register: rip +#else // BIT64 + .byte 0x7c // data_align: -4 + .byte 0x8 // return address register: eip +#endif // BIT64 else + .byte 0x7 + .byte 0x9b +#ifdef BIT64 + .long _PAL_SEHFilterPersonalityRoutine+4@GOTPCREL +#else // BIT64 + .long L_PAL_SEHFilterPersonalityRoutine$non_lazy_ptr-. 
+#endif // BIT64 else + .byte 0x10 + .byte 0x10 + .byte 0xc // DW_CFA_def_cfa +#ifdef BIT64 + .byte 0x7 // operand1 = rsp + .byte 0x8 // operand2 = offset 8 + .byte 0x80 | 16 // DW_CFA_offset of return address register +#else // BIT64 + .byte 0x5 // operand1 = esp + .byte 0x4 // operand2 = offset 4 + .byte 0x80 | 8 // DW_CFA_offset of return address register +#endif // BIT64 else + .byte 0x1 // operand1 = 1 word + .align 2 +LECIE1: + + .globl _PAL_RunFilter.eh +_PAL_RunFilter.eh: +LSFDE1: + .set LLFDE1,LEFDE1-LASFDE1 + .set LFL7,LFE7-LFB7 + .long LLFDE1 +LASFDE1: + .long LASFDE1-CIE_SEHFilterPersonality +#ifdef BIT64 + .quad LFB7-. + .quad LFL7 + .byte 0x8 + .quad 0x0 +#else // BIT64 + .long LFB7-. + .long LFL7 + .byte 0x4 + .long 0x0 +#endif // BIT64 else + .byte 0x4 // DW_CFA_advance_loc4 + .long LCFI0-LFB7 + .byte 0xe // DW_CFA_def_cfa_offset +#ifdef BIT64 + .byte 0x10 + .byte 0x80 | 6 // DW_CFA_offset rbp +#else // BIT64 + .byte 0x8 + .byte 0x80 | 4 // DW_CFA_offset ebp +#endif // BIT64 else + .byte 0x2 + .byte 0x4 // DW_CFA_advance_loc4 + .long LCFI1-LCFI0 + .byte 0xd // DW_CFA_def_cfa_register +#ifdef BIT64 + .byte 6 // operand1 = rbp +#else // BIT64 + .byte 4 // operand1 = ebp +#endif // BIT64 + .align 2 +LEFDE1: + +#ifndef BIT64 + + .section __IMPORT,__pointers,non_lazy_symbol_pointers +L_PAL_SEHFilterPersonalityRoutine$non_lazy_ptr: + .indirect_symbol _PAL_SEHFilterPersonalityRoutine + .long 0 + +#endif // BIT64 diff --git a/src/pal/src/arch/i386/tryexcept.s b/src/pal/src/arch/i386/tryexcept.s new file mode 100644 index 0000000000..f0bea999ad --- /dev/null +++ b/src/pal/src/arch/i386/tryexcept.s @@ -0,0 +1,207 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +// ==++== +// + +// ==--== +// +// Implementation of the PAL_TryExcept primitive for MSVC-style +// exception handling. 
+// + +#define ALIGN_UP(x) ((x + 15) & ~15) + +#ifdef BIT64 + +// GCC follows the AMD64 ABI calling convention which is considerably different from the Microsoft AMD64 calling convention. +// +// With MSVC, the first four arguments are passed in RCX, RDX, R8, R9 and the remaining on the stack. +// With GCC, the first six arguments are passed in RDI, RSI, RDX, RCX, R8, R9 and the remaining on the stack. + +// => Size of the total number of arguments PAL_TryExcept takes (32 bytes) + +// => 1 stack slot (8 bytes) to preserve "actions" flags when PAL_SEHPersonalityRoutine is invoked during unwind copies +// data to the stack location before fixing the context to invoke PAL_CallRunHandler + +// => 1 stack slot (8 bytes) to ensure stack is 16bytes aligned. +// +// Hence, the 48 bytes frame size. +#define SIZEOF_ARG_REGISTERS 48 +#define FRAME_SIZE ALIGN_UP(SIZEOF_ARG_REGISTERS) + + .text + .globl _PAL_TryExcept +_PAL_TryExcept: +LFB7: + push %rbp +LCFI0: + mov %rsp, %rbp +LCFI1: + sub $FRAME_SIZE, %rsp + mov %rdi, (%rsp) // Move the Body address to the stack + mov %rsi, 8(%rsp) // Move the Filter address to the stack + mov %rdx, 16(%rsp) // Move pvParam (i.e. HandlerData) to the stack + mov %rcx, 24(%rsp) // Move pfExecuteHandler value to the stack + mov %rdi, %r9 // Move the body address to r9 + mov %rdx, %rdi // Move the HandlerData argument to RDI - this will serve as the first (and only) argument passed to the __try block body below +LEHB0: + call *%r9 // ..and invoke the body of the __try block +LEHE0: + xor %rax, %rax // NULL, meaning "do not run handler" + jmp Lepilog + .globl _PAL_CallRunHandler +_PAL_CallRunHandler: + // Note: First two args (actions and exceptionObject) have already been + // setup by PAL_SEHPersonalityRoutine's cleanup phase handling. They are at + // RSP+32 and RSP+48 respectively. + // + // Prepare the arguments to be passed to PAL_RunHandler. 
+ mov 32(%rsp), %rdi // actions + mov 40(%rsp), %rsi // exceptionObject + mov 8(%rsp), %rdx // filter + mov 16(%rsp), %rcx // param + mov 24(%rsp), %r8 // pfExecuteHandler + call _PAL_RunHandler +Lepilog: + leave + ret +LFE7: + +#else // BIT64 + +#define SIZEOF_ARG_REGISTERS 20 +#define FRAME_SIZE ALIGN_UP(8 + SIZEOF_ARG_REGISTERS) - 8 + + .text + .globl _PAL_TryExcept +_PAL_TryExcept: +LFB7: + pushl %ebp +LCFI0: + movl %esp, %ebp +LCFI1: + subl $FRAME_SIZE, %esp + movl 16(%ebp), %eax // param + movl %eax, (%esp) +LEHB0: + call *8(%ebp) // body +LEHE0: + xor %eax, %eax // NULL, meaning "do not run handler" + jmp Lepilog + .globl _PAL_CallRunHandler +_PAL_CallRunHandler: + // note: first two args already set when we get here + mov 12(%ebp), %eax // filter + mov %eax, 8(%esp) + mov 16(%ebp), %eax // param + mov %eax, 12(%esp) + mov 20(%ebp), %eax // pfExecuteHandler + mov %eax, 16(%esp) + call L_PAL_RunHandler$stub +Lepilog: + leave + ret +LFE7: + +#endif // BIT64 else + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +CIE_SEHPersonality: + .long LECIE1-LSCIE1 +LSCIE1: + .long 0x0 + .byte 0x1 + #ifdef BIT64 + .ascii "zPLR\0" + .byte 0x1 + .byte 0x78 // data_align: -8 + .byte 16 // return address register: rip + .byte 0x7 + .byte 0x9b + .long _PAL_SEHPersonalityRoutine+4@GOTPCREL + .byte 0x10 + .byte 0x10 + .byte 0xc // DW_CFA_def_cfa + .byte 0x7 // operand1 = rsp + .byte 0x8 // operand2 = offset 8 + .byte 0x80 | 16 // DW_CFA_offset of return address register + .byte 0x1 // operand1 = 1 word + .align 2 +#else // BIT64 + .ascii "zPLR\0" + .byte 0x1 + .byte 0x7c // data_align: -4 + .byte 0x8 // return address register: eip + .byte 0x7 + .byte 0x9b + .long L_PAL_SEHPersonalityRoutine$non_lazy_ptr-. 
+ .byte 0x10 + .byte 0x10 + .byte 0xc // DW_CFA_def_cfa + .byte 0x5 // operand1 = esp + .byte 0x4 // operand2 = offset 4 + .byte 0x80 | 8 // DW_CFA_offset of return address register + .byte 0x1 // operand1 = 1 word + .align 2 +#endif // BIT64 else +LECIE1: + + .globl _PAL_TryExcept.eh +_PAL_TryExcept.eh: +LSFDE1: + .set LLFDE1,LEFDE1-LASFDE1 + .set LFL7,LFE7-LFB7 + .long LLFDE1 +LASFDE1: + .long LASFDE1-CIE_SEHPersonality +#ifdef BIT64 + .quad LFB7-. + .quad LFL7 + .byte 0x8 + .quad 0x0 + .byte 0x4 // DW_CFA_advance_loc4 +#else // BIT64 + .long LFB7-. + .long LFL7 + .byte 0x4 + .long 0x0 + .byte 0x4 // DW_CFA_advance_loc4 +#endif // BIT64 else + .long LCFI0-LFB7 + .byte 0xe // DW_CFA_def_cfa_offset +#ifdef BIT64 + .byte 0x10 + .byte 0x80 | 6 // DW_CFA_offset rbp +#else // BIT64 + .byte 0x8 + .byte 0x80 | 4 // DW_CFA_offset ebp +#endif // BIT64 else + .byte 0x2 + .byte 0x4 // DW_CFA_advance_loc4 + .long LCFI1-LCFI0 + .byte 0xd // DW_CFA_def_cfa_register +#ifdef BIT64 + .byte 6 // operand1 = rbp + .align 2 +#else // BIT64 + .byte 4 // operand1 = ebp + .align 2 +#endif // BIT64 + +LEFDE1: + +#ifndef BIT64 + + .section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 +L_PAL_RunHandler$stub: + .indirect_symbol _PAL_RunHandler + hlt ; hlt ; hlt ; hlt ; hlt + + .section __IMPORT,__pointers,non_lazy_symbol_pointers +L_PAL_SEHPersonalityRoutine$non_lazy_ptr: + .indirect_symbol _PAL_SEHPersonalityRoutine + .long 0 + +#endif // !BIT64 + |