From 9585fc244705b3a06c1f99dc6d60b9c9583ec3ed Mon Sep 17 00:00:00 2001 From: Aditya Mandaleeka Date: Mon, 18 Jul 2016 19:39:05 -0700 Subject: Save and restore ymm registers in signal handlers. - Modified the CONTEXT structure for storing the upper 16 bytes of ymm registers - Upon start of signal handler, ymmh data is copied from the native context to the CONTEXT structure, and a new flag is set to indicate that it has ymmh data - Upon calling RtlRestoreContext, the new flag is checked, and ymmh data is restored into registers from the CONTEXT structure - This change fixes only the Linux side for now. --- src/pal/inc/pal.h | 2 + src/pal/src/arch/i386/asmconstants.h | 4 +- src/pal/src/debug/debug.cpp | 6 +-- src/pal/src/exception/machexception.cpp | 6 +-- src/pal/src/exception/signal.cpp | 14 ++++-- src/pal/src/include/pal/context.h | 78 +++++++++++++++++++++++++++------ src/pal/src/thread/context.cpp | 23 ++++++++-- src/pal/src/thread/thread.cpp | 5 ++- 8 files changed, 109 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index fe29112b07..e086717510 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -2570,6 +2570,8 @@ typedef struct _CONTEXT { #define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS) +#define CONTEXT_XSTATE (CONTEXT_AMD64 | 0x40L) + #define CONTEXT_EXCEPTION_ACTIVE 0x8000000 #define CONTEXT_SERVICE_ACTIVE 0x10000000 #define CONTEXT_EXCEPTION_REQUEST 0x40000000 diff --git a/src/pal/src/arch/i386/asmconstants.h b/src/pal/src/arch/i386/asmconstants.h index 8ec73b4bad..460d8a6192 100644 --- a/src/pal/src/arch/i386/asmconstants.h +++ b/src/pal/src/arch/i386/asmconstants.h @@ -14,6 +14,8 @@ #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#define CONTEXT_XSTATE 64 + #define CONTEXT_ContextFlags 6*8 #define CONTEXT_SegCs CONTEXT_ContextFlags+8 #define CONTEXT_SegDs CONTEXT_SegCs+2 @@ -47,7 +49,7 
@@ #define CONTEXT_Rip CONTEXT_R15+8 #define CONTEXT_FltSave CONTEXT_Rip+8 #define FLOATING_SAVE_AREA_SIZE 4*8+24*16+96 -#define CONTEXT_Xmm0 CONTEXT_FltSave+FLOATING_SAVE_AREA_SIZE // was 10*16 +#define CONTEXT_Xmm0 CONTEXT_FltSave+10*16 #define CONTEXT_Xmm1 CONTEXT_Xmm0+16 #define CONTEXT_Xmm2 CONTEXT_Xmm1+16 #define CONTEXT_Xmm3 CONTEXT_Xmm2+16 diff --git a/src/pal/src/debug/debug.cpp b/src/pal/src/debug/debug.cpp index 86ea9f98e4..b3ce4b1ff9 100644 --- a/src/pal/src/debug/debug.cpp +++ b/src/pal/src/debug/debug.cpp @@ -25,12 +25,14 @@ Revision History: #undef _FILE_OFFSET_BITS #endif +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(DEBUG); // some headers have code with asserts, so do this first + #include "pal/thread.hpp" #include "pal/procobj.hpp" #include "pal/file.hpp" #include "pal/palinternal.h" -#include "pal/dbgmsg.h" #include "pal/process.h" #include "pal/context.h" #include "pal/debug.h" @@ -66,8 +68,6 @@ Revision History: using namespace CorUnix; -SET_DEFAULT_DEBUG_CHANNEL(DEBUG); - extern "C" void DBG_DebugBreak_End(); #if HAVE_PROCFS_CTL diff --git a/src/pal/src/exception/machexception.cpp b/src/pal/src/exception/machexception.cpp index a483509f07..af1dc89fb5 100644 --- a/src/pal/src/exception/machexception.cpp +++ b/src/pal/src/exception/machexception.cpp @@ -14,12 +14,14 @@ Abstract: --*/ +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); // some headers have code with asserts, so do this first + #include "pal/thread.hpp" #include "pal/seh.hpp" #include "pal/palinternal.h" #if HAVE_MACH_EXCEPTIONS #include "machexception.h" -#include "pal/dbgmsg.h" #include "pal/critsect.h" #include "pal/debug.h" #include "pal/init.h" @@ -42,8 +44,6 @@ Abstract: using namespace CorUnix; -SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); - // The port we use to handle exceptions and to set the thread context mach_port_t s_ExceptionPort; diff --git a/src/pal/src/exception/signal.cpp b/src/pal/src/exception/signal.cpp index 8dd75ac185..c2c217993a 100644 --- 
a/src/pal/src/exception/signal.cpp +++ b/src/pal/src/exception/signal.cpp @@ -18,6 +18,9 @@ Abstract: --*/ +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); // some headers have code with asserts, so do this first + #include "pal/corunix.hpp" #include "pal/handleapi.hpp" #include "pal/thread.hpp" @@ -27,7 +30,6 @@ Abstract: #include "pal/palinternal.h" #if !HAVE_MACH_EXCEPTIONS -#include "pal/dbgmsg.h" #include "pal/init.h" #include "pal/process.h" #include "pal/debug.h" @@ -43,8 +45,6 @@ Abstract: using namespace CorUnix; -SET_DEFAULT_DEBUG_CHANNEL(EXCEPT); - #ifdef SIGRTMIN #define INJECT_ACTIVATION_SIGNAL SIGRTMIN #endif @@ -611,10 +611,16 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext // which is required for restoring context RtlCaptureContext(contextRecord); + ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; + +#if defined(_AMD64_) + contextFlags |= CONTEXT_XSTATE; +#endif + // Fill context record with required information. from pal.h: // On non-Win32 platforms, the CONTEXT pointer in the // PEXCEPTION_POINTERS will contain at least the CONTEXT_CONTROL registers. 
- CONTEXTFromNativeContext(ucontext, contextRecord, CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT); + CONTEXTFromNativeContext(ucontext, contextRecord, contextFlags); /* Unmask signal so we can receive it again */ sigemptyset(&signal_set); diff --git a/src/pal/src/include/pal/context.h b/src/pal/src/include/pal/context.h index 6f1b3fe734..5e378942fb 100644 --- a/src/pal/src/include/pal/context.h +++ b/src/pal/src/include/pal/context.h @@ -121,19 +121,71 @@ typedef ucontext_t native_context_t; #define MCREG_R14(mc) ((mc).gregs[REG_R14]) #define MCREG_R15(mc) ((mc).gregs[REG_R15]) -#define FPREG_Xmm(uc, index) *(M128A*)&((uc)->uc_mcontext.fpregs->_xmm[index]) - -#define FPREG_St(uc, index) *(M128A*)&((uc)->uc_mcontext.fpregs->_st[index]) - -#define FPREG_ControlWord(uc) ((uc)->uc_mcontext.fpregs->cwd) -#define FPREG_StatusWord(uc) ((uc)->uc_mcontext.fpregs->swd) -#define FPREG_TagWord(uc) ((uc)->uc_mcontext.fpregs->ftw) -#define FPREG_ErrorOffset(uc) *(DWORD*)&((uc)->uc_mcontext.fpregs->rip) -#define FPREG_ErrorSelector(uc) *(((WORD*)&((uc)->uc_mcontext.fpregs->rip)) + 2) -#define FPREG_DataOffset(uc) *(DWORD*)&((uc)->uc_mcontext.fpregs->rdp) -#define FPREG_DataSelector(uc) *(((WORD*)&((uc)->uc_mcontext.fpregs->rdp)) + 2) -#define FPREG_MxCsr(uc) ((uc)->uc_mcontext.fpregs->mxcsr) -#define FPREG_MxCsr_Mask(uc) ((uc)->uc_mcontext.fpregs->mxcr_mask) +#define FPREG_Fpstate(uc) ((uc)->uc_mcontext.fpregs) +#define FPREG_Xmm(uc, index) *(M128A*)&(FPREG_Fpstate(uc)->_xmm[index]) + +#define FPREG_St(uc, index) *(M128A*)&(FPREG_Fpstate(uc)->_st[index]) + +#define FPREG_ControlWord(uc) (FPREG_Fpstate(uc)->cwd) +#define FPREG_StatusWord(uc) (FPREG_Fpstate(uc)->swd) +#define FPREG_TagWord(uc) (FPREG_Fpstate(uc)->ftw) +#define FPREG_ErrorOffset(uc) *(DWORD*)&(FPREG_Fpstate(uc)->rip) +#define FPREG_ErrorSelector(uc) *(((WORD*)&(FPREG_Fpstate(uc)->rip)) + 2) +#define FPREG_DataOffset(uc) *(DWORD*)&(FPREG_Fpstate(uc)->rdp) +#define FPREG_DataSelector(uc) 
*(((WORD*)&(FPREG_Fpstate(uc)->rdp)) + 2) +#define FPREG_MxCsr(uc) (FPREG_Fpstate(uc)->mxcsr) +#define FPREG_MxCsr_Mask(uc) (FPREG_Fpstate(uc)->mxcr_mask) + +///////////////////// +// Extended state + +inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) +{ + // Bytes 464..511 in the FXSAVE format are available for software to use for any purpose. In this case, they are used to + // indicate information about extended state. + _ASSERTE(reinterpret_cast(&FPREG_Fpstate(uc)->padding[12]) - reinterpret_cast(FPREG_Fpstate(uc)) == 464); + + _ASSERTE(FPREG_Fpstate(uc) != nullptr); + + return reinterpret_cast<_fpx_sw_bytes *>(&FPREG_Fpstate(uc)->padding[12]); +} + +inline UINT32 FPREG_ExtendedSize(const ucontext_t *uc) +{ + _ASSERTE(FPREG_FpxSwBytes(uc)->magic1 == FP_XSTATE_MAGIC1); + return FPREG_FpxSwBytes(uc)->extended_size; +} + +inline bool FPREG_HasExtendedState(const ucontext_t *uc) +{ + // See comments in /usr/include/x86_64-linux-gnu/asm/sigcontext.h for info on how to detect if extended state is present + static_assert_no_msg(FP_XSTATE_MAGIC2_SIZE == sizeof(UINT32)); + + if (FPREG_FpxSwBytes(uc)->magic1 != FP_XSTATE_MAGIC1) + { + return false; + } + + UINT32 extendedSize = FPREG_ExtendedSize(uc); + if (extendedSize < sizeof(_xstate)) + { + return false; + } + + _ASSERTE(extendedSize >= FP_XSTATE_MAGIC2_SIZE); + return *reinterpret_cast(reinterpret_cast(FPREG_Fpstate(uc)) + (extendedSize - FP_XSTATE_MAGIC2_SIZE)) + == FP_XSTATE_MAGIC2; +} + +inline void *FPREG_Xstate_Ymmh(const ucontext_t *uc) +{ + static_assert_no_msg(sizeof(reinterpret_cast<_xstate *>(FPREG_Fpstate(uc))->ymmh.ymmh_space) == 16 * 16); + _ASSERTE(FPREG_HasExtendedState(uc)); + + return reinterpret_cast<_xstate *>(FPREG_Fpstate(uc))->ymmh.ymmh_space; +} + +///////////////////// #else // BIT64 diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp index 025bb978c6..c3412bad7c 100644 --- a/src/pal/src/thread/context.cpp +++ b/src/pal/src/thread/context.cpp @@ -19,8 
+19,10 @@ Abstract: --*/ -#include "pal/palinternal.h" #include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do this first + +#include "pal/palinternal.h" #include "pal/context.h" #include "pal/debug.h" #include "pal/thread.hpp" @@ -29,8 +31,6 @@ Abstract: #include #include -SET_DEFAULT_DEBUG_CHANNEL(THREAD); - extern PGET_GCMARKER_EXCEPTION_CODE g_getGcMarkerExceptionCode; // in context2.S @@ -465,6 +465,15 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } #endif } + + // TODO: Enable for all Unix systems +#if defined(_AMD64_) && defined(__linux__) + if ((lpContext->ContextFlags & CONTEXT_XSTATE) != 0) + { + _ASSERTE(FPREG_HasExtendedState(native)); + memcpy_s(FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16, lpContext->VectorRegister, sizeof(M128A) * 16); + } +#endif // _AMD64_ } /*++ @@ -551,6 +560,14 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex } #endif } + + // TODO: Enable for all Unix systems +#if defined(_AMD64_) && defined(__linux__) + if ((contextFlags & CONTEXT_XSTATE) != 0 && FPREG_HasExtendedState(native)) + { + memcpy_s(lpContext->VectorRegister, sizeof(M128A) * 16, FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16); + } +#endif // _AMD64_ } /*++ diff --git a/src/pal/src/thread/thread.cpp b/src/pal/src/thread/thread.cpp index 159c451224..d6f6f9c47a 100644 --- a/src/pal/src/thread/thread.cpp +++ b/src/pal/src/thread/thread.cpp @@ -18,6 +18,9 @@ Abstract: --*/ +#include "pal/dbgmsg.h" +SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do this first + #include "pal/corunix.hpp" #include "pal/context.h" #include "pal/thread.hpp" @@ -29,7 +32,6 @@ Abstract: #include "procprivate.hpp" #include "pal/process.h" #include "pal/module.h" -#include "pal/dbgmsg.h" #include "pal/environ.h" #include "pal/init.h" @@ -74,7 +76,6 @@ using namespace CorUnix; /* ------------------- Definitions ------------------------------*/ 
-SET_DEFAULT_DEBUG_CHANNEL(THREAD); // The default stack size of a newly created thread (currently 256KB) // when the dwStackSize parameter of PAL_CreateThread() -- cgit v1.2.3 From dae88fd583d964913a052f3005f8a4ee486fc07f Mon Sep 17 00:00:00 2001 From: Aditya Mandaleeka Date: Mon, 25 Jul 2016 18:10:35 -0700 Subject: Fix offset to VectorRegister in i386/asmconstants.h. --- src/pal/src/arch/i386/asmconstants.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/pal/src/arch/i386/asmconstants.h b/src/pal/src/arch/i386/asmconstants.h index 460d8a6192..182c1191e4 100644 --- a/src/pal/src/arch/i386/asmconstants.h +++ b/src/pal/src/arch/i386/asmconstants.h @@ -65,7 +65,7 @@ #define CONTEXT_Xmm13 CONTEXT_Xmm12+16 #define CONTEXT_Xmm14 CONTEXT_Xmm13+16 #define CONTEXT_Xmm15 CONTEXT_Xmm14+16 -#define CONTEXT_VectorRegister CONTEXT_Xmm15+16 +#define CONTEXT_VectorRegister CONTEXT_FltSave+FLOATING_SAVE_AREA_SIZE #define CONTEXT_VectorControl CONTEXT_VectorRegister+16*26 #define CONTEXT_DebugControl CONTEXT_VectorControl+8 #define CONTEXT_LastBranchToRip CONTEXT_DebugControl+8 -- cgit v1.2.3 From 1c7531b638a6b35befc5fe66e33e2de83ecc6fe1 Mon Sep 17 00:00:00 2001 From: Aditya Mandaleeka Date: Thu, 28 Jul 2016 15:48:43 -0700 Subject: Add extended state to RtlRestoreContext --- src/pal/src/arch/i386/context2.S | 22 ++++++++++++++++++++++ src/pal/src/thread/context.cpp | 16 +++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/pal/src/arch/i386/context2.S b/src/pal/src/arch/i386/context2.S index 6320446a51..0e93e81a55 100644 --- a/src/pal/src/arch/i386/context2.S +++ b/src/pal/src/arch/i386/context2.S @@ -126,6 +126,28 @@ LOCAL_LABEL(Done_Restore_CONTEXT_DEBUG_REGISTERS): fxrstor [rdi + CONTEXT_FltSave] LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_XSTATE + je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) + + // Restore the extended state (for 
now, this is just the upper halves of YMM registers) + vinsertf128 ymm0, ymm0, xmmword ptr [rdi + (CONTEXT_VectorRegister + 0 * 16)], 1 + vinsertf128 ymm1, ymm1, xmmword ptr [rdi + (CONTEXT_VectorRegister + 1 * 16)], 1 + vinsertf128 ymm2, ymm2, xmmword ptr [rdi + (CONTEXT_VectorRegister + 2 * 16)], 1 + vinsertf128 ymm3, ymm3, xmmword ptr [rdi + (CONTEXT_VectorRegister + 3 * 16)], 1 + vinsertf128 ymm4, ymm4, xmmword ptr [rdi + (CONTEXT_VectorRegister + 4 * 16)], 1 + vinsertf128 ymm5, ymm5, xmmword ptr [rdi + (CONTEXT_VectorRegister + 5 * 16)], 1 + vinsertf128 ymm6, ymm6, xmmword ptr [rdi + (CONTEXT_VectorRegister + 6 * 16)], 1 + vinsertf128 ymm7, ymm7, xmmword ptr [rdi + (CONTEXT_VectorRegister + 7 * 16)], 1 + vinsertf128 ymm8, ymm8, xmmword ptr [rdi + (CONTEXT_VectorRegister + 8 * 16)], 1 + vinsertf128 ymm9, ymm9, xmmword ptr [rdi + (CONTEXT_VectorRegister + 9 * 16)], 1 + vinsertf128 ymm10, ymm10, xmmword ptr [rdi + (CONTEXT_VectorRegister + 10 * 16)], 1 + vinsertf128 ymm11, ymm11, xmmword ptr [rdi + (CONTEXT_VectorRegister + 11 * 16)], 1 + vinsertf128 ymm12, ymm12, xmmword ptr [rdi + (CONTEXT_VectorRegister + 12 * 16)], 1 + vinsertf128 ymm13, ymm13, xmmword ptr [rdi + (CONTEXT_VectorRegister + 13 * 16)], 1 + vinsertf128 ymm14, ymm14, xmmword ptr [rdi + (CONTEXT_VectorRegister + 14 * 16)], 1 + vinsertf128 ymm15, ymm15, xmmword ptr [rdi + (CONTEXT_VectorRegister + 15 * 16)], 1 +LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_CONTROL je LOCAL_LABEL(Done_Restore_CONTEXT_CONTROL) diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp index c3412bad7c..49d39d2a3d 100644 --- a/src/pal/src/thread/context.cpp +++ b/src/pal/src/thread/context.cpp @@ -563,10 +563,20 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex // TODO: Enable for all Unix systems #if defined(_AMD64_) && defined(__linux__) - if ((contextFlags & CONTEXT_XSTATE) != 0 && FPREG_HasExtendedState(native)) + 
if ((contextFlags & CONTEXT_XSTATE) != 0) { - memcpy_s(lpContext->VectorRegister, sizeof(M128A) * 16, FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16); - } + if (FPREG_HasExtendedState(native)) + { + memcpy_s(lpContext->VectorRegister, sizeof(M128A) * 16, FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16); + } + else + { + // Reset the CONTEXT_XSTATE bit(s) so it's clear that the extended state data in + // the CONTEXT is not valid. + const ULONG xstateFlags = CONTEXT_XSTATE & ~(CONTEXT_CONTROL & CONTEXT_INTEGER); + lpContext->ContextFlags &= ~xstateFlags; + } + } #endif // _AMD64_ } -- cgit v1.2.3 From 002149cadd71aee97a6d0e25c1d9e6edfe21e087 Mon Sep 17 00:00:00 2001 From: Aditya Mandaleeka Date: Thu, 28 Jul 2016 16:19:06 -0700 Subject: Also clear CONTEXT_XSTATE if fpregs aren't available. --- src/pal/src/thread/context.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp index 49d39d2a3d..9aaf105d74 100644 --- a/src/pal/src/thread/context.cpp +++ b/src/pal/src/thread/context.cpp @@ -522,16 +522,19 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex if (native->uc_mcontext.__fpregs == nullptr) #endif { - // Reset the CONTEXT_FLOATING_POINT bit(s) so it's clear that the floating point - // data in the CONTEXT is not valid. Since CONTEXT_FLOATING_POINT is defined as - // the architecture bit(s) OR'd with one or more other bits, we first get the bits - // that are unique to CONTEXT_FLOATING_POINT by resetting the architecture bits. - // We determine what those are by inverting the union of CONTEXT_CONTROL and - // CONTEXT_INTEGER, both of which should also have the architecture bit(s) set. + // Reset the CONTEXT_FLOATING_POINT bit(s) and the CONTEXT_XSTATE bit(s) so it's + // clear that the floating point and extended state data in the CONTEXT is not + // valid. 
Since these flags are defined as the architecture bit(s) OR'd with one + // or more other bits, we first get the bits that are unique to each by resetting + // the architecture bits. We determine what those are by inverting the intersection of + // CONTEXT_CONTROL and CONTEXT_INTEGER, both of which should also have the + // architecture bit(s) set. const ULONG floatingPointFlags = CONTEXT_FLOATING_POINT & ~(CONTEXT_CONTROL & CONTEXT_INTEGER); - lpContext->ContextFlags &= ~floatingPointFlags; + const ULONG xstateFlags = CONTEXT_XSTATE & ~(CONTEXT_CONTROL & CONTEXT_INTEGER); - // Bail out regardless of whether the caller wanted CONTEXT_FLOATING_POINT + lpContext->ContextFlags &= ~(floatingPointFlags | xstateFlags); + + // Bail out regardless of whether the caller wanted CONTEXT_FLOATING_POINT or CONTEXT_XSTATE return; } #endif -- cgit v1.2.3