summaryrefslogtreecommitdiff
path: root/src/pal
diff options
context:
space:
mode:
authorJan Vorlicek <janvorli@microsoft.com>2019-06-21 10:48:08 +0200
committerGitHub <noreply@github.com>2019-06-21 10:48:08 +0200
commit3954d187d1563a886f5ae2ed72442a813bbf2573 (patch)
treec59ad30c93b80dfb31ad10f724da6fac17bb99e7 /src/pal
parent99c30f13c1f64180a2524e6160d374f366c01719 (diff)
downloadcoreclr-3954d187d1563a886f5ae2ed72442a813bbf2573.tar.gz
coreclr-3954d187d1563a886f5ae2ed72442a813bbf2573.tar.bz2
coreclr-3954d187d1563a886f5ae2ed72442a813bbf2573.zip
Fix OSX floating point state extraction (#25295)
There was a bug reported on a very recent Mac with Intel i9 processor. A crash in the RtlRestoreContext was happening at the fxrstor instruction due to the fact that the floating point state data were garbage. The investigation has shown that sometimes, the x86_FLOAT_STATE64 cannot be obtained using the thread_get_state API. And it was also found that at the same time, the x86_AVX_STATE64 can be obtained. The state extracted by the AVX variant contains all the registers that the FLOAT variant would extract. However, in some cases, even the x86_AVX_STATE64 cannot be obtained and there is a third flavor that we can get - x86_AVX512_STATE64. Unfortunately, there are cases where none of those can be obtained. It is not clear what causes these cases, it seems only kernel debugging can give us an answer to that. This change modifies the way we extract the floating point state. We first try to get the AVX state, if we fail, we try the AVX512 and finally we fall back to the FLOAT state. If we fail to get the floating point state with any of these, we return context without the floating point state flag set. Also, if only getting the FLOAT state succeeds, we return context without the XSTATE flag set.
Diffstat (limited to 'src/pal')
-rw-r--r--src/pal/src/exception/machexception.cpp25
-rw-r--r--src/pal/src/thread/context.cpp200
2 files changed, 66 insertions, 159 deletions
diff --git a/src/pal/src/exception/machexception.cpp b/src/pal/src/exception/machexception.cpp
index 67d28d076b..336e951f99 100644
--- a/src/pal/src/exception/machexception.cpp
+++ b/src/pal/src/exception/machexception.cpp
@@ -1017,10 +1017,10 @@ Parameters:
thread - mach thread port
Return value :
- None
+ KERN_SUCCESS if the suspend succeeded, other code in case of failure
--*/
static
-void
+kern_return_t
SuspendMachThread(thread_act_t thread)
{
kern_return_t machret;
@@ -1028,7 +1028,10 @@ SuspendMachThread(thread_act_t thread)
while (true)
{
machret = thread_suspend(thread);
- CHECK_MACH("thread_suspend", machret);
+ if (machret != KERN_SUCCESS)
+ {
+ break;
+ }
// Ensure that if the thread was running in the kernel, the kernel operation
// is safely aborted so that it can be restarted later.
@@ -1041,8 +1044,13 @@ SuspendMachThread(thread_act_t thread)
// The thread was running in the kernel executing a non-atomic operation
// that cannot be restarted, so we need to resume the thread and retry
machret = thread_resume(thread);
- CHECK_MACH("thread_resume", machret);
+ if (machret != KERN_SUCCESS)
+ {
+ break;
+ }
}
+
+ return machret;
}
/*++
@@ -1103,7 +1111,8 @@ SEHExceptionThread(void *args)
thread = sMessage.GetThreadContext(&sContext);
// Suspend the target thread
- SuspendMachThread(thread);
+ machret = SuspendMachThread(thread);
+ CHECK_MACH("SuspendMachThread", machret);
machret = CONTEXT_SetThreadContextOnPort(thread, &sContext);
CHECK_MACH("CONTEXT_SetThreadContextOnPort", machret);
@@ -1220,7 +1229,8 @@ SEHExceptionThread(void *args)
NONPAL_TRACE("ForwardExceptionRequest for thread %08x\n", thread);
// Suspend the faulting thread.
- SuspendMachThread(thread);
+ machret = SuspendMachThread(thread);
+ CHECK_MACH("SuspendMachThread", machret);
// Set the context back to the original faulting state.
MachExceptionInfo *pExceptionInfo = sMessage.GetExceptionInfo();
@@ -1545,7 +1555,8 @@ InjectActivationInternal(CPalThread* pThread)
PAL_ERROR palError;
mach_port_t threadPort = pThread->GetMachPortSelf();
- kern_return_t MachRet = thread_suspend(threadPort);
+
+ kern_return_t MachRet = SuspendMachThread(threadPort);
palError = (MachRet == KERN_SUCCESS) ? NO_ERROR : ERROR_GEN_FAILURE;
if (palError == NO_ERROR)
diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp
index b98b99dab4..10b2db4ed0 100644
--- a/src/pal/src/thread/context.cpp
+++ b/src/pal/src/thread/context.cpp
@@ -890,10 +890,8 @@ CONTEXT_GetThreadContextFromPort(
if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK)
{
-#ifdef _X86_
- x86_thread_state32_t State;
- StateFlavor = x86_THREAD_STATE32;
-#elif defined(_AMD64_)
+
+#ifdef _AMD64_
x86_thread_state64_t State;
StateFlavor = x86_THREAD_STATE64;
#else
@@ -911,41 +909,47 @@ CONTEXT_GetThreadContextFromPort(
}
if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING & CONTEXT_AREA_MASK) {
-#ifdef _X86_
- x86_float_state32_t State;
- StateFlavor = x86_FLOAT_STATE32;
-#elif defined(_AMD64_)
- x86_float_state64_t State;
- StateFlavor = x86_FLOAT_STATE64;
-#else
-#error Unexpected architecture.
-#endif
- StateCount = sizeof(State) / sizeof(natural_t);
- MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount);
- if (MachRet != KERN_SUCCESS)
- {
- ASSERT("thread_get_state(FLOAT_STATE) failed: %d\n", MachRet);
- goto exit;
- }
-
- CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext);
- }
+ // The thread_get_state for floating point state can fail for some flavors when the processor is not
+ // in the right mode at the time we are taking the state. So we will try to get the AVX state first and
+ // if it fails, get the FLOAT state and if that fails, take AVX512 state. Both AVX and AVX512 states
+ // are supersets of the FLOAT state.
+ // Check a few fields to make sure the assumption is correct.
+ static_assert_no_msg(sizeof(x86_avx_state64_t) > sizeof(x86_float_state64_t));
+ static_assert_no_msg(sizeof(x86_avx512_state64_t) > sizeof(x86_avx_state64_t));
+ static_assert_no_msg(offsetof(x86_avx_state64_t, __fpu_fcw) == offsetof(x86_float_state64_t, __fpu_fcw));
+ static_assert_no_msg(offsetof(x86_avx_state64_t, __fpu_xmm0) == offsetof(x86_float_state64_t, __fpu_xmm0));
+ static_assert_no_msg(offsetof(x86_avx512_state64_t, __fpu_fcw) == offsetof(x86_float_state64_t, __fpu_fcw));
+ static_assert_no_msg(offsetof(x86_avx512_state64_t, __fpu_xmm0) == offsetof(x86_float_state64_t, __fpu_xmm0));
+
+ x86_avx512_state64_t State;
-#if defined(_AMD64_) && defined(XSTATE_SUPPORTED)
- if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) {
- x86_avx_state64_t State;
StateFlavor = x86_AVX_STATE64;
- StateCount = sizeof(State) / sizeof(natural_t);
+ StateCount = sizeof(x86_avx_state64_t) / sizeof(natural_t);
MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount);
if (MachRet != KERN_SUCCESS)
{
- ASSERT("thread_get_state(XSTATE) failed: %d\n", MachRet);
- goto exit;
+ // The AVX state is not available, try to get the AVX512 state.
+ StateFlavor = x86_AVX512_STATE64;
+ StateCount = sizeof(x86_avx512_state64_t) / sizeof(natural_t);
+ MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount);
+ if (MachRet != KERN_SUCCESS)
+ {
+ // Neither the AVX nor the AVX512 state is not available, try to get at least the FLOAT state.
+ lpContext->ContextFlags &= ~(CONTEXT_XSTATE & CONTEXT_AREA_MASK);
+
+ StateFlavor = x86_FLOAT_STATE64;
+ StateCount = sizeof(x86_float_state64_t) / sizeof(natural_t);
+ MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount);
+ if (MachRet != KERN_SUCCESS)
+ {
+ // We were unable to get any floating point state. This case was observed on OSX with AVX512 capable processors.
+ lpContext->ContextFlags &= ~((CONTEXT_XSTATE | CONTEXT_ALL_FLOATING) & CONTEXT_AREA_MASK);
+ }
+ }
}
CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext);
}
-#endif
exit:
return MachRet;
@@ -964,65 +968,7 @@ CONTEXT_GetThreadContextFromThreadState(
{
switch (threadStateFlavor)
{
-#ifdef _X86_
- case x86_THREAD_STATE32:
- if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK)
- {
- x86_thread_state32_t *pState = (x86_thread_state32_t *)threadState;
-
- lpContext->Eax = pState->eax;
- lpContext->Ebx = pState->ebx;
- lpContext->Ecx = pState->ecx;
- lpContext->Edx = pState->edx;
- lpContext->Edi = pState->edi;
- lpContext->Esi = pState->esi;
- lpContext->Ebp = pState->ebp;
- lpContext->Esp = pState->esp;
- lpContext->SegSs = pState->ss;
- lpContext->EFlags = pState->eflags;
- lpContext->Eip = pState->eip;
- lpContext->SegCs = pState->cs;
- lpContext->SegDs_PAL_Undefined = pState->ds;
- lpContext->SegEs_PAL_Undefined = pState->es;
- lpContext->SegFs_PAL_Undefined = pState->fs;
- lpContext->SegGs_PAL_Undefined = pState->gs;
- }
- break;
-
- case x86_FLOAT_STATE32:
- {
- x86_float_state32_t *pState = (x86_float_state32_t *)threadState;
-
- if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK)
- {
- lpContext->FloatSave.ControlWord = *(DWORD*)&pState->fpu_fcw;
- lpContext->FloatSave.StatusWord = *(DWORD*)&pState->fpu_fsw;
- lpContext->FloatSave.TagWord = pState->fpu_ftw;
- lpContext->FloatSave.ErrorOffset = pState->fpu_ip;
- lpContext->FloatSave.ErrorSelector = pState->fpu_cs;
- lpContext->FloatSave.DataOffset = pState->fpu_dp;
- lpContext->FloatSave.DataSelector = pState->fpu_ds;
- lpContext->FloatSave.Cr0NpxState = pState->fpu_mxcsr;
-
- // Windows stores the floating point registers in a packed layout (each 10-byte register end to end
- // for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably for
- // alignment purposes). So we can't just memcpy the registers over in a single block, we need to copy
- // them individually.
- for (int i = 0; i < 8; i++)
- memcpy(&lpContext->FloatSave.RegisterArea[i * 10], (&pState->fpu_stmm0)[i].mmst_reg, 10);
- }
-
- if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS & CONTEXT_AREA_MASK)
- {
- // The only extended register information that Mach will tell us about are the xmm register values.
- // Both Windows and Mach store the registers in a packed layout (each of the 8 registers is 16 bytes)
- // so we can simply memcpy them across.
- memcpy(lpContext->ExtendedRegisters + CONTEXT_EXREG_XMM_OFFSET, &pState->fpu_xmm0, 8 * 16);
- }
- }
- break;
-
-#elif defined(_AMD64_)
+#ifdef _AMD64_
case x86_THREAD_STATE64:
if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK)
{
@@ -1057,6 +1003,16 @@ CONTEXT_GetThreadContextFromThreadState(
}
break;
+ case x86_AVX_STATE64:
+ case x86_AVX512_STATE64:
+ if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK)
+ {
+ x86_avx_state64_t *pState = (x86_avx_state64_t *)threadState;
+ memcpy(&lpContext->VectorRegister, &pState->__fpu_ymmh0, 16 * 16);
+ }
+
+ // Intentional fall-through, the AVX states are supersets of the FLOAT state
+
case x86_FLOAT_STATE64:
if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK)
{
@@ -1083,16 +1039,6 @@ CONTEXT_GetThreadContextFromThreadState(
memcpy(&lpContext->Xmm0, &pState->__fpu_xmm0, 16 * 16);
}
break;
-
-#ifdef XSTATE_SUPPORTED
- case x86_AVX_STATE64:
- if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK)
- {
- x86_avx_state64_t *pState = (x86_avx_state64_t *)threadState;
- memcpy(&lpContext->VectorRegister, &pState->__fpu_ymmh0, 16 * 16);
- }
- break;
-#endif
#else
#error Unexpected architecture.
#endif
@@ -1181,27 +1127,7 @@ CONTEXT_SetThreadContextOnPort(
if (lpContext->ContextFlags & (CONTEXT_CONTROL|CONTEXT_INTEGER) & CONTEXT_AREA_MASK)
{
-#ifdef _X86_
- x86_thread_state32_t State;
- StateFlavor = x86_THREAD_STATE32;
-
- State.eax = lpContext->Eax;
- State.ebx = lpContext->Ebx;
- State.ecx = lpContext->Ecx;
- State.edx = lpContext->Edx;
- State.edi = lpContext->Edi;
- State.esi = lpContext->Esi;
- State.ebp = lpContext->Ebp;
- State.esp = lpContext->Esp;
- State.ss = lpContext->SegSs;
- State.eflags = lpContext->EFlags;
- State.eip = lpContext->Eip;
- State.cs = lpContext->SegCs;
- State.ds = lpContext->SegDs_PAL_Undefined;
- State.es = lpContext->SegEs_PAL_Undefined;
- State.fs = lpContext->SegFs_PAL_Undefined;
- State.gs = lpContext->SegGs_PAL_Undefined;
-#elif defined(_AMD64_)
+#ifdef _AMD64_
x86_thread_state64_t State;
StateFlavor = x86_THREAD_STATE64;
@@ -1249,11 +1175,7 @@ CONTEXT_SetThreadContextOnPort(
if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING & CONTEXT_AREA_MASK)
{
-#ifdef _X86_
- x86_float_state32_t State;
- StateFlavor = x86_FLOAT_STATE32;
- StateCount = sizeof(State) / sizeof(natural_t);
-#elif defined(_AMD64_)
+#ifdef _AMD64_
#ifdef XSTATE_SUPPORTED
// We're relying on the fact that the initial portion of
// x86_avx_state64_t is identical to x86_float_state64_t.
@@ -1304,23 +1226,7 @@ CONTEXT_SetThreadContextOnPort(
if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK)
{
-#ifdef _X86_
- *(DWORD*)&State.fpu_fcw = lpContext->FloatSave.ControlWord;
- *(DWORD*)&State.fpu_fsw = lpContext->FloatSave.StatusWord;
- State.fpu_ftw = lpContext->FloatSave.TagWord;
- State.fpu_ip = lpContext->FloatSave.ErrorOffset;
- State.fpu_cs = lpContext->FloatSave.ErrorSelector;
- State.fpu_dp = lpContext->FloatSave.DataOffset;
- State.fpu_ds = lpContext->FloatSave.DataSelector;
- State.fpu_mxcsr = lpContext->FloatSave.Cr0NpxState;
-
- // Windows stores the floating point registers in a packed layout (each 10-byte register end to
- // end for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably
- // for alignment purposes). So we can't just memcpy the registers over in a single block, we need
- // to copy them individually.
- for (int i = 0; i < 8; i++)
- memcpy((&State.fpu_stmm0)[i].mmst_reg, &lpContext->FloatSave.RegisterArea[i * 10], 10);
-#elif defined(_AMD64_)
+#ifdef _AMD64_
*(DWORD*)&State.__fpu_fcw = lpContext->FltSave.ControlWord;
*(DWORD*)&State.__fpu_fsw = lpContext->FltSave.StatusWord;
State.__fpu_ftw = lpContext->FltSave.TagWord;
@@ -1344,16 +1250,6 @@ CONTEXT_SetThreadContextOnPort(
#endif
}
-#ifdef _X86_
- if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS & CONTEXT_AREA_MASK)
- {
- // The only extended register information that Mach will tell us about are the xmm register
- // values. Both Windows and Mach store the registers in a packed layout (each of the 8 registers
- // is 16 bytes) so we can simply memcpy them across.
- memcpy(&State.fpu_xmm0, lpContext->ExtendedRegisters + CONTEXT_EXREG_XMM_OFFSET, 8 * 16);
- }
-#endif // _X86_
-
#if defined(_AMD64_) && defined(XSTATE_SUPPORTED)
if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK)
{