diff options
-rw-r--r-- | src/pal/src/exception/machexception.cpp | 25 | ||||
-rw-r--r-- | src/pal/src/thread/context.cpp | 200 |
2 files changed, 66 insertions, 159 deletions
diff --git a/src/pal/src/exception/machexception.cpp b/src/pal/src/exception/machexception.cpp index 67d28d076b..336e951f99 100644 --- a/src/pal/src/exception/machexception.cpp +++ b/src/pal/src/exception/machexception.cpp @@ -1017,10 +1017,10 @@ Parameters: thread - mach thread port Return value : - None + KERN_SUCCESS if the suspend succeeded, other code in case of failure --*/ static -void +kern_return_t SuspendMachThread(thread_act_t thread) { kern_return_t machret; @@ -1028,7 +1028,10 @@ SuspendMachThread(thread_act_t thread) while (true) { machret = thread_suspend(thread); - CHECK_MACH("thread_suspend", machret); + if (machret != KERN_SUCCESS) + { + break; + } // Ensure that if the thread was running in the kernel, the kernel operation // is safely aborted so that it can be restarted later. @@ -1041,8 +1044,13 @@ SuspendMachThread(thread_act_t thread) // The thread was running in the kernel executing a non-atomic operation // that cannot be restarted, so we need to resume the thread and retry machret = thread_resume(thread); - CHECK_MACH("thread_resume", machret); + if (machret != KERN_SUCCESS) + { + break; + } } + + return machret; } /*++ @@ -1103,7 +1111,8 @@ SEHExceptionThread(void *args) thread = sMessage.GetThreadContext(&sContext); // Suspend the target thread - SuspendMachThread(thread); + machret = SuspendMachThread(thread); + CHECK_MACH("SuspendMachThread", machret); machret = CONTEXT_SetThreadContextOnPort(thread, &sContext); CHECK_MACH("CONTEXT_SetThreadContextOnPort", machret); @@ -1220,7 +1229,8 @@ SEHExceptionThread(void *args) NONPAL_TRACE("ForwardExceptionRequest for thread %08x\n", thread); // Suspend the faulting thread. - SuspendMachThread(thread); + machret = SuspendMachThread(thread); + CHECK_MACH("SuspendMachThread", machret); // Set the context back to the original faulting state. MachExceptionInfo *pExceptionInfo = sMessage.GetExceptionInfo(); @@ -1545,7 +1555,8 @@ InjectActivationInternal(CPalThread* pThread) PAL_ERROR palError; mach_port_t threadPort = pThread->GetMachPortSelf(); - kern_return_t MachRet = thread_suspend(threadPort); + + kern_return_t MachRet = SuspendMachThread(threadPort); palError = (MachRet == KERN_SUCCESS) ? NO_ERROR : ERROR_GEN_FAILURE; if (palError == NO_ERROR) diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp index b98b99dab4..10b2db4ed0 100644 --- a/src/pal/src/thread/context.cpp +++ b/src/pal/src/thread/context.cpp @@ -890,10 +890,8 @@ CONTEXT_GetThreadContextFromPort( if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK) { -#ifdef _X86_ - x86_thread_state32_t State; - StateFlavor = x86_THREAD_STATE32; -#elif defined(_AMD64_) + +#ifdef _AMD64_ x86_thread_state64_t State; StateFlavor = x86_THREAD_STATE64; #else @@ -911,41 +909,47 @@ CONTEXT_GetThreadContextFromPort( } if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING & CONTEXT_AREA_MASK) { -#ifdef _X86_ - x86_float_state32_t State; - StateFlavor = x86_FLOAT_STATE32; -#elif defined(_AMD64_) - x86_float_state64_t State; - StateFlavor = x86_FLOAT_STATE64; -#else -#error Unexpected architecture. -#endif - StateCount = sizeof(State) / sizeof(natural_t); - MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount); - if (MachRet != KERN_SUCCESS) - { - ASSERT("thread_get_state(FLOAT_STATE) failed: %d\n", MachRet); - goto exit; - } - - CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext); - } + // The thread_get_state for floating point state can fail for some flavors when the processor is not + // in the right mode at the time we are taking the state. So we will try to get the AVX state first and + // if it fails, get the FLOAT state and if that fails, take AVX512 state. Both AVX and AVX512 states + // are supersets of the FLOAT state. + // Check a few fields to make sure the assumption is correct. + static_assert_no_msg(sizeof(x86_avx_state64_t) > sizeof(x86_float_state64_t)); + static_assert_no_msg(sizeof(x86_avx512_state64_t) > sizeof(x86_avx_state64_t)); + static_assert_no_msg(offsetof(x86_avx_state64_t, __fpu_fcw) == offsetof(x86_float_state64_t, __fpu_fcw)); + static_assert_no_msg(offsetof(x86_avx_state64_t, __fpu_xmm0) == offsetof(x86_float_state64_t, __fpu_xmm0)); + static_assert_no_msg(offsetof(x86_avx512_state64_t, __fpu_fcw) == offsetof(x86_float_state64_t, __fpu_fcw)); + static_assert_no_msg(offsetof(x86_avx512_state64_t, __fpu_xmm0) == offsetof(x86_float_state64_t, __fpu_xmm0)); + + x86_avx512_state64_t State; -#if defined(_AMD64_) && defined(XSTATE_SUPPORTED) - if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) { - x86_avx_state64_t State; StateFlavor = x86_AVX_STATE64; - StateCount = sizeof(State) / sizeof(natural_t); + StateCount = sizeof(x86_avx_state64_t) / sizeof(natural_t); MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount); if (MachRet != KERN_SUCCESS) { - ASSERT("thread_get_state(XSTATE) failed: %d\n", MachRet); - goto exit; + // The AVX state is not available, try to get the AVX512 state. + StateFlavor = x86_AVX512_STATE64; + StateCount = sizeof(x86_avx512_state64_t) / sizeof(natural_t); + MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount); + if (MachRet != KERN_SUCCESS) + { + // Neither the AVX nor the AVX512 state is not available, try to get at least the FLOAT state. + lpContext->ContextFlags &= ~(CONTEXT_XSTATE & CONTEXT_AREA_MASK); + + StateFlavor = x86_FLOAT_STATE64; + StateCount = sizeof(x86_float_state64_t) / sizeof(natural_t); + MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount); + if (MachRet != KERN_SUCCESS) + { + // We were unable to get any floating point state. This case was observed on OSX with AVX512 capable processors. + lpContext->ContextFlags &= ~((CONTEXT_XSTATE | CONTEXT_ALL_FLOATING) & CONTEXT_AREA_MASK); + } + } } CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext); } -#endif exit: return MachRet; @@ -964,65 +968,7 @@ CONTEXT_GetThreadContextFromThreadState( { switch (threadStateFlavor) { -#ifdef _X86_ - case x86_THREAD_STATE32: - if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK) - { - x86_thread_state32_t *pState = (x86_thread_state32_t *)threadState; - - lpContext->Eax = pState->eax; - lpContext->Ebx = pState->ebx; - lpContext->Ecx = pState->ecx; - lpContext->Edx = pState->edx; - lpContext->Edi = pState->edi; - lpContext->Esi = pState->esi; - lpContext->Ebp = pState->ebp; - lpContext->Esp = pState->esp; - lpContext->SegSs = pState->ss; - lpContext->EFlags = pState->eflags; - lpContext->Eip = pState->eip; - lpContext->SegCs = pState->cs; - lpContext->SegDs_PAL_Undefined = pState->ds; - lpContext->SegEs_PAL_Undefined = pState->es; - lpContext->SegFs_PAL_Undefined = pState->fs; - lpContext->SegGs_PAL_Undefined = pState->gs; - } - break; - - case x86_FLOAT_STATE32: - { - x86_float_state32_t *pState = (x86_float_state32_t *)threadState; - - if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK) - { - lpContext->FloatSave.ControlWord = *(DWORD*)&pState->fpu_fcw; - lpContext->FloatSave.StatusWord = *(DWORD*)&pState->fpu_fsw; - lpContext->FloatSave.TagWord = pState->fpu_ftw; - lpContext->FloatSave.ErrorOffset = pState->fpu_ip; - lpContext->FloatSave.ErrorSelector = pState->fpu_cs; - lpContext->FloatSave.DataOffset = pState->fpu_dp; - lpContext->FloatSave.DataSelector = pState->fpu_ds; - lpContext->FloatSave.Cr0NpxState = pState->fpu_mxcsr; - - // Windows stores the floating point registers in a packed layout (each 10-byte register end to end - // for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably for - // alignment purposes). So we can't just memcpy the registers over in a single block, we need to copy - // them individually. - for (int i = 0; i < 8; i++) - memcpy(&lpContext->FloatSave.RegisterArea[i * 10], (&pState->fpu_stmm0)[i].mmst_reg, 10); - } - - if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS & CONTEXT_AREA_MASK) - { - // The only extended register information that Mach will tell us about are the xmm register values. - // Both Windows and Mach store the registers in a packed layout (each of the 8 registers is 16 bytes) - // so we can simply memcpy them across. - memcpy(lpContext->ExtendedRegisters + CONTEXT_EXREG_XMM_OFFSET, &pState->fpu_xmm0, 8 * 16); - } - } - break; - -#elif defined(_AMD64_) +#ifdef _AMD64_ case x86_THREAD_STATE64: if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK) { @@ -1057,6 +1003,16 @@ CONTEXT_GetThreadContextFromThreadState( } break; + case x86_AVX_STATE64: + case x86_AVX512_STATE64: + if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) + { + x86_avx_state64_t *pState = (x86_avx_state64_t *)threadState; + memcpy(&lpContext->VectorRegister, &pState->__fpu_ymmh0, 16 * 16); + } + + // Intentional fall-through, the AVX states are supersets of the FLOAT state + case x86_FLOAT_STATE64: if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK) { @@ -1083,16 +1039,6 @@ CONTEXT_GetThreadContextFromThreadState( memcpy(&lpContext->Xmm0, &pState->__fpu_xmm0, 16 * 16); } break; - -#ifdef XSTATE_SUPPORTED - case x86_AVX_STATE64: - if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) - { - x86_avx_state64_t *pState = (x86_avx_state64_t *)threadState; - memcpy(&lpContext->VectorRegister, &pState->__fpu_ymmh0, 16 * 16); - } - break; -#endif #else #error Unexpected architecture. #endif @@ -1181,27 +1127,7 @@ CONTEXT_SetThreadContextOnPort( if (lpContext->ContextFlags & (CONTEXT_CONTROL|CONTEXT_INTEGER) & CONTEXT_AREA_MASK) { -#ifdef _X86_ - x86_thread_state32_t State; - StateFlavor = x86_THREAD_STATE32; - - State.eax = lpContext->Eax; - State.ebx = lpContext->Ebx; - State.ecx = lpContext->Ecx; - State.edx = lpContext->Edx; - State.edi = lpContext->Edi; - State.esi = lpContext->Esi; - State.ebp = lpContext->Ebp; - State.esp = lpContext->Esp; - State.ss = lpContext->SegSs; - State.eflags = lpContext->EFlags; - State.eip = lpContext->Eip; - State.cs = lpContext->SegCs; - State.ds = lpContext->SegDs_PAL_Undefined; - State.es = lpContext->SegEs_PAL_Undefined; - State.fs = lpContext->SegFs_PAL_Undefined; - State.gs = lpContext->SegGs_PAL_Undefined; -#elif defined(_AMD64_) +#ifdef _AMD64_ x86_thread_state64_t State; StateFlavor = x86_THREAD_STATE64; @@ -1249,11 +1175,7 @@ CONTEXT_SetThreadContextOnPort( if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING & CONTEXT_AREA_MASK) { -#ifdef _X86_ - x86_float_state32_t State; - StateFlavor = x86_FLOAT_STATE32; - StateCount = sizeof(State) / sizeof(natural_t); -#elif defined(_AMD64_) +#ifdef _AMD64_ #ifdef XSTATE_SUPPORTED // We're relying on the fact that the initial portion of // x86_avx_state64_t is identical to x86_float_state64_t. @@ -1304,23 +1226,7 @@ CONTEXT_SetThreadContextOnPort( if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK) { -#ifdef _X86_ - *(DWORD*)&State.fpu_fcw = lpContext->FloatSave.ControlWord; - *(DWORD*)&State.fpu_fsw = lpContext->FloatSave.StatusWord; - State.fpu_ftw = lpContext->FloatSave.TagWord; - State.fpu_ip = lpContext->FloatSave.ErrorOffset; - State.fpu_cs = lpContext->FloatSave.ErrorSelector; - State.fpu_dp = lpContext->FloatSave.DataOffset; - State.fpu_ds = lpContext->FloatSave.DataSelector; - State.fpu_mxcsr = lpContext->FloatSave.Cr0NpxState; - - // Windows stores the floating point registers in a packed layout (each 10-byte register end to - // end for a total of 80 bytes). But Mach returns each register in an 16-bit structure (presumably - // for alignment purposes). So we can't just memcpy the registers over in a single block, we need - // to copy them individually. - for (int i = 0; i < 8; i++) - memcpy((&State.fpu_stmm0)[i].mmst_reg, &lpContext->FloatSave.RegisterArea[i * 10], 10); -#elif defined(_AMD64_) +#ifdef _AMD64_ *(DWORD*)&State.__fpu_fcw = lpContext->FltSave.ControlWord; *(DWORD*)&State.__fpu_fsw = lpContext->FltSave.StatusWord; State.__fpu_ftw = lpContext->FltSave.TagWord; @@ -1344,16 +1250,6 @@ CONTEXT_SetThreadContextOnPort( #endif } -#ifdef _X86_ - if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS & CONTEXT_AREA_MASK) - { - // The only extended register information that Mach will tell us about are the xmm register - // values. Both Windows and Mach store the registers in a packed layout (each of the 8 registers - // is 16 bytes) so we can simply memcpy them across. - memcpy(&State.fpu_xmm0, lpContext->ExtendedRegisters + CONTEXT_EXREG_XMM_OFFSET, 8 * 16); - } -#endif // _X86_ - #if defined(_AMD64_) && defined(XSTATE_SUPPORTED) if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) { |