summaryrefslogtreecommitdiff
path: root/src/pal
diff options
context:
space:
mode:
authorJohn Chen <jochen@microsoft.com>2016-11-29 19:50:09 -0800
committerGitHub <noreply@github.com>2016-11-29 19:50:09 -0800
commit85c0a5641d60aabe028a9e4b4ef07a7b58a2181b (patch)
tree92eeb60f1702c448e5d62eb80c92f59879b406d5 /src/pal
parent89440daa5cfe3ad814b9caac6bd47c5d7ffc6d9b (diff)
downloadcoreclr-85c0a5641d60aabe028a9e4b4ef07a7b58a2181b.tar.gz
coreclr-85c0a5641d60aabe028a9e4b4ef07a7b58a2181b.tar.bz2
coreclr-85c0a5641d60aabe028a9e4b4ef07a7b58a2181b.zip
Capture YMM registers on OSX (#8365)
Also fixed checking of context flags on OSX (issue #8116)
Diffstat (limited to 'src/pal')
-rw-r--r--src/pal/src/CMakeLists.txt1
-rw-r--r--src/pal/src/arch/amd64/processor.cpp20
-rw-r--r--src/pal/src/exception/machexception.cpp7
-rw-r--r--src/pal/src/thread/context.cpp89
4 files changed, 100 insertions, 17 deletions
diff --git a/src/pal/src/CMakeLists.txt b/src/pal/src/CMakeLists.txt
index 89eac35273..298763e41b 100644
--- a/src/pal/src/CMakeLists.txt
+++ b/src/pal/src/CMakeLists.txt
@@ -41,6 +41,7 @@ endif()
if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
add_definitions(-D_TARGET_MAC64)
+ add_definitions(-DXSTATE_SUPPORTED)
set(PLATFORM_SOURCES
arch/amd64/activationhandlerwrapper.S
arch/amd64/context.S
diff --git a/src/pal/src/arch/amd64/processor.cpp b/src/pal/src/arch/amd64/processor.cpp
index 4fd3a4abc8..ac3d448a81 100644
--- a/src/pal/src/arch/amd64/processor.cpp
+++ b/src/pal/src/arch/amd64/processor.cpp
@@ -42,3 +42,23 @@ YieldProcessor(
);
}
+/*++
+Function:
+XmmYmmStateSupport
+
+Check if OS has enabled both XMM and YMM state support
+
+Return value:
+1 if XMM and YMM are enabled, 0 otherwise
+--*/
+extern "C" unsigned int XmmYmmStateSupport()
+{
+ unsigned int eax;
+ __asm(" xgetbv\n" \
+ : "=a"(eax) /*output in eax*/\
+ : "c"(0) /*inputs - 0 in ecx*/\
+ : "eax", "edx" /* registers that are clobbered*/
+ );
+ // Check OS has enabled both XMM and YMM state support
+ return ((eax & 0x06) == 0x06) ? 1 : 0;
+}
diff --git a/src/pal/src/exception/machexception.cpp b/src/pal/src/exception/machexception.cpp
index 8b0d7f22a8..5328e1f329 100644
--- a/src/pal/src/exception/machexception.cpp
+++ b/src/pal/src/exception/machexception.cpp
@@ -1518,6 +1518,7 @@ ActivationHandler(CONTEXT* context)
extern "C" void ActivationHandlerWrapper();
extern "C" int ActivationHandlerReturnOffset;
+extern "C" unsigned int XmmYmmStateSupport();
/*++
Function :
@@ -1581,6 +1582,12 @@ InjectActivationInternal(CPalThread* pThread)
// after the activation function returns.
CONTEXT *pContext = (CONTEXT *)contextAddress;
pContext->ContextFlags = CONTEXT_FULL | CONTEXT_SEGMENTS;
+#ifdef XSTATE_SUPPORTED
+ if (XmmYmmStateSupport() == 1)
+ {
+ pContext->ContextFlags |= CONTEXT_XSTATE;
+ }
+#endif
MachRet = CONTEXT_GetThreadContextFromPort(threadPort, pContext);
_ASSERT_MSG(MachRet == KERN_SUCCESS, "CONTEXT_GetThreadContextFromPort\n");
diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp
index 9e1e060321..d051051283 100644
--- a/src/pal/src/thread/context.cpp
+++ b/src/pal/src/thread/context.cpp
@@ -36,6 +36,7 @@ extern PGET_GCMARKER_EXCEPTION_CODE g_getGcMarkerExceptionCode;
// in context2.S
extern void CONTEXT_CaptureContext(LPCONTEXT lpContext);
+#define CONTEXT_AREA_MASK 0xffff
#ifdef _X86_
#define CONTEXT_ALL_FLOATING (CONTEXT_FLOATING_POINT | CONTEXT_EXTENDED_REGISTERS)
#elif defined(_AMD64_)
@@ -288,7 +289,7 @@ CONTEXT_GetThreadContext(
}
if (lpContext->ContextFlags &
- (CONTEXT_CONTROL | CONTEXT_INTEGER))
+ (CONTEXT_CONTROL | CONTEXT_INTEGER) & CONTEXT_AREA_MASK)
{
if (CONTEXT_GetRegisters(dwProcessId, lpContext) == FALSE)
{
@@ -348,7 +349,7 @@ CONTEXT_SetThreadContext(
}
if (lpContext->ContextFlags &
- (CONTEXT_CONTROL | CONTEXT_INTEGER))
+ (CONTEXT_CONTROL | CONTEXT_INTEGER) & CONTEXT_AREA_MASK)
{
#if HAVE_PT_REGS
if (ptrace((__ptrace_request)PT_GETREGS, dwProcessId, (caddr_t)&ptrace_registers, 0) == -1)
@@ -371,11 +372,11 @@ CONTEXT_SetThreadContext(
ASSERT("Don't know how to set the context of another process on this platform!");
return FALSE;
#endif
- if (lpContext->ContextFlags & CONTEXT_CONTROL)
+ if (lpContext->ContextFlags & CONTEXT_CONTROL & CONTEXT_AREA_MASK)
{
ASSIGN_CONTROL_REGS
}
- if (lpContext->ContextFlags & CONTEXT_INTEGER)
+ if (lpContext->ContextFlags & CONTEXT_INTEGER & CONTEXT_AREA_MASK)
{
ASSIGN_INTEGER_REGS
}
@@ -857,7 +858,7 @@ CONTEXT_GetThreadContextFromPort(
mach_msg_type_number_t StateCount;
thread_state_flavor_t StateFlavor;
- if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS))
+ if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK)
{
#ifdef _X86_
x86_thread_state32_t State;
@@ -879,7 +880,7 @@ CONTEXT_GetThreadContextFromPort(
CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext);
}
- if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING) {
+ if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING & CONTEXT_AREA_MASK) {
#ifdef _X86_
x86_float_state32_t State;
StateFlavor = x86_FLOAT_STATE32;
@@ -900,6 +901,22 @@ CONTEXT_GetThreadContextFromPort(
CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext);
}
+#if defined(_AMD64_) && defined(XSTATE_SUPPORTED)
+ if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) {
+ x86_avx_state64_t State;
+ StateFlavor = x86_AVX_STATE64;
+ StateCount = sizeof(State) / sizeof(natural_t);
+ MachRet = thread_get_state(Port, StateFlavor, (thread_state_t)&State, &StateCount);
+ if (MachRet != KERN_SUCCESS)
+ {
+ ASSERT("thread_get_state(XSTATE) failed: %d\n", MachRet);
+ goto exit;
+ }
+
+ CONTEXT_GetThreadContextFromThreadState(StateFlavor, (thread_state_t)&State, lpContext);
+ }
+#endif
+
exit:
return MachRet;
}
@@ -919,7 +936,7 @@ CONTEXT_GetThreadContextFromThreadState(
{
#ifdef _X86_
case x86_THREAD_STATE32:
- if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS))
+ if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK)
{
x86_thread_state32_t *pState = (x86_thread_state32_t *)threadState;
@@ -946,7 +963,7 @@ CONTEXT_GetThreadContextFromThreadState(
{
x86_float_state32_t *pState = (x86_float_state32_t *)threadState;
- if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT)
+ if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK)
{
lpContext->FloatSave.ControlWord = *(DWORD*)&pState->fpu_fcw;
lpContext->FloatSave.StatusWord = *(DWORD*)&pState->fpu_fsw;
@@ -965,7 +982,7 @@ CONTEXT_GetThreadContextFromThreadState(
memcpy(&lpContext->FloatSave.RegisterArea[i * 10], (&pState->fpu_stmm0)[i].mmst_reg, 10);
}
- if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS)
+ if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS & CONTEXT_AREA_MASK)
{
// The only extended register information that Mach will tell us about are the xmm register values.
// Both Windows and Mach store the registers in a packed layout (each of the 8 registers is 16 bytes)
@@ -977,7 +994,7 @@ CONTEXT_GetThreadContextFromThreadState(
#elif defined(_AMD64_)
case x86_THREAD_STATE64:
- if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS))
+ if (lpContext->ContextFlags & (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS) & CONTEXT_AREA_MASK)
{
x86_thread_state64_t *pState = (x86_thread_state64_t *)threadState;
@@ -1011,7 +1028,7 @@ CONTEXT_GetThreadContextFromThreadState(
break;
case x86_FLOAT_STATE64:
- if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT)
+ if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK)
{
x86_float_state64_t *pState = (x86_float_state64_t *)threadState;
@@ -1036,6 +1053,16 @@ CONTEXT_GetThreadContextFromThreadState(
memcpy(&lpContext->Xmm0, &pState->__fpu_xmm0, 16 * 16);
}
break;
+
+#ifdef XSTATE_SUPPORTED
+ case x86_AVX_STATE64:
+ if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK)
+ {
+ x86_avx_state64_t *pState = (x86_avx_state64_t *)threadState;
+ memcpy(&lpContext->VectorRegister, &pState->__fpu_ymmh0, 16 * 16);
+ }
+ break;
+#endif
#else
#error Unexpected architecture.
#endif
@@ -1122,7 +1149,7 @@ CONTEXT_SetThreadContextOnPort(
mach_msg_type_number_t StateCount;
thread_state_flavor_t StateFlavor;
- if (lpContext->ContextFlags & (CONTEXT_CONTROL|CONTEXT_INTEGER))
+ if (lpContext->ContextFlags & (CONTEXT_CONTROL|CONTEXT_INTEGER) & CONTEXT_AREA_MASK)
{
#ifdef _X86_
x86_thread_state32_t State;
@@ -1189,21 +1216,42 @@ CONTEXT_SetThreadContextOnPort(
}
}
- if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING)
+ if (lpContext->ContextFlags & CONTEXT_ALL_FLOATING & CONTEXT_AREA_MASK)
{
#ifdef _X86_
x86_float_state32_t State;
StateFlavor = x86_FLOAT_STATE32;
+ StateCount = sizeof(State) / sizeof(natural_t);
#elif defined(_AMD64_)
+#ifdef XSTATE_SUPPORTED
+ // We're relying on the fact that the initial portion of
+ // x86_avx_state64_t is identical to x86_float_state64_t.
+ // Check a few fields to make sure the assumption is correct.
+ static_assert_no_msg(sizeof(x86_avx_state64_t) > sizeof(x86_float_state64_t));
+ static_assert_no_msg(offsetof(x86_avx_state64_t, __fpu_fcw) == offsetof(x86_float_state64_t, __fpu_fcw));
+ static_assert_no_msg(offsetof(x86_avx_state64_t, __fpu_xmm0) == offsetof(x86_float_state64_t, __fpu_xmm0));
+
+ x86_avx_state64_t State;
+ if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK)
+ {
+ StateFlavor = x86_AVX_STATE64;
+ StateCount = sizeof(State) / sizeof(natural_t);
+ }
+ else
+ {
+ StateFlavor = x86_FLOAT_STATE64;
+ StateCount = sizeof(x86_float_state64_t) / sizeof(natural_t);
+ }
+#else
x86_float_state64_t State;
StateFlavor = x86_FLOAT_STATE64;
+ StateCount = sizeof(State) / sizeof(natural_t);
+#endif
#else
#error Unexpected architecture.
#endif
- StateCount = sizeof(State) / sizeof(natural_t);
-
// If we're setting only one of the floating point or extended registers (of which Mach supports only
// the xmm values) then we don't have values for the other set. This is a problem since Mach only
// supports setting both groups as a single unit. So in this case we'll need to fetch the current
@@ -1224,7 +1272,7 @@ CONTEXT_SetThreadContextOnPort(
_ASSERTE(StateCountGet == StateCount);
}
- if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT)
+ if (lpContext->ContextFlags & CONTEXT_FLOATING_POINT & CONTEXT_AREA_MASK)
{
#ifdef _X86_
*(DWORD*)&State.fpu_fcw = lpContext->FloatSave.ControlWord;
@@ -1267,7 +1315,7 @@ CONTEXT_SetThreadContextOnPort(
}
#ifdef _X86_
- if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS)
+ if (lpContext->ContextFlags & CONTEXT_EXTENDED_REGISTERS & CONTEXT_AREA_MASK)
{
// The only extended register information that Mach will tell us about are the xmm register
// values. Both Windows and Mach store the registers in a packed layout (each of the 8 registers
@@ -1276,6 +1324,13 @@ CONTEXT_SetThreadContextOnPort(
}
#endif // _X86_
+#if defined(_AMD64_) && defined(XSTATE_SUPPORTED)
+ if (lpContext->ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK)
+ {
+ memcpy(&State.__fpu_ymmh0, lpContext->VectorRegister, 16 * 16);
+ }
+#endif
+
MachRet = thread_set_state(Port,
StateFlavor,
(thread_state_t)&State,