diff options
Diffstat (limited to 'src/pal/src/arch')
-rw-r--r-- | src/pal/src/arch/arm/asmconstants.h | 61 | ||||
-rw-r--r-- | src/pal/src/arch/arm/context2.S | 174 | ||||
-rw-r--r-- | src/pal/src/arch/arm/debugbreak.S | 14 | ||||
-rw-r--r-- | src/pal/src/arch/arm/exceptionhelper.S | 30 | ||||
-rw-r--r-- | src/pal/src/arch/arm/processor.cpp | 42 | ||||
-rw-r--r-- | src/pal/src/arch/arm64/context2.S | 300 | ||||
-rw-r--r-- | src/pal/src/arch/arm64/debugbreak.S | 11 | ||||
-rw-r--r-- | src/pal/src/arch/arm64/exceptionhelper.S | 9 | ||||
-rw-r--r-- | src/pal/src/arch/arm64/processor.cpp | 41 | ||||
-rw-r--r-- | src/pal/src/arch/i386/activationhandlerwrapper.S | 30 | ||||
-rw-r--r-- | src/pal/src/arch/i386/asmconstants.h | 106 | ||||
-rw-r--r-- | src/pal/src/arch/i386/context.S | 21 | ||||
-rw-r--r-- | src/pal/src/arch/i386/context2.S | 259 | ||||
-rw-r--r-- | src/pal/src/arch/i386/debugbreak.S | 12 | ||||
-rw-r--r-- | src/pal/src/arch/i386/dispatchexceptionwrapper.S | 51 | ||||
-rw-r--r-- | src/pal/src/arch/i386/exceptionhelper.S | 42 | ||||
-rw-r--r-- | src/pal/src/arch/i386/optimizedtls.cpp | 237 | ||||
-rw-r--r-- | src/pal/src/arch/i386/processor.cpp | 44 |
18 files changed, 1484 insertions, 0 deletions
diff --git a/src/pal/src/arch/arm/asmconstants.h b/src/pal/src/arch/arm/asmconstants.h
new file mode 100644
index 0000000000..ae4b01b8dc
--- /dev/null
+++ b/src/pal/src/arch/arm/asmconstants.h
@@ -0,0 +1,61 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef __PAL_ARM_ASMCONSTANTS_H__
+#define __PAL_ARM_ASMCONSTANTS_H__
+
+#define CONTEXT_ContextFlags 0
+#define CONTEXT_R0 CONTEXT_ContextFlags+4
+#define CONTEXT_R1 CONTEXT_R0+4
+#define CONTEXT_R2 CONTEXT_R1+4
+#define CONTEXT_R3 CONTEXT_R2+4
+#define CONTEXT_R4 CONTEXT_R3+4
+#define CONTEXT_R5 CONTEXT_R4+4
+#define CONTEXT_R6 CONTEXT_R5+4
+#define CONTEXT_R7 CONTEXT_R6+4
+#define CONTEXT_R8 CONTEXT_R7+4
+#define CONTEXT_R9 CONTEXT_R8+4
+#define CONTEXT_R10 CONTEXT_R9+4
+#define CONTEXT_R11 CONTEXT_R10+4
+#define CONTEXT_R12 CONTEXT_R11+4
+#define CONTEXT_Sp CONTEXT_R12+4
+#define CONTEXT_Lr CONTEXT_Sp+4
+#define CONTEXT_Pc CONTEXT_Lr+4
+#define CONTEXT_Cpsr CONTEXT_Pc+4
+#define CONTEXT_Fpscr CONTEXT_Cpsr+4
+#define CONTEXT_Padding CONTEXT_Fpscr+4
+#define CONTEXT_D0 CONTEXT_Padding+4
+#define CONTEXT_D1 CONTEXT_D0+8
+#define CONTEXT_D2 CONTEXT_D1+8
+#define CONTEXT_D3 CONTEXT_D2+8
+#define CONTEXT_D4 CONTEXT_D3+8
+#define CONTEXT_D5 CONTEXT_D4+8
+#define CONTEXT_D6 CONTEXT_D5+8
+#define CONTEXT_D7 CONTEXT_D6+8
+#define CONTEXT_D8 CONTEXT_D7+8
+#define CONTEXT_D9 CONTEXT_D8+8
+#define CONTEXT_D10 CONTEXT_D9+8
+#define CONTEXT_D11 CONTEXT_D10+8
+#define CONTEXT_D12 CONTEXT_D11+8
+#define CONTEXT_D13 CONTEXT_D12+8
+#define CONTEXT_D14 CONTEXT_D13+8
+#define CONTEXT_D15 CONTEXT_D14+8
+#define CONTEXT_D16 CONTEXT_D15+8
+#define CONTEXT_D17 CONTEXT_D16+8
+#define CONTEXT_D18 CONTEXT_D17+8
+#define CONTEXT_D19 CONTEXT_D18+8
+#define CONTEXT_D20 CONTEXT_D19+8
+#define CONTEXT_D21 CONTEXT_D20+8
+#define CONTEXT_D22 CONTEXT_D21+8
+#define CONTEXT_D23 CONTEXT_D22+8
+#define CONTEXT_D24 CONTEXT_D23+8
+#define CONTEXT_D25 CONTEXT_D24+8
+#define CONTEXT_D26 CONTEXT_D25+8
+#define CONTEXT_D27 CONTEXT_D26+8
+#define CONTEXT_D28 CONTEXT_D27+8
+#define CONTEXT_D29 CONTEXT_D28+8
+#define CONTEXT_D30 CONTEXT_D29+8
+#define CONTEXT_D31 CONTEXT_D30+8
+
+#endif
diff --git a/src/pal/src/arch/arm/context2.S b/src/pal/src/arch/arm/context2.S
new file mode 100644
index 0000000000..61e9ab8463
--- /dev/null
+++ b/src/pal/src/arch/arm/context2.S
@@ -0,0 +1,174 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// Implementation of _CONTEXT_CaptureContext for the ARM platform.
+// This function is processor dependent. It is used by exception handling,
+// and always applies to the current thread.
+//
+
+#include "unixasmmacros.inc"
+#include "asmconstants.h"
+
+.syntax unified
+.thumb
+
+#define CONTEXT_ARM 0x00200000
+
+#define CONTEXT_CONTROL 1 // Sp, Lr, Pc, Cpsr
+#define CONTEXT_INTEGER 2 // R0-R12
+#define CONTEXT_SEGMENTS 4 //
+#define CONTEXT_FLOATING_POINT 8
+#define CONTEXT_DEBUG_REGISTERS 16 //
+
+#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
+
+
+// Incoming:
+// r0: Context*
+//
+LEAF_ENTRY CONTEXT_CaptureContext, _TEXT
+    // Ensure we save these registers
+    push {r4-r11}
+    // Save processor flags before executing any of the following 'tst' instructions
+    // because they modify the state of some flags
+    push {r1}
+    mrs r1, apsr // Get APSR - equivalent to eflags
+    push {r1} // Save APSR
+    END_PROLOGUE
+
+    push {r2} // r2 is only borrowed for the flag test; pop does not alter the condition flags
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_INTEGER)
+    pop {r2}
+
+    // Add 4 to stack so we point at R1, pop, then sub 8 to point at APSR
+    add sp, sp, #4
+    pop {r1}
+    sub sp, sp, #8
+
+    itttt ne // CONTEXT_INTEGER requested: store r0, then r1-r12 in one block
+    strne r0, [r0, #(CONTEXT_R0)]
+    addne r0, CONTEXT_R1
+    stmiane r0, {r1-r12}
+    subne r0, CONTEXT_R1
+
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_CONTROL)
+
+    ittt ne
+    addne sp, sp, #(10*4) // This needs to put the stack in the same state as it started
+    strne sp, [r0, #(CONTEXT_Sp)]
+    subne sp, sp, #(10*4)
+
+    itt ne
+    strne lr, [r0, #(CONTEXT_Lr)]
+    strne lr, [r0, #(CONTEXT_Pc)] // the captured Pc is the return address of this call
+
+    // Get the APSR pushed onto the stack at the start
+    pop {r1}
+    it ne
+    strne r1, [r0, #(CONTEXT_Cpsr)]
+
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_FLOATING_POINT)
+
+    itt ne
+    vmrsne r3, fpscr
+    strne r3, [r0, #(CONTEXT_Fpscr)]
+
+    itttt ne
+    addne r0, CONTEXT_D0
+    vstmiane r0!, {d0-d15}
+    vstmiane r0!, {d16-d31}
+    subne r0, CONTEXT_D31+8 // both stores write back, so r0 advanced by CONTEXT_D0 + 32*8
+
+    // Make sure sp is restored
+    add sp, sp, #4
+
+    // Restore callee saved registers
+    pop {r4-r11}
+    bx lr
+LEAF_END CONTEXT_CaptureContext, _TEXT
+
+// Incoming:
+// R0: Context*
+//
+LEAF_ENTRY RtlCaptureContext, _TEXT
+    push {r1}
+    mov r1, #0
+    orr r1, r1, #CONTEXT_ARM
+    orr r1, r1, #CONTEXT_INTEGER
+    orr r1, r1, #CONTEXT_CONTROL
+    orr r1, r1, #CONTEXT_FLOATING_POINT
+    str r1, [r0, #(CONTEXT_ContextFlags)]
+    pop {r1}
+    b C_FUNC(CONTEXT_CaptureContext) // tail call: lr still holds the return address of our caller
+LEAF_END RtlCaptureContext, _TEXT
+
+// Incoming:
+// r0: Context*
+// r1: Exception*
+//
+LEAF_ENTRY RtlRestoreContext, _TEXT
+    END_PROLOGUE
+
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_FLOATING_POINT)
+
+    itttt ne
+    addne r0, CONTEXT_D0
+    vldmiane r0!, {d0-d15}
+    vldmiane r0, {d16-d31}
+    subne r0, CONTEXT_D16 // only the first load writes back, so r0 is at CONTEXT_D16 here
+
+    itt ne
+    ldrne r3, [r0, #(CONTEXT_Fpscr)]
+    vmsrne fpscr, r3 // VMSR writes r3 into FPSCR (VMRS would read FPSCR and discard the loaded value)
+
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_CONTROL)
+
+    it eq
+    beq LOCAL_LABEL(No_Restore_CONTEXT_CONTROL)
+
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_INTEGER)
+
+    it eq
+    beq LOCAL_LABEL(No_Restore_CONTEXT_INTEGER)
+
+    ldr R2, [r0, #(CONTEXT_Cpsr)]
+    msr APSR, r2
+
+    // Ideally, we would like to use `ldmia r0, {r0-r12, sp, lr, pc}` here,
+    // but clang 3.6 and later, as per ARM recommendation, disallows using
+    // Sp in the register list, and Pc and Lr simultaneously.
+    // So we are going to use the IP register r12 to copy Sp, Lr and Pc
+    // which should be ok -- TODO: Is this really ok? (r12 itself is not restored on this path)
+    add r12, r0, CONTEXT_R0
+    ldm r12, {r0-r11}
+    ldr sp, [r12, #(CONTEXT_Sp - (CONTEXT_R0))]
+    ldr lr, [r12, #(CONTEXT_Lr - (CONTEXT_R0))]
+    ldr pc, [r12, #(CONTEXT_Pc - (CONTEXT_R0))]
+
+LOCAL_LABEL(No_Restore_CONTEXT_INTEGER):
+
+    ldr r2, [r0, #(CONTEXT_Cpsr)]
+    msr APSR, r2
+
+    ldr sp, [r0, #(CONTEXT_Sp)]
+    ldr lr, [r0, #(CONTEXT_Lr)]
+    ldr pc, [r0, #(CONTEXT_Pc)]
+
+LOCAL_LABEL(No_Restore_CONTEXT_CONTROL):
+    ldr r2, [r0, #(CONTEXT_ContextFlags)]
+    tst r2, #(CONTEXT_INTEGER)
+
+    itt ne
+    addne r0, CONTEXT_R0
+    ldmiane r0, {r0-r12}
+
+    sub sp, sp, #4 // NOTE(review): nothing was pushed on this path, so sp ends 4 bytes below its entry value - confirm
+    bx lr
+LEAF_END RtlRestoreContext, _TEXT
diff --git a/src/pal/src/arch/arm/debugbreak.S b/src/pal/src/arch/arm/debugbreak.S
new file mode 100644
index 0000000000..863d7cf40b
--- /dev/null
+++ b/src/pal/src/arch/arm/debugbreak.S
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "unixasmmacros.inc"
+
+.syntax unified
+.thumb
+
+LEAF_ENTRY DBG_DebugBreak, _TEXT
+    EMIT_BREAKPOINT
+    bx lr
+LEAF_END_MARKED DBG_DebugBreak, _TEXT
+
diff --git a/src/pal/src/arch/arm/exceptionhelper.S b/src/pal/src/arch/arm/exceptionhelper.S
new file mode 100644
index 0000000000..ed1c9c3dc2
--- /dev/null
+++ b/src/pal/src/arch/arm/exceptionhelper.S
@@ -0,0 +1,30 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "unixasmmacros.inc"
+#include "asmconstants.h"
+
+.syntax unified
+.thumb
+
+// EXTERN_C void ThrowExceptionFromContextInternal(CONTEXT* context, PAL_SEHException* ex);
+LEAF_ENTRY ThrowExceptionFromContextInternal, _TEXT
+    // Ported from src/pal/src/arch/i386/exceptionhelper.S
+    push_nonvol_reg {r7} /* FP. x64-RBP */
+
+    ldr r4, [r0, #(CONTEXT_R4)]
+    ldr r5, [r0, #(CONTEXT_R5)]
+    ldr r6, [r0, #(CONTEXT_R6)]
+    ldr r7, [r0, #(CONTEXT_R7)]
+    ldr r8, [r0, #(CONTEXT_R8)]
+    ldr r9, [r0, #(CONTEXT_R9)]
+    ldr r10, [r0, #(CONTEXT_R10)]
+    ldr r11, [r0, #(CONTEXT_R11)]
+    ldr sp, [r0, #(CONTEXT_Sp)]
+    ldr lr, [r0, #(CONTEXT_Pc)] // lr = saved Pc, so the helper is entered as if called from that address
+
+    // The PAL_SEHException pointer
+    mov r0, r1
+    b EXTERNAL_C_FUNC(ThrowExceptionHelper)
+LEAF_END ThrowExceptionFromContextInternal, _TEXT
diff --git a/src/pal/src/arch/arm/processor.cpp b/src/pal/src/arch/arm/processor.cpp
new file mode 100644
index 0000000000..f41caff1e0
--- /dev/null
+++ b/src/pal/src/arch/arm/processor.cpp
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*++
+
+
+
+Module Name:
+
+    processor.cpp
+
+Abstract:
+
+    Implementation of processor related functions for the ARM
+    platform. These functions are processor dependent.
+
+
+
+--*/
+
+#include "pal/palinternal.h"
+
+/*++
+Function:
+YieldProcessor
+
+The YieldProcessor function signals to the processor to give resources
+to threads that are waiting for them. This macro is only effective on
+processors that support technology allowing multiple threads running
+on a single processor, such as Intel's Hyper-Threading technology.
+
+--*/
+void
+PALAPI
+YieldProcessor(
+    VOID)
+{
+    // Currently a no-op on ARM. NOTE(review): ARMv7 has a `yield` hint instruction that could be issued here - confirm.
+    return;
+}
+
diff --git a/src/pal/src/arch/arm64/context2.S b/src/pal/src/arch/arm64/context2.S
new file mode 100644
index 0000000000..a64e62c94d
--- /dev/null
+++ b/src/pal/src/arch/arm64/context2.S
@@ -0,0 +1,300 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// Implementation of _CONTEXT_CaptureContext for the ARM64 platform.
+// This function is processor dependent. It is used by exception handling,
+// and always applies to the current thread.
+//
+
+#include "unixasmmacros.inc"
+
+#define CONTEXT_ARM64 0x00400000L
+
+#define CONTEXT_CONTROL (CONTEXT_ARM64 | 0x1L)
+#define CONTEXT_INTEGER (CONTEXT_ARM64 | 0x2L)
+#define CONTEXT_FLOATING_POINT (CONTEXT_ARM64 | 0x4L)
+#define CONTEXT_DEBUG_REGISTERS (CONTEXT_ARM64 | 0x8L)
+
+#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
+
+#define CONTEXT_ContextFlags 0
+#define CONTEXT_Cpsr CONTEXT_ContextFlags+4
+#define CONTEXT_X0 CONTEXT_Cpsr+4
+#define CONTEXT_X1 CONTEXT_X0+8
+#define CONTEXT_X2 CONTEXT_X1+8
+#define CONTEXT_X3 CONTEXT_X2+8
+#define CONTEXT_X4 CONTEXT_X3+8
+#define CONTEXT_X5 CONTEXT_X4+8
+#define CONTEXT_X6 CONTEXT_X5+8
+#define CONTEXT_X7 CONTEXT_X6+8
+#define CONTEXT_X8 CONTEXT_X7+8
+#define CONTEXT_X9 CONTEXT_X8+8
+#define CONTEXT_X10 CONTEXT_X9+8
+#define CONTEXT_X11 CONTEXT_X10+8
+#define CONTEXT_X12 CONTEXT_X11+8
+#define CONTEXT_X13 CONTEXT_X12+8
+#define CONTEXT_X14 CONTEXT_X13+8
+#define CONTEXT_X15 CONTEXT_X14+8
+#define CONTEXT_X16 CONTEXT_X15+8
+#define CONTEXT_X17 CONTEXT_X16+8
+#define CONTEXT_X18 CONTEXT_X17+8
+#define CONTEXT_X19 CONTEXT_X18+8
+#define CONTEXT_X20 CONTEXT_X19+8
+#define CONTEXT_X21 CONTEXT_X20+8
+#define CONTEXT_X22 CONTEXT_X21+8
+#define CONTEXT_X23 CONTEXT_X22+8
+#define CONTEXT_X24 CONTEXT_X23+8
+#define CONTEXT_X25 CONTEXT_X24+8
+#define CONTEXT_X26 CONTEXT_X25+8
+#define CONTEXT_X27 CONTEXT_X26+8
+#define CONTEXT_X28 CONTEXT_X27+8
+#define CONTEXT_Fp CONTEXT_X28+8
+#define CONTEXT_Lr CONTEXT_Fp+8
+#define CONTEXT_Sp CONTEXT_Lr+8
+#define CONTEXT_Pc CONTEXT_Sp+8
+#define CONTEXT_NEON_OFFSET CONTEXT_Pc+8
+#define CONTEXT_V0 0
+#define CONTEXT_V1 CONTEXT_V0+16
+#define CONTEXT_V2 CONTEXT_V1+16
+#define CONTEXT_V3 CONTEXT_V2+16
+#define CONTEXT_V4 CONTEXT_V3+16
+#define CONTEXT_V5 CONTEXT_V4+16
+#define CONTEXT_V6 CONTEXT_V5+16
+#define CONTEXT_V7 CONTEXT_V6+16
+#define CONTEXT_V8 CONTEXT_V7+16
+#define CONTEXT_V9 CONTEXT_V8+16
+#define CONTEXT_V10 CONTEXT_V9+16
+#define CONTEXT_V11 CONTEXT_V10+16
+#define CONTEXT_V12 CONTEXT_V11+16
+#define CONTEXT_V13 CONTEXT_V12+16
+#define CONTEXT_V14 CONTEXT_V13+16
+#define CONTEXT_V15 CONTEXT_V14+16
+#define CONTEXT_V16 CONTEXT_V15+16
+#define CONTEXT_V17 CONTEXT_V16+16
+#define CONTEXT_V18 CONTEXT_V17+16
+#define CONTEXT_V19 CONTEXT_V18+16
+#define CONTEXT_V20 CONTEXT_V19+16
+#define CONTEXT_V21 CONTEXT_V20+16
+#define CONTEXT_V22 CONTEXT_V21+16
+#define CONTEXT_V23 CONTEXT_V22+16
+#define CONTEXT_V24 CONTEXT_V23+16
+#define CONTEXT_V25 CONTEXT_V24+16
+#define CONTEXT_V26 CONTEXT_V25+16
+#define CONTEXT_V27 CONTEXT_V26+16
+#define CONTEXT_V28 CONTEXT_V27+16
+#define CONTEXT_V29 CONTEXT_V28+16
+#define CONTEXT_V30 CONTEXT_V29+16
+#define CONTEXT_V31 CONTEXT_V30+16
+#define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16 // first byte after the 16-byte V31 slot, relative to CONTEXT_NEON_OFFSET
+#define CONTEXT_Fpcr 0
+#define CONTEXT_Fpsr CONTEXT_Fpcr+4
+
+// Incoming:
+// x0: Context*
+//
+LEAF_ENTRY CONTEXT_CaptureContext, _TEXT
+    sub sp, sp, #32
+    // save x1, x2 and x3 on stack so we can use them as scratch
+    stp x1, x2, [sp]
+    str x3, [sp, 16]
+    // save the current flags on the stack
+    mrs x1, nzcv
+    str x1, [sp, 24]
+
+    ldr w1, [x0, CONTEXT_ContextFlags]
+    // clangs assembler doesn't seem to support the mov Wx, imm32 yet
+    movz w2, #0x40, lsl #16
+    movk w2, #0x1
+    mov w3, w2
+    and w2, w1, w2
+    cmp w2, w3
+    b.ne LOCAL_LABEL(Done_CONTEXT_CONTROL)
+
+    // save the cpsr
+    ldr x2, [sp, 24]
+    str w2, [x0, CONTEXT_Cpsr]
+    stp fp, lr, [x0, CONTEXT_Fp]
+    add sp, sp, #32
+    mov x2, sp // x2 = value sp had on entry to this function
+    stp x2, lr, [x0, CONTEXT_Sp] // Sp and Pc are adjacent; the captured Pc is our return address
+    sub sp, sp, #32
+
+LOCAL_LABEL(Done_CONTEXT_CONTROL):
+    // we dont clobber x1 in the CONTEXT_CONTROL case
+    // ldr w1, [x0, CONTEXT_ContextFlags]
+    // clangs assembler doesn't seem to support the mov Wx, imm32 yet
+    movz w2, #0x40, lsl #16
+    movk w2, #0x2
+    mov w3, w2
+    and w2, w1, w2
+    cmp w2, w3
+    b.ne LOCAL_LABEL(Done_CONTEXT_INTEGER)
+
+    ldp x1, x2, [sp] // reload the caller's x1-x3 so the values stored below are the original ones
+    ldr x3, [sp, 16]
+
+    stp x0, x1, [x0, CONTEXT_X0]
+    stp x2, x3, [x0, CONTEXT_X2]
+    stp x4, x5, [x0, CONTEXT_X4]
+    stp x6, x7, [x0, CONTEXT_X6]
+    stp x8, x9, [x0, CONTEXT_X8]
+    stp x10, x11, [x0, CONTEXT_X10]
+    stp x12, x13, [x0, CONTEXT_X12]
+    stp x14, x15, [x0, CONTEXT_X14]
+    stp x16, x17, [x0, CONTEXT_X16]
+    stp x18, x19, [x0, CONTEXT_X18]
+    stp x20, x21, [x0, CONTEXT_X20]
+    stp x22, x23, [x0, CONTEXT_X22]
+    stp x24, x25, [x0, CONTEXT_X24]
+    stp x26, x27, [x0, CONTEXT_X26]
+    str x28, [x0, CONTEXT_X28]
+
+
+LOCAL_LABEL(Done_CONTEXT_INTEGER):
+    ldr w1, [x0, CONTEXT_ContextFlags]
+    // clangs assembler doesn't seem to support the mov Wx, imm32 yet
+    movz w2, #0x40, lsl #16
+    movk w2, #0x4
+    mov w3, w2
+    and w2, w1, w2
+    cmp w2, w3
+    b.ne LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT)
+
+    add x0, x0, CONTEXT_NEON_OFFSET
+    stp q0, q1, [x0, CONTEXT_V0]
+    stp q2, q3, [x0, CONTEXT_V2]
+    stp q4, q5, [x0, CONTEXT_V4]
+    stp q6, q7, [x0, CONTEXT_V6]
+    stp q8, q9, [x0, CONTEXT_V8]
+    stp q10, q11, [x0, CONTEXT_V10]
+    stp q12, q13, [x0, CONTEXT_V12]
+    stp q14, q15, [x0, CONTEXT_V14]
+    stp q16, q17, [x0, CONTEXT_V16]
+    stp q18, q19, [x0, CONTEXT_V18]
+    stp q20, q21, [x0, CONTEXT_V20]
+    stp q22, q23, [x0, CONTEXT_V22]
+    stp q24, q25, [x0, CONTEXT_V24]
+    stp q26, q27, [x0, CONTEXT_V26]
+    stp q28, q29, [x0, CONTEXT_V28]
+    stp q30, q31, [x0, CONTEXT_V30]
+    add x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET
+    mrs x1, fpcr
+    mrs x2, fpsr
+    stp w1, w2, [x0, CONTEXT_Fpcr] // Fpcr/Fpsr are 4 bytes each; store while x0 still points at them
+    sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET
+    sub x0, x0, CONTEXT_NEON_OFFSET
+
+LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT):
+
+    add sp, sp, #32
+    ret
+LEAF_END CONTEXT_CaptureContext, _TEXT
+
+// Incoming:
+// x0: Context*
+
+LEAF_ENTRY RtlCaptureContext, _TEXT
+    sub sp, sp, #16
+    str x1, [sp]
+    // same as above, clang doesn't like mov with #imm32
+    // keep this in sync if CONTEXT_FULL changes
+    movz w1, #0x40, lsl #16
+    orr w1, w1, #0x1
+    orr w1, w1, #0x2
+    orr w1, w1, #0x4
+    orr w1, w1, #0x8 // NOTE(review): 0x8 is CONTEXT_DEBUG_REGISTERS, which is not part of CONTEXT_FULL - confirm
+    str w1, [x0, CONTEXT_ContextFlags]
+    ldr x1, [sp]
+    add sp, sp, #16
+    b C_FUNC(CONTEXT_CaptureContext)
+LEAF_END RtlCaptureContext, _TEXT
+
+// Incoming:
+// x0: Context*
+// x1: Exception*
+//
+LEAF_ENTRY RtlRestoreContext, _TEXT
+    // aarch64 specifies:
+    // IP0 and IP1, the Intra-Procedure Call temporary registers,
+    // are available for use by e.g. veneers or branch islands during a procedure call.
+    // They are otherwise corruptible.
+    // Since we cannot control $pc directly, we're going to corrupt x16 and x17
+    // so that we can restore control
+    // since we potentially clobber x0 below, we'll bank it in x16
+    mov x16, x0
+
+    // Keep ContextFlags in w17 (IP1): x0-x15 are reloaded by the integer restore
+    // below, so the flags cannot live in any of them across the later checks.
+    ldr w17, [x16, CONTEXT_ContextFlags]
+    // Every CONTEXT_* class mask includes CONTEXT_ARM64 (bit 22); when that bit is
+    // clear none of the classes below can match, so there is nothing to restore.
+    tbz w17, #22, LOCAL_LABEL(No_Restore_CONTEXT_CONTROL)
+    // CONTEXT_FLOATING_POINT is bit 2
+    tbz w17, #2, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT)
+
+    // x1 and x2 are free to use as scratch here: the integer restore comes later
+    add x16, x16, CONTEXT_NEON_OFFSET
+    ldp q0, q1, [x16, CONTEXT_V0]
+    ldp q2, q3, [x16, CONTEXT_V2]
+    ldp q4, q5, [x16, CONTEXT_V4]
+    ldp q6, q7, [x16, CONTEXT_V6]
+    ldp q8, q9, [x16, CONTEXT_V8]
+    ldp q10, q11, [x16, CONTEXT_V10]
+    ldp q12, q13, [x16, CONTEXT_V12]
+    ldp q14, q15, [x16, CONTEXT_V14]
+    ldp q16, q17, [x16, CONTEXT_V16]
+    ldp q18, q19, [x16, CONTEXT_V18]
+    ldp q20, q21, [x16, CONTEXT_V20]
+    ldp q22, q23, [x16, CONTEXT_V22]
+    ldp q24, q25, [x16, CONTEXT_V24]
+    ldp q26, q27, [x16, CONTEXT_V26]
+    ldp q28, q29, [x16, CONTEXT_V28]
+    ldp q30, q31, [x16, CONTEXT_V30]
+    // Fpcr and Fpsr are 32-bit fields stored after V31
+    ldr w1, [x16, CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpcr]
+    ldr w2, [x16, CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr]
+    msr fpcr, x1
+    msr fpsr, x2
+    sub x16, x16, CONTEXT_NEON_OFFSET
+
+LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT):
+    // CONTEXT_INTEGER is bit 1
+    tbz w17, #1, LOCAL_LABEL(No_Restore_CONTEXT_INTEGER)
+
+    // x16 and x17 are deliberately not reloaded: they hold the context pointer
+    // and ContextFlags and are still needed to finish the restore
+    ldp x0, x1, [x16, CONTEXT_X0]
+    ldp x2, x3, [x16, CONTEXT_X2]
+    ldp x4, x5, [x16, CONTEXT_X4]
+    ldp x6, x7, [x16, CONTEXT_X6]
+    ldp x8, x9, [x16, CONTEXT_X8]
+    ldp x10, x11, [x16, CONTEXT_X10]
+    ldp x12, x13, [x16, CONTEXT_X12]
+    ldp x14, x15, [x16, CONTEXT_X14]
+    ldp x18, x19, [x16, CONTEXT_X18]
+    ldp x20, x21, [x16, CONTEXT_X20]
+    ldp x22, x23, [x16, CONTEXT_X22]
+    ldp x24, x25, [x16, CONTEXT_X24]
+    ldp x26, x27, [x16, CONTEXT_X26]
+    ldr x28, [x16, CONTEXT_X28]
+
+LOCAL_LABEL(No_Restore_CONTEXT_INTEGER):
+    // CONTEXT_CONTROL is bit 0
+    tbz w17, #0, LOCAL_LABEL(No_Restore_CONTEXT_CONTROL)
+
+    // Restore the flags, fp, lr and sp, then branch to the saved Pc;
+    // this path does not return to the caller
+    ldr w17, [x16, CONTEXT_Cpsr]
+    msr nzcv, x17
+    ldp fp, lr, [x16, CONTEXT_Fp]
+    ldr x17, [x16, CONTEXT_Sp]
+    mov sp, x17
+    ldr x17, [x16, CONTEXT_Pc]
+    br x17
+
+LOCAL_LABEL(No_Restore_CONTEXT_CONTROL):
+    // CONTEXT_CONTROL was not requested: return to the caller
+    ret
+
+LEAF_END RtlRestoreContext, _TEXT
diff --git a/src/pal/src/arch/arm64/debugbreak.S b/src/pal/src/arch/arm64/debugbreak.S
new file mode 100644
index 0000000000..0dc5bb6bd3
--- /dev/null
+++ b/src/pal/src/arch/arm64/debugbreak.S
@@ -0,0 +1,11 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "unixasmmacros.inc"
+
+LEAF_ENTRY DBG_DebugBreak, _TEXT
+    EMIT_BREAKPOINT
+    ret
+LEAF_END_MARKED DBG_DebugBreak, _TEXT
+
diff --git a/src/pal/src/arch/arm64/exceptionhelper.S b/src/pal/src/arch/arm64/exceptionhelper.S
new file mode 100644
index 0000000000..4fdcfc5eb1
--- /dev/null
+++ b/src/pal/src/arch/arm64/exceptionhelper.S
@@ -0,0 +1,9 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "unixasmmacros.inc"
+
+LEAF_ENTRY ThrowExceptionFromContextInternal, _TEXT
+    EMIT_BREAKPOINT // placeholder body: only emits a breakpoint, no context is restored here
+LEAF_END ThrowExceptionFromContextInternal, _TEXT
diff --git a/src/pal/src/arch/arm64/processor.cpp b/src/pal/src/arch/arm64/processor.cpp
new file mode 100644
index 0000000000..6c7851a2b1
--- /dev/null
+++ b/src/pal/src/arch/arm64/processor.cpp
@@ -0,0 +1,41 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*++
+
+
+
+Module Name:
+
+    processor.cpp
+
+Abstract:
+
+    Implementation of processor related functions for the ARM64
+    platform. These functions are processor dependent.
+
+
+
+--*/
+
+#include "pal/palinternal.h"
+
+/*++
+Function:
+YieldProcessor
+
+The YieldProcessor function signals to the processor to give resources
+to threads that are waiting for them. This macro is only effective on
+processors that support technology allowing multiple threads running
+on a single processor, such as Intel's Hyper-Threading technology.
+
+--*/
+void
+PALAPI
+YieldProcessor(
+    VOID)
+{
+    __asm__ __volatile__("yield"); // A64 hint instruction for spin-wait loops
+}
+
diff --git a/src/pal/src/arch/i386/activationhandlerwrapper.S b/src/pal/src/arch/i386/activationhandlerwrapper.S
new file mode 100644
index 0000000000..63f718e81f
--- /dev/null
+++ b/src/pal/src/arch/i386/activationhandlerwrapper.S
@@ -0,0 +1,30 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+.intel_syntax noprefix
+#include "unixasmmacros.inc"
+#include "asmconstants.h"
+
+#ifdef BIT64
+// Offset of the return address from the ActivationHandler in the ActivationHandlerWrapper
+.globl C_FUNC(ActivationHandlerReturnOffset)
+C_FUNC(ActivationHandlerReturnOffset):
+    .int LOCAL_LABEL(ActivationHandlerReturn)-C_FUNC(ActivationHandlerWrapper) // byte distance from the wrapper entry to the label after the call
+
+NESTED_ENTRY ActivationHandlerWrapper, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    alloc_stack (CONTEXT_Size) // reserve CONTEXT_Size bytes on the stack
+    set_cfa_register rbp, (2*8)
+    mov rdi, rsp // rdi = start of the area just reserved (presumably the CONTEXT* argument - confirm)
+    int3 // NOTE(review): int3 on both sides of the call suggests this body is not meant to be run from its entry point - confirm
+    call C_FUNC(ActivationHandler)
+LOCAL_LABEL(ActivationHandlerReturn):
+    int3
+    free_stack (CONTEXT_Size)
+    pop_nonvol_reg rbp
+    ret
+NESTED_END ActivationHandlerWrapper, _TEXT
+
+#endif // BIT64
diff --git a/src/pal/src/arch/i386/asmconstants.h b/src/pal/src/arch/i386/asmconstants.h
new file mode 100644
index 0000000000..182c1191e4
--- /dev/null
+++ b/src/pal/src/arch/i386/asmconstants.h
@@ -0,0 +1,106 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifdef BIT64
+
+#define CONTEXT_AMD64 0x100000
+
+#define CONTEXT_CONTROL 1 // SegSs, Rsp, SegCs, Rip, and EFlags
+#define CONTEXT_INTEGER 2 // Rax, Rcx, Rdx, Rbx, Rbp, Rsi, Rdi, R8-R15
+#define CONTEXT_SEGMENTS 4 // SegDs, SegEs, SegFs, SegGs
+#define CONTEXT_FLOATING_POINT 8
+#define CONTEXT_DEBUG_REGISTERS 16 // Dr0-Dr3 and Dr6-Dr7
+
+#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
+
+#define CONTEXT_XSTATE 64
+
+#define CONTEXT_ContextFlags 6*8 // byte offsets into CONTEXT; six 8-byte slots precede ContextFlags (TODO confirm against the struct)
+#define CONTEXT_SegCs CONTEXT_ContextFlags+8
+#define CONTEXT_SegDs CONTEXT_SegCs+2
+#define CONTEXT_SegEs CONTEXT_SegDs+2
+#define CONTEXT_SegFs CONTEXT_SegEs+2
+#define CONTEXT_SegGs CONTEXT_SegFs+2
+#define CONTEXT_SegSs CONTEXT_SegGs+2
+#define CONTEXT_EFlags CONTEXT_SegSs+2
+#define CONTEXT_Dr0 CONTEXT_EFlags+4
+#define CONTEXT_Dr1 CONTEXT_Dr0+8
+#define CONTEXT_Dr2 CONTEXT_Dr1+8
+#define CONTEXT_Dr3 CONTEXT_Dr2+8
+#define CONTEXT_Dr6 CONTEXT_Dr3+8
+#define CONTEXT_Dr7 CONTEXT_Dr6+8
+#define CONTEXT_Rax CONTEXT_Dr7+8
+#define CONTEXT_Rcx CONTEXT_Rax+8
+#define CONTEXT_Rdx CONTEXT_Rcx+8
+#define CONTEXT_Rbx CONTEXT_Rdx+8
+#define CONTEXT_Rsp CONTEXT_Rbx+8
+#define CONTEXT_Rbp CONTEXT_Rsp+8
+#define CONTEXT_Rsi CONTEXT_Rbp+8
+#define CONTEXT_Rdi CONTEXT_Rsi+8
+#define CONTEXT_R8 CONTEXT_Rdi+8
+#define CONTEXT_R9 CONTEXT_R8+8
+#define CONTEXT_R10 CONTEXT_R9+8
+#define CONTEXT_R11 CONTEXT_R10+8
+#define CONTEXT_R12 CONTEXT_R11+8
+#define CONTEXT_R13 CONTEXT_R12+8
+#define CONTEXT_R14 CONTEXT_R13+8
+#define CONTEXT_R15 CONTEXT_R14+8
+#define CONTEXT_Rip CONTEXT_R15+8
+#define CONTEXT_FltSave CONTEXT_Rip+8
+#define FLOATING_SAVE_AREA_SIZE 4*8+24*16+96
+#define CONTEXT_Xmm0 CONTEXT_FltSave+10*16
+#define CONTEXT_Xmm1 CONTEXT_Xmm0+16
+#define CONTEXT_Xmm2 CONTEXT_Xmm1+16
+#define CONTEXT_Xmm3 CONTEXT_Xmm2+16
+#define CONTEXT_Xmm4 CONTEXT_Xmm3+16
+#define CONTEXT_Xmm5 CONTEXT_Xmm4+16
+#define CONTEXT_Xmm6 CONTEXT_Xmm5+16
+#define CONTEXT_Xmm7 CONTEXT_Xmm6+16
+#define CONTEXT_Xmm8 CONTEXT_Xmm7+16
+#define CONTEXT_Xmm9 CONTEXT_Xmm8+16
+#define CONTEXT_Xmm10 CONTEXT_Xmm9+16
+#define CONTEXT_Xmm11 CONTEXT_Xmm10+16
+#define CONTEXT_Xmm12 CONTEXT_Xmm11+16
+#define CONTEXT_Xmm13 CONTEXT_Xmm12+16
+#define CONTEXT_Xmm14 CONTEXT_Xmm13+16
+#define CONTEXT_Xmm15 CONTEXT_Xmm14+16
+#define CONTEXT_VectorRegister CONTEXT_FltSave+FLOATING_SAVE_AREA_SIZE
+#define CONTEXT_VectorControl CONTEXT_VectorRegister+16*26
+#define CONTEXT_DebugControl CONTEXT_VectorControl+8
+#define CONTEXT_LastBranchToRip CONTEXT_DebugControl+8
+#define CONTEXT_LastBranchFromRip CONTEXT_LastBranchToRip+8
+#define CONTEXT_LastExceptionToRip CONTEXT_LastBranchFromRip+8
+#define CONTEXT_LastExceptionFromRip CONTEXT_LastExceptionToRip+8
+#define CONTEXT_Size CONTEXT_LastExceptionFromRip+8
+
+#else // BIT64
+
+#define CONTEXT_ContextFlags 0
+#define CONTEXT_FLOATING_POINT 8
+#define CONTEXT_FloatSave 7*4
+#define FLOATING_SAVE_AREA_SIZE 8*4+80
+#define CONTEXT_Edi CONTEXT_FloatSave + FLOATING_SAVE_AREA_SIZE + 4*4
+#define CONTEXT_Esi CONTEXT_Edi+4
+#define CONTEXT_Ebx CONTEXT_Esi+4
+#define CONTEXT_Edx CONTEXT_Ebx+4
+#define CONTEXT_Ecx CONTEXT_Edx+4
+#define CONTEXT_Eax CONTEXT_Ecx+4
+#define CONTEXT_Ebp CONTEXT_Eax+4
+#define CONTEXT_Eip CONTEXT_Ebp+4
+#define CONTEXT_SegCs CONTEXT_Eip+4
+#define CONTEXT_EFlags CONTEXT_SegCs+4
+#define CONTEXT_Esp CONTEXT_EFlags+4
+#define CONTEXT_SegSs CONTEXT_Esp+4
+#define CONTEXT_EXTENDED_REGISTERS 32
+#define CONTEXT_ExtendedRegisters CONTEXT_SegSs+4
+#define CONTEXT_Xmm0 CONTEXT_ExtendedRegisters+160
+#define CONTEXT_Xmm1 CONTEXT_Xmm0+16
+#define CONTEXT_Xmm2 CONTEXT_Xmm1+16
+#define CONTEXT_Xmm3 CONTEXT_Xmm2+16
+#define CONTEXT_Xmm4 CONTEXT_Xmm3+16
+#define CONTEXT_Xmm5 CONTEXT_Xmm4+16
+#define CONTEXT_Xmm6 CONTEXT_Xmm5+16
+#define CONTEXT_Xmm7 CONTEXT_Xmm6+16
+
+#endif // BIT64
diff --git a/src/pal/src/arch/i386/context.S b/src/pal/src/arch/i386/context.S
new file mode 100644
index 0000000000..f8a2dca89c
--- /dev/null
+++ b/src/pal/src/arch/i386/context.S
@@ -0,0 +1,21 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_DEBUG)
+    .text
+    .globl _DBG_CheckStackAlignment
+
+_DBG_CheckStackAlignment:
+    // Prolog - at this point we are at aligned - 8 (for the call)
+    pushq %rbp // aligned -16
+    movq %rsp, %rbp
+
+    testl $0xf,%esp // can get away with esp even on AMD64.
+    jz .+3 // low four bits clear: skip the int3 below (presumably a 2-byte jz plus the 1-byte int3 - confirm)
+    int3 // reached only when the stack pointer is not 16-byte aligned
+
+    // Epilog
+    popq %rbp
+    ret
+#endif
diff --git a/src/pal/src/arch/i386/context2.S b/src/pal/src/arch/i386/context2.S
new file mode 100644
index 0000000000..0e93e81a55
--- /dev/null
+++ b/src/pal/src/arch/i386/context2.S
@@ -0,0 +1,259 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// Implementation of _CONTEXT_CaptureContext for the Intel x86 platform.
+// This function is processor dependent. It is used by exception handling,
+// and always applies to the current thread.
+// + +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +#ifdef BIT64 + +// Byte offsets of the fields of the frame that RtlRestoreContext builds on the stack for iretq +#define IRETFRAME_Rip 0 +#define IRETFRAME_SegCs IRETFRAME_Rip+8 +#define IRETFRAME_EFlags IRETFRAME_SegCs+8 +#define IRETFRAME_Rsp IRETFRAME_EFlags+8 +#define IRETFRAME_SegSs IRETFRAME_Rsp+8 +#define IRetFrameLength IRETFRAME_SegSs+8 +#define IRetFrameLengthAligned 16*((IRetFrameLength+8)/16) + +// Captures the current register state into the context, limited to the +// register groups selected by the context's ContextFlags. +// Incoming: +// RDI: Context* +// +LEAF_ENTRY CONTEXT_CaptureContext, _TEXT + // Save the processor flags before executing any of the following 'test' instructions + // because they modify the state of some flags + push_eflags + END_PROLOGUE + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_INTEGER + je LOCAL_LABEL(Done_CONTEXT_INTEGER) + mov [rdi + CONTEXT_Rdi], rdi + mov [rdi + CONTEXT_Rsi], rsi + mov [rdi + CONTEXT_Rbx], rbx + mov [rdi + CONTEXT_Rdx], rdx + mov [rdi + CONTEXT_Rcx], rcx + mov [rdi + CONTEXT_Rax], rax + mov [rdi + CONTEXT_Rbp], rbp + mov [rdi + CONTEXT_R8], r8 + mov [rdi + CONTEXT_R9], r9 + mov [rdi + CONTEXT_R10], r10 + mov [rdi + CONTEXT_R11], r11 + mov [rdi + CONTEXT_R12], r12 + mov [rdi + CONTEXT_R13], r13 + mov [rdi + CONTEXT_R14], r14 + mov [rdi + CONTEXT_R15], r15 +LOCAL_LABEL(Done_CONTEXT_INTEGER): + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_CONTROL + je LOCAL_LABEL(Done_CONTEXT_CONTROL) + + // Return address is @ (RSP + 8); the flags saved by push_eflags are @ (RSP) + mov rdx, [rsp + 8] + mov [rdi + CONTEXT_Rip], rdx +.att_syntax + mov %cs, CONTEXT_SegCs(%rdi) +.intel_syntax noprefix + // Get the value of EFlags that was pushed on stack at the beginning of the function + mov rdx, [rsp] + mov [rdi + CONTEXT_EFlags], edx + // Captured RSP is the value past the saved flags and the return address + lea rdx, [rsp + 16] + mov [rdi + CONTEXT_Rsp], rdx +.att_syntax + mov %ss, CONTEXT_SegSs(%rdi) +.intel_syntax noprefix +LOCAL_LABEL(Done_CONTEXT_CONTROL): + + // Need to double check this is producing the right result + // also that FFXSR (fast FXSAVE/FXRSTOR) is not turned on + // otherwise it omits the xmm registers. 
+ test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_FLOATING_POINT + je LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT) + fxsave [rdi + CONTEXT_FltSave] +LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_DEBUG_REGISTERS + je LOCAL_LABEL(Done_CONTEXT_DEBUG_REGISTERS) + mov rdx, dr0 + mov [rdi + CONTEXT_Dr0], rdx + mov rdx, dr1 + mov [rdi + CONTEXT_Dr1], rdx + mov rdx, dr2 + mov [rdi + CONTEXT_Dr2], rdx + mov rdx, dr3 + mov [rdi + CONTEXT_Dr3], rdx + mov rdx, dr6 + mov [rdi + CONTEXT_Dr6], rdx + mov rdx, dr7 + mov [rdi + CONTEXT_Dr7], rdx +LOCAL_LABEL(Done_CONTEXT_DEBUG_REGISTERS): + + // Release the 8 bytes holding the flags pushed at function entry + free_stack 8 + ret +LEAF_END CONTEXT_CaptureContext, _TEXT + +// Incoming: +// RDI: Context* +// Overwrites ContextFlags to request the full AMD64 context plus segments, +// then tail-jumps to CONTEXT_CaptureContext. +LEAF_ENTRY RtlCaptureContext, _TEXT + mov DWORD PTR [rdi + CONTEXT_ContextFlags], (CONTEXT_AMD64 | CONTEXT_FULL | CONTEXT_SEGMENTS) + jmp C_FUNC(CONTEXT_CaptureContext) +LEAF_END RtlCaptureContext, _TEXT + +// Incoming: +// RDI: Context* +// Loads the register groups selected by ContextFlags from the context. When +// CONTEXT_CONTROL is set, control is transferred to the context's Rip via iretq +// and the function does not return to its caller. +LEAF_ENTRY RtlRestoreContext, _TEXT + push_nonvol_reg rbp + alloc_stack (IRetFrameLengthAligned) + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_DEBUG_REGISTERS + je LOCAL_LABEL(Done_Restore_CONTEXT_DEBUG_REGISTERS) + mov rdx, [rdi + CONTEXT_Dr0] + mov dr0, rdx + mov rdx, [rdi + CONTEXT_Dr1] + mov dr1, rdx + mov rdx, [rdi + CONTEXT_Dr2] + mov dr2, rdx + mov rdx, [rdi + CONTEXT_Dr3] + mov dr3, rdx + mov rdx, [rdi + CONTEXT_Dr6] + mov dr6, rdx + mov rdx, [rdi + CONTEXT_Dr7] + mov dr7, rdx +LOCAL_LABEL(Done_Restore_CONTEXT_DEBUG_REGISTERS): + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_FLOATING_POINT + je LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT) + fxrstor [rdi + CONTEXT_FltSave] +LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_XSTATE + je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) + + // Restore the extended state (for now, this is just the upper halves of YMM registers) + vinsertf128 ymm0, ymm0, xmmword ptr [rdi + (CONTEXT_VectorRegister + 0 * 16)], 1 + vinsertf128 ymm1, ymm1, xmmword ptr [rdi + 
(CONTEXT_VectorRegister + 1 * 16)], 1 + vinsertf128 ymm2, ymm2, xmmword ptr [rdi + (CONTEXT_VectorRegister + 2 * 16)], 1 + vinsertf128 ymm3, ymm3, xmmword ptr [rdi + (CONTEXT_VectorRegister + 3 * 16)], 1 + vinsertf128 ymm4, ymm4, xmmword ptr [rdi + (CONTEXT_VectorRegister + 4 * 16)], 1 + vinsertf128 ymm5, ymm5, xmmword ptr [rdi + (CONTEXT_VectorRegister + 5 * 16)], 1 + vinsertf128 ymm6, ymm6, xmmword ptr [rdi + (CONTEXT_VectorRegister + 6 * 16)], 1 + vinsertf128 ymm7, ymm7, xmmword ptr [rdi + (CONTEXT_VectorRegister + 7 * 16)], 1 + vinsertf128 ymm8, ymm8, xmmword ptr [rdi + (CONTEXT_VectorRegister + 8 * 16)], 1 + vinsertf128 ymm9, ymm9, xmmword ptr [rdi + (CONTEXT_VectorRegister + 9 * 16)], 1 + vinsertf128 ymm10, ymm10, xmmword ptr [rdi + (CONTEXT_VectorRegister + 10 * 16)], 1 + vinsertf128 ymm11, ymm11, xmmword ptr [rdi + (CONTEXT_VectorRegister + 11 * 16)], 1 + vinsertf128 ymm12, ymm12, xmmword ptr [rdi + (CONTEXT_VectorRegister + 12 * 16)], 1 + vinsertf128 ymm13, ymm13, xmmword ptr [rdi + (CONTEXT_VectorRegister + 13 * 16)], 1 + vinsertf128 ymm14, ymm14, xmmword ptr [rdi + (CONTEXT_VectorRegister + 14 * 16)], 1 + vinsertf128 ymm15, ymm15, xmmword ptr [rdi + (CONTEXT_VectorRegister + 15 * 16)], 1 +LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): + + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_CONTROL + je LOCAL_LABEL(Done_Restore_CONTEXT_CONTROL) + + // The control registers are restored via the iret instruction + // so we build the frame for the iret on the stack. +#ifdef __APPLE__ +.att_syntax + // On OSX, we cannot read SS via thread_get_context, and RtlRestoreContext + // needs to work on a context extracted by thread_get_context. So we + // put the current SS into the iret frame instead of the one stored in the context. 
+ mov %ss, %ax +.intel_syntax noprefix +#else + mov ax, [rdi + CONTEXT_SegSs] +#endif + mov [rsp + IRETFRAME_SegSs], ax + mov rax, [rdi + CONTEXT_Rsp] + mov [rsp + IRETFRAME_Rsp], rax + mov eax, [rdi + CONTEXT_EFlags] + mov [rsp + IRETFRAME_EFlags], eax + mov ax, [rdi + CONTEXT_SegCs] + mov [rsp + IRETFRAME_SegCs], ax + mov rax, [rdi + CONTEXT_Rip] + mov [rsp + IRETFRAME_Rip], rax + +LOCAL_LABEL(Done_Restore_CONTEXT_CONTROL): + // Remember the result of the test for the CONTEXT_CONTROL. Only mov instructions + // were executed since that test, so the flags still hold its result. + push_eflags + test BYTE PTR [rdi + CONTEXT_ContextFlags], CONTEXT_INTEGER + je LOCAL_LABEL(Done_Restore_CONTEXT_INTEGER) + mov rsi, [rdi + CONTEXT_Rsi] + mov rbx, [rdi + CONTEXT_Rbx] + mov rdx, [rdi + CONTEXT_Rdx] + mov rcx, [rdi + CONTEXT_Rcx] + mov rax, [rdi + CONTEXT_Rax] + mov rbp, [rdi + CONTEXT_Rbp] + mov r8, [rdi + CONTEXT_R8] + mov r9, [rdi + CONTEXT_R9] + mov r10, [rdi + CONTEXT_R10] + mov r11, [rdi + CONTEXT_R11] + mov r12, [rdi + CONTEXT_R12] + mov r13, [rdi + CONTEXT_R13] + mov r14, [rdi + CONTEXT_R14] + mov r15, [rdi + CONTEXT_R15] + // RDI holds the context pointer used by every load above, so it is restored last + mov rdi, [rdi + CONTEXT_Rdi] +LOCAL_LABEL(Done_Restore_CONTEXT_INTEGER): + + // Restore the result of the test for the CONTEXT_CONTROL + pop_eflags + je LOCAL_LABEL(No_Restore_CONTEXT_CONTROL) + // The function was asked to restore the control registers, so + // we perform iretq that restores them all. + // We don't return to the caller in this case. + iretq +LOCAL_LABEL(No_Restore_CONTEXT_CONTROL): + + // The function was not asked to restore the control registers + // so we return back to the caller. 
+ free_stack (IRetFrameLengthAligned) + pop_nonvol_reg rbp + ret +LEAF_END RtlRestoreContext, _TEXT + +#else + + // 32-bit variant. The context pointer is the first stack argument. + // The integer and control registers are captured unconditionally; only the + // floating point and extended registers are gated by ContextFlags. + // NOTE(review): this branch uses AT&T operand syntax, but the file starts with + // .intel_syntax noprefix and no .att_syntax directive precedes it - confirm it assembles. + .globl C_FUNC(CONTEXT_CaptureContext) +C_FUNC(CONTEXT_CaptureContext): + push %eax + // After the push: (%esp) = saved EAX, 4(%esp) = return address, 8(%esp) = context pointer + mov 8(%esp), %eax + mov %edi, CONTEXT_Edi(%eax) + mov %esi, CONTEXT_Esi(%eax) + mov %ebx, CONTEXT_Ebx(%eax) + mov %edx, CONTEXT_Edx(%eax) + mov %ecx, CONTEXT_Ecx(%eax) + // ECX is already stored, so it can receive the original EAX saved by the push above + pop %ecx + mov %ecx, CONTEXT_Eax(%eax) + mov %ebp, CONTEXT_Ebp(%eax) + // Return address is @ (ESP) + mov (%esp), %edx + mov %edx, CONTEXT_Eip(%eax) + push %cs + pop %edx + mov %edx, CONTEXT_SegCs(%eax) + pushf + pop %edx + mov %edx, CONTEXT_EFlags(%eax) + // Captured ESP is the value past the return address + lea 4(%esp), %edx + mov %edx, CONTEXT_Esp(%eax) + push %ss + pop %edx + mov %edx, CONTEXT_SegSs(%eax) + testb $CONTEXT_FLOATING_POINT, CONTEXT_ContextFlags(%eax) + je 0f + // fnsave reinitializes the FPU after storing its state, so reload the state right away + fnsave CONTEXT_FloatSave(%eax) + frstor CONTEXT_FloatSave(%eax) +0: + testb $CONTEXT_EXTENDED_REGISTERS, CONTEXT_ContextFlags(%eax) + je 2f + movdqu %xmm0, CONTEXT_Xmm0(%eax) + movdqu %xmm1, CONTEXT_Xmm1(%eax) + movdqu %xmm2, CONTEXT_Xmm2(%eax) + movdqu %xmm3, CONTEXT_Xmm3(%eax) + movdqu %xmm4, CONTEXT_Xmm4(%eax) + movdqu %xmm5, CONTEXT_Xmm5(%eax) + movdqu %xmm6, CONTEXT_Xmm6(%eax) + movdqu %xmm7, CONTEXT_Xmm7(%eax) +2: + ret + +#endif diff --git a/src/pal/src/arch/i386/debugbreak.S b/src/pal/src/arch/i386/debugbreak.S new file mode 100644 index 0000000000..3065e4064c --- /dev/null +++ b/src/pal/src/arch/i386/debugbreak.S @@ -0,0 +1,12 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +.intel_syntax noprefix +#include "unixasmmacros.inc" + +// Raises a breakpoint trap with int3, then returns to the caller. +LEAF_ENTRY DBG_DebugBreak, _TEXT + int3 + ret +LEAF_END_MARKED DBG_DebugBreak, _TEXT + diff --git a/src/pal/src/arch/i386/dispatchexceptionwrapper.S b/src/pal/src/arch/i386/dispatchexceptionwrapper.S new file mode 100644 index 0000000000..ee5ff468d6 --- /dev/null +++ b/src/pal/src/arch/i386/dispatchexceptionwrapper.S @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// ==++== +// + +// ==--== +// +// Implementation of the PAL_DispatchExceptionWrapper that is +// interposed between a function that caused a hardware fault +// and PAL_DispatchException that throws an SEH exception for +// the fault, to make the stack unwindable. +// + +.intel_syntax noprefix +#include "unixasmmacros.inc" + +// Offset, from the start of PAL_DispatchExceptionWrapper, of the address that the +// call to PAL_DispatchException inside the wrapper returns to +.globl C_FUNC(PAL_DispatchExceptionReturnOffset) +C_FUNC(PAL_DispatchExceptionReturnOffset): + .int LOCAL_LABEL(PAL_DispatchExceptionReturn) - C_FUNC(PAL_DispatchExceptionWrapper) + +// +// PAL_DispatchExceptionWrapper will never be called; it only serves +// to be referenced from a stack frame on the faulting thread. Its +// unwinding behavior is equivalent to any standard function having +// an rbp frame. It is analogous to the following source file. 
+// +// extern "C" void PAL_DispatchException(CONTEXT *pContext, EXCEPTION_RECORD *pExceptionRecord, MachExceptionInfo *pMachExceptionInfo); +// +// extern "C" void PAL_DispatchExceptionWrapper() +// { +// CONTEXT Context; +// EXCEPTION_RECORD ExceptionRecord; +// MachExceptionInfo MachExceptionInfo; +// PAL_DispatchException(&Context, &ExceptionRecord, &MachExceptionInfo); +// } +// + +NESTED_ENTRY PAL_DispatchExceptionWrapper, _TEXT, NoHandler + push_nonvol_reg rbp + mov rbp, rsp + set_cfa_register rbp, (2*8) + int3 + call C_FUNC(PAL_DispatchException) +LOCAL_LABEL(PAL_DispatchExceptionReturn): + int3 + pop_nonvol_reg rbp + ret +NESTED_END PAL_DispatchExceptionWrapper, _TEXT diff --git a/src/pal/src/arch/i386/exceptionhelper.S b/src/pal/src/arch/i386/exceptionhelper.S new file mode 100644 index 0000000000..b7b34ace41 --- /dev/null +++ b/src/pal/src/arch/i386/exceptionhelper.S @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +////////////////////////////////////////////////////////////////////////// +// +// This function creates a stack frame right below the target frame, restores all callee +// saved registers from the passed in context, sets the RSP to that frame and sets the +// return address to the target frame's RIP. +// Then it uses the ThrowExceptionHelper to throw the passed in exception from that context. +// EXTERN_C void ThrowExceptionFromContextInternal(CONTEXT* context, PAL_SEHException* ex); +// Incoming: +// RDI: context +// RSI: ex +LEAF_ENTRY ThrowExceptionFromContextInternal, _TEXT + // Save the RBP to the stack so that the unwind can work at the instruction after + // loading the RBP from the context, but before loading the RSP from the context. 
+ push_nonvol_reg rbp + mov r12, [rdi + CONTEXT_R12] + mov r13, [rdi + CONTEXT_R13] + mov r14, [rdi + CONTEXT_R14] + mov r15, [rdi + CONTEXT_R15] + mov rbx, [rdi + CONTEXT_Rbx] + mov rbp, [rdi + CONTEXT_Rbp] + mov rsp, [rdi + CONTEXT_Rsp] + // The RSP was set to the target frame's value, so the current function's + // CFA is now right at the RSP. + .cfi_def_cfa_offset 0 + + // Indicate that now that we have moved the RSP to the target address, + // the RBP is no longer saved in the current stack frame. + .cfi_restore rbp + + mov rax, [rdi + CONTEXT_Rip] + + // Store return address to the stack + push_register rax + // The PAL_SEHException pointer is passed on in RDI to ThrowExceptionHelper + mov rdi, rsi + jmp EXTERNAL_C_FUNC(ThrowExceptionHelper) +LEAF_END ThrowExceptionFromContextInternal, _TEXT diff --git a/src/pal/src/arch/i386/optimizedtls.cpp b/src/pal/src/arch/i386/optimizedtls.cpp new file mode 100644 index 0000000000..910a6eb931 --- /dev/null +++ b/src/pal/src/arch/i386/optimizedtls.cpp @@ -0,0 +1,237 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*++ + + + +Module Name: + + optimizedtls.cpp + +Abstract: + + Implementation of platform-specific Thread local storage functions. + + + +--*/ + +#include "pal/thread.hpp" +#include "pal/malloc.hpp" + +#include <pthread.h> + +#include "pal/dbgmsg.h" +#include "pal/misc.h" +#include "pal/debug.h" + +#include <stddef.h> + +using namespace CorUnix; + +SET_DEFAULT_DEBUG_CHANNEL(THREAD); + +#if defined(USE_OPTIMIZEDTLSGETTER) + +// Computes the byte offset of member m in type s from a fake object placed at address 64. +// NOTE(review): presumably a non-null base is used to avoid compiler complaints about +// offsetof-style arithmetic on a null pointer - confirm. +#define PAL_safe_offsetof(s,m) ((size_t)((ptrdiff_t)&(char&)(((s *)64)->m))-64) + +/*++ +Function: + CorUnix::TLSMakeOptimizedGetter + + Creates a platform-optimized version of TlsGetValue compiled + for a particular index. + + Generates the hot part of CorUnix::InternalGetCurrentThread + as a chunk of highly optimized machine-specific code at runtime. 
+ + Check the difference between CorUnix::InternalGetCurrentThread and + CorUnix::InternalGetCurrentThreadSlow to see the C/C++ code that matches + the code generated by this function. + +Parameters: + pThread - thread passed to InternalMalloc when allocating the code buffer + dwTlsIndex - TLS slot whose value the generated code returns; for + THREAD_OBJECT_TLS_INDEX the generated code returns the thread pointer itself + +Return value: + Pointer to the generated code, or NULL if the buffer allocation fails + or when built with BIT64 defined. +--*/ +PAL_POPTIMIZEDTLSGETTER +CorUnix::TLSMakeOptimizedGetter( + IN CPalThread* pThread, + IN DWORD dwTlsIndex) +{ +#ifdef BIT64 +#pragma unused(pThread, dwTlsIndex) + ERROR("TLSMakeOptimizedGetter not rewritten for amd64 yet."); + return NULL; +#else + PAL_POPTIMIZEDTLSGETTER Ret = NULL; + BYTE* p; + int i = 0; + + // Total number of code bytes emitted below; the __APPLE__ build emits 3 more + // bytes for the stack alignment instruction. +#ifdef __APPLE__ +#define TLS_OPTIMIZED_GETTER_SIZE 118 +#else +#define TLS_OPTIMIZED_GETTER_SIZE 115 +#endif + + p = (BYTE*)InternalMalloc(pThread, TLS_OPTIMIZED_GETTER_SIZE * sizeof(BYTE)); + + if (p == NULL) + { + return Ret; + } + + // Need to preserve %ecx, %edx, and %esi registers as specified in + // GetThreadGeneric(void) in vm/i386/asmhelpers.s + p[i++] = 0x51; // push %ecx + p[i++] = 0x52; // push %edx + p[i++] = 0x89; // mov %esp,%eax // %eax = sp; + p[i++] = 0xe0; + p[i++] = 0xc1; // shr $0x11,%eax // sp >> 17; + p[i++] = 0xe8; + p[i++] = 0x11; + p[i++] = 0x89; // mov %eax,%edx // key = sp >> 17; + p[i++] = 0xc2; + p[i++] = 0xc1; // sar $0x7,%edx // key >> 7; + p[i++] = 0xfa; + p[i++] = 0x07; + p[i++] = 0x29; // sub %edx,%eax // key -= key >> 7; + p[i++] = 0xd0; + p[i++] = 0x89; // mov %eax,%edx + p[i++] = 0xc2; + p[i++] = 0xc1; // sar $0x5,%edx // key >> 5; + p[i++] = 0xfa; + p[i++] = 0x05; + p[i++] = 0x29; // sub %edx,%eax // key -= key >> 5; + p[i++] = 0xd0; + p[i++] = 0x89; // mov %eax,%edx + p[i++] = 0xc2; + p[i++] = 0xc1; // sar $0x3,%edx // key >> 3; + p[i++] = 0xfa; + p[i++] = 0x03; + p[i++] = 0x29; // sub %edx,%eax // key -= key >> 3; + p[i++] = 0xd0; + p[i++] = 0x25; // and $0xff,%eax // key &= 0xFF; + p[i++] = 0xff; + p[i++] = 0x00; + p[i++] = 0x00; + p[i++] = 0x00; + p[i++] = 0x8b; // mov (flush_counter),%ecx // %ecx = counter = flush_counter; + p[i++] = 0x0d; + *((DWORD*) &p[i]) = (DWORD)&flush_counter; + i += sizeof(DWORD); + p[i++] = 0x8b; // 
mov (thread_hints,%eax,4),%edx // %edx = pThread = thread_hints[key]; + p[i++] = 0x14; + p[i++] = 0x85; + *((DWORD*) &p[i]) = (DWORD)&thread_hints; + i += sizeof(DWORD); + p[i++] = 0x39; // cmp %esp,offsetof(CPalThread,tlsInfo)+offsetof(CThreadTLSInfo,minStack)(%edx) + // if ((size_t)pThread->tlsInfo.minStack <= sp) + p[i++] = 0xa2; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,minStack)); + i += sizeof(DWORD); + p[i++] = 0x77; // ja CallInternalGetCurrentThreadSlow: + p[i++] = 0x19; + p[i++] = 0x3b; // cmp offsetof(CPalThread,tlsInfo)+offsetof(CThreadTLSInfo,maxStack)(%edx),%esp + // if (sp < (size_t)pThread->tlsInfo.maxStack) + p[i++] = 0xa2; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,maxStack)); + i += sizeof(DWORD); + p[i++] = 0x73; // jae CallInternalGetCurrentThreadSlow: + p[i++] = 0x11; + p[i++] = 0x39; // cmp (flush_counter),%ecx // if (counter == flush_counter) + p[i++] = 0x0d; + *((DWORD*) &p[i]) = (DWORD)&flush_counter; + i += sizeof(DWORD); + p[i++] = 0x75; // jne CallInternalGetCurrentThreadSlow: + p[i++] = 0x09; + if (dwTlsIndex != THREAD_OBJECT_TLS_INDEX) + { + p[i++] = 0x8b; // mov offsetof(pThread->tlsSlots[dwTlsIndex])(%edx),%eax // %eax = pThread->tlsSlots[dwTlsIndex]; + p[i++] = 0x82; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,tlsSlots[dwTlsIndex])); + i += sizeof(DWORD); + } + else + { + p[i++] = 0x89; // mov %edx,%eax // %eax = pThread; + p[i++] = 0xd0; + p[i++] = 0x90; // nop (the four nops pad this branch to the 6 bytes of the mov above, so the fixed jump offsets stay valid) + p[i++] = 0x90; // nop + p[i++] = 0x90; // nop + p[i++] = 0x90; // nop + } + p[i++] = 0x5a; // pop %edx + p[i++] = 0x59; // pop %ecx + p[i++] = 0xc3; // ret + // CallInternalGetCurrentThreadSlow: + p[i++] = 0x5a; // pop %edx + p[i++] = 0x59; // pop %ecx + p[i++] = 0x8d; // lea (thread_hints,%eax,4),%eax // %eax = &thread_hints[key]; + p[i++] = 0x04; + p[i++] = 0x85; + *((DWORD*) &p[i]) = 
(DWORD)&thread_hints; + i += sizeof(DWORD); + p[i++] = 0x55; // push %ebp + p[i++] = 0x89; // mov %esp,%ebp + p[i++] = 0xe5; + p[i++] = 0x51; // push %ecx + p[i++] = 0x89; // mov %esp,%ecx // this is the reference esp - need to match the reference esp used in the fast path. + p[i++] = 0xe1; + p[i++] = 0x52; // push %edx +#ifdef __APPLE__ + // establish 16-byte stack alignment + p[i++] = 0x83; // subl $8,%esp + p[i++] = 0xec; + p[i++] = 0x08; +#endif + p[i++] = 0x50; // push %eax // store &thread_hints[key] on stack as 2nd argument; + p[i++] = 0x51; // push %ecx // reference esp - The 1st argument for call to InternalGetCurrentThreadSlow. + p[i++] = 0xe8; // call InternalGetCurrentThreadSlow + *((DWORD*) &p[i]) = (DWORD)&InternalGetCurrentThreadSlow - (DWORD)(&p[i+sizeof(DWORD)]); + i += sizeof(DWORD); +#ifdef __APPLE__ + p[i++] = 0x83; // addl $16,%esp + p[i++] = 0xc4; + p[i++] = 0x10; +#else + p[i++] = 0x83; // addl $8,%esp + p[i++] = 0xc4; + p[i++] = 0x08; +#endif + if (dwTlsIndex != THREAD_OBJECT_TLS_INDEX) + { + p[i++] = 0x8b; // mov offsetof(pThread->tlsSlots[dwTlsIndex])(%eax),%eax // %eax = pThread->tlsSlots[dwTlsIndex]; + p[i++] = 0x80; + *((DWORD*) &p[i]) = (DWORD)(PAL_safe_offsetof(CPalThread,tlsInfo)+PAL_safe_offsetof(CThreadTLSInfo,tlsSlots[dwTlsIndex])); + i += sizeof(DWORD); + } + p[i++] = 0x5a; // pop %edx + p[i++] = 0x59; // pop %ecx + p[i++] = 0xc9; // leave + p[i++] = 0xc3; // ret + + // NOTE(review): this check runs only after all bytes have been written, so it + // reports an overrun of the buffer rather than preventing one. + if (i > TLS_OPTIMIZED_GETTER_SIZE) + { + ASSERT("Invalid TLS_OPTIMIZED_GETTER_SIZE %d\n", i); + } + + DBG_FlushInstructionCache(p, TLS_OPTIMIZED_GETTER_SIZE * sizeof(BYTE)); + + Ret = (PAL_POPTIMIZEDTLSGETTER)p; + + return Ret; +#endif // BIT64 else +} + +/*++ +Function: + TLSFreeOptimizedGetter + + Frees a function created by TLSMakeOptimizedGetter(). 
+--*/ +VOID +CorUnix::TLSFreeOptimizedGetter( + IN PAL_POPTIMIZEDTLSGETTER pOptimizedTlsGetter) +{ + // The getter's code buffer is released with InternalFree on behalf of the current thread + InternalFree(InternalGetCurrentThread(), (void *)pOptimizedTlsGetter); +} + +#endif // USE_OPTIMIZEDTLSGETTER diff --git a/src/pal/src/arch/i386/processor.cpp b/src/pal/src/arch/i386/processor.cpp new file mode 100644 index 0000000000..4fd3a4abc8 --- /dev/null +++ b/src/pal/src/arch/i386/processor.cpp @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*++ + + + +Module Name: + + processor.cpp + +Abstract: + + Implementation of processor related functions for the Intel x86/x64 + platforms. These functions are processor dependent. + + + +--*/ + +#include "pal/palinternal.h" + +/*++ +Function: +YieldProcessor + +The YieldProcessor function signals to the processor to give resources +to threads that are waiting for them. This function is only effective on +processors that support technology allowing multiple threads running +on a single processor, such as Intel's Hyper-Threading technology. + +--*/ +void +PALAPI +YieldProcessor( + VOID) +{ + // "rep; nop" is the encoding of the x86 PAUSE instruction + __asm__ __volatile__ ( + "rep\n" + "nop" + ); +} + |