summary | refs | log | tree | commit | diff
path: root/src/vm/arm64
diff options
context:
space:
mode:
author: Carol Eidt <carol.eidt@microsoft.com> 2019-02-01 15:35:28 -0800
committer: GitHub <noreply@github.com> 2019-02-01 15:35:28 -0800
commit: f10d5bec97e722c5ed54a1ce5f357d99dea60ff7 (patch)
tree: 6c99ac3c4a7d6c625b0473936dc2c511e85066d3 /src/vm/arm64
parent: 7ca73bcfa0ce072617b068393b7ed66919097bc1 (diff)
download: coreclr-f10d5bec97e722c5ed54a1ce5f357d99dea60ff7.tar.gz
coreclr-f10d5bec97e722c5ed54a1ce5f357d99dea60ff7.tar.bz2
coreclr-f10d5bec97e722c5ed54a1ce5f357d99dea60ff7.zip
Preserve Vector Arg registers on Arm64 (#22257)
* Preserve Vector Arg registers on Arm64 Fix #14371
Diffstat (limited to 'src/vm/arm64')
-rw-r--r-- src/vm/arm64/CallDescrWorkerARM64.asm 8
-rw-r--r-- src/vm/arm64/asmconstants.h 3
-rw-r--r-- src/vm/arm64/asmhelpers.S 24
-rw-r--r-- src/vm/arm64/asmhelpers.asm 29
-rw-r--r-- src/vm/arm64/asmmacros.h 18
-rw-r--r-- src/vm/arm64/calldescrworkerarm64.S 8
-rw-r--r-- src/vm/arm64/cgencpu.h 12
7 files changed, 53 insertions, 49 deletions
diff --git a/src/vm/arm64/CallDescrWorkerARM64.asm b/src/vm/arm64/CallDescrWorkerARM64.asm
index 65c7db6f3f..fe277ceb62 100644
--- a/src/vm/arm64/CallDescrWorkerARM64.asm
+++ b/src/vm/arm64/CallDescrWorkerARM64.asm
@@ -56,10 +56,10 @@ Ldonestack
;; given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LNoFloatingPoint
- ldp d0, d1, [x9]
- ldp d2, d3, [x9, #16]
- ldp d4, d5, [x9, #32]
- ldp d6, d7, [x9, #48]
+ ldp q0, q1, [x9]
+ ldp q2, q3, [x9, #32]
+ ldp q4, q5, [x9, #64]
+ ldp q6, q7, [x9, #96]
LNoFloatingPoint
;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h
index 7d0a9f734b..1acc1b46d7 100644
--- a/src/vm/arm64/asmconstants.h
+++ b/src/vm/arm64/asmconstants.h
@@ -58,7 +58,8 @@ ASMCONSTANTS_C_ASSERT(AppDomain__m_dwId == offsetof(AppDomain, m_dwId));
#define SIZEOF__ArgumentRegisters 0x40
ASMCONSTANTS_C_ASSERT(SIZEOF__ArgumentRegisters == sizeof(ArgumentRegisters))
-#define SIZEOF__FloatArgumentRegisters 0x40
+// There are 8 128-bit registers in FloatArgumentRegisters
+#define SIZEOF__FloatArgumentRegisters 0x80
ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegisters))
#define CallDescrData__pSrc 0x00
diff --git a/src/vm/arm64/asmhelpers.S b/src/vm/arm64/asmhelpers.S
index c0baa92456..ed48d66e58 100644
--- a/src/vm/arm64/asmhelpers.S
+++ b/src/vm/arm64/asmhelpers.S
@@ -121,18 +121,18 @@ LEAF_END HelperMethodFrameRestoreState, _TEXT
// The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
- PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl NDirectImportWorker
mov x12, x0
// pop the stack and restore original register state
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
- EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224
// If we got back from NDirectImportWorker, the MD has been successfully
// linked. Proceed to execute the original DLL call.
@@ -493,9 +493,9 @@ WRITE_BARRIER_END JIT_WriteBarrier
NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
// Save arguments and return address
- PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
// Refer to ZapImportVirtualThunk::Save
// for details on this.
@@ -512,8 +512,8 @@ NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224
PATCH_LABEL VirtualMethodFixupPatchLabel
@@ -723,9 +723,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix
// Save arguments and return address
- PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl C_FUNC(TheUMEntryPrestubWorker)
@@ -735,8 +735,8 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix
// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224
// and tailcall to the actual method
EPILOG_BRANCH_REG x12
diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm
index 37efbaeccd..7d8aad3e48 100644
--- a/src/vm/arm64/asmhelpers.asm
+++ b/src/vm/arm64/asmhelpers.asm
@@ -184,18 +184,18 @@ Done
; The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk
- PROLOG_SAVE_REG_PAIR fp, lr, #-160!
+ PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl NDirectImportWorker
mov x12, x0
; pop the stack and restore original register state
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
- EPILOG_RESTORE_REG_PAIR fp, lr, #160!
+ EPILOG_RESTORE_REG_PAIR fp, lr, #224!
; If we got back from NDirectImportWorker, the MD has been successfully
; linked. Proceed to execute the original DLL call.
@@ -538,9 +538,9 @@ Exit
NESTED_ENTRY VirtualMethodFixupStub
; Save arguments and return address
- PROLOG_SAVE_REG_PAIR fp, lr, #-160!
+ PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
; Refer to ZapImportVirtualThunk::Save
; for details on this.
@@ -557,8 +557,8 @@ Exit
; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR fp, lr, #160!
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR fp, lr, #224!
PATCH_LABEL VirtualMethodFixupPatchLabel
@@ -862,7 +862,10 @@ COMToCLRDispatchHelper_StackLoop
COMToCLRDispatchHelper_RegSetup
- RESTORE_FLOAT_ARGUMENT_REGISTERS x1, -1 * GenericComCallStub_FrameOffset
+ ; We need an aligned offset for restoring float args, so do the subtraction into
+ ; a scratch register
+ sub x5, x1, GenericComCallStub_FrameOffset
+ RESTORE_FLOAT_ARGUMENT_REGISTERS x5, 0
mov lr, x2
mov x12, x3
@@ -892,9 +895,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub,,UMEntryPrestubUnwindFrameChainHandler
; Save arguments and return address
- PROLOG_SAVE_REG_PAIR fp, lr, #-160!
+ PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl TheUMEntryPrestubWorker
@@ -904,8 +907,8 @@ COMToCLRDispatchHelper_RegSetup
; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR fp, lr, #160!
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR fp, lr, #224!
; and tailcall to the actual method
EPILOG_BRANCH_REG x12
diff --git a/src/vm/arm64/asmmacros.h b/src/vm/arm64/asmmacros.h
index 291fcf8e70..5c6195b405 100644
--- a/src/vm/arm64/asmmacros.h
+++ b/src/vm/arm64/asmmacros.h
@@ -183,7 +183,7 @@ __PWTB_SAVE_ARGUMENT_REGISTERS_OFFSET SETA 0
MEND
-; Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
+; Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
MACRO
SAVE_FLOAT_ARGUMENT_REGISTERS $reg, $offset
@@ -195,10 +195,10 @@ __PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF
- stp d0, d1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
- stp d2, d3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
- stp d4, d5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
- stp d6, d7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
+ stp q0, q1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
+ stp q2, q3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
+ stp q4, q5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
+ stp q6, q7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND
MACRO
@@ -231,10 +231,10 @@ __PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF
- ldp d0, d1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
- ldp d2, d3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
- ldp d4, d5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
- ldp d6, d7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
+ ldp q0, q1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
+ ldp q2, q3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
+ ldp q4, q5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
+ ldp q6, q7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND
; ------------------------------------------------------------------
diff --git a/src/vm/arm64/calldescrworkerarm64.S b/src/vm/arm64/calldescrworkerarm64.S
index c3ce06aa72..f987d402dd 100644
--- a/src/vm/arm64/calldescrworkerarm64.S
+++ b/src/vm/arm64/calldescrworkerarm64.S
@@ -48,10 +48,10 @@ LOCAL_LABEL(donestack):
// given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LOCAL_LABEL(NoFloatingPoint)
- ldp d0, d1, [x9]
- ldp d2, d3, [x9, #16]
- ldp d4, d5, [x9, #32]
- ldp d6, d7, [x9, #48]
+ ldp q0, q1, [x9]
+ ldp q2, q3, [x9, #32]
+ ldp q4, q5, [x9, #64]
+ ldp q6, q7, [x9, #96]
LOCAL_LABEL(NoFloatingPoint):
// Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h
index a297a84e17..fd1fbafe96 100644
--- a/src/vm/arm64/cgencpu.h
+++ b/src/vm/arm64/cgencpu.h
@@ -100,7 +100,7 @@ static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE m
//**********************************************************************
//--------------------------------------------------------------------
-// This represents the callee saved (non-volatile) registers saved as
+// This represents the callee saved (non-volatile) integer registers saved as
// of a FramedMethodFrame.
//--------------------------------------------------------------------
typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
@@ -111,7 +111,7 @@ struct CalleeSavedRegisters {
};
//--------------------------------------------------------------------
-// This represents the arguments that are stored in volatile registers.
+// This represents the arguments that are stored in volatile integer registers.
// This should not overlap the CalleeSavedRegisters since those are already
// saved separately and it would be wasteful to save the same register twice.
// If we do use a non-volatile register as an argument, then the ArgIterator
@@ -138,10 +138,10 @@ typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
struct FloatArgumentRegisters {
// armV8 supports 32 floating point registers. Each register is 128bits long.
// It can be accessed as 128-bit value or 64-bit value(d0-d31) or as 32-bit value (s0-s31)
- // or as 16-bit value or as 8-bit values. C# only has two builtin floating datatypes float(32-bit) and
- // double(64-bit). It does not have a quad-precision floating point.So therefore it does not make sense to
- // store full 128-bit values in Frame when the upper 64 bit will not contain any values.
- double d[8]; // d0-d7
+ // or as 16-bit value or as 8-bit values.
+ // Although C# only has two built-in floating-point data types, float (32-bit) and double (64-bit),
+ // HW Intrinsics support using the full 128-bit value for passing Vectors.
+ NEON128 q[8]; // q0-q7
};