summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/pal/inc/unixasmmacrosarm64.inc 34
-rw-r--r-- src/vm/argdestination.h 21
-rw-r--r-- src/vm/arm64/CallDescrWorkerARM64.asm 8
-rw-r--r-- src/vm/arm64/asmconstants.h 3
-rw-r--r-- src/vm/arm64/asmhelpers.S 24
-rw-r--r-- src/vm/arm64/asmhelpers.asm 29
-rw-r--r-- src/vm/arm64/asmmacros.h 18
-rw-r--r-- src/vm/arm64/calldescrworkerarm64.S 8
-rw-r--r-- src/vm/arm64/cgencpu.h 12
-rw-r--r-- src/vm/callingconvention.h 7
10 files changed, 83 insertions, 81 deletions
diff --git a/src/pal/inc/unixasmmacrosarm64.inc b/src/pal/inc/unixasmmacrosarm64.inc
index f99efbe45d..6db1f242f3 100644
--- a/src/pal/inc/unixasmmacrosarm64.inc
+++ b/src/pal/inc/unixasmmacrosarm64.inc
@@ -137,14 +137,14 @@ C_FUNC(\Name\()_End):
// ArgumentRegisters::x2
// ArgumentRegisters::x1
// ArgumentRegisters::x0
-// FloatRegisters::d7
-// FloatRegisters::d6
-// FloatRegisters::d5
-// FloatRegisters::d4
-// FloatRegisters::d3
-// FloatRegisters::d2
-// FloatRegisters::d1
-// FloatRegisters::d0
+// FloatRegisters::q7
+// FloatRegisters::q6
+// FloatRegisters::q5
+// FloatRegisters::q4
+// FloatRegisters::q3
+// FloatRegisters::q2
+// FloatRegisters::q1
+// FloatRegisters::q0
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1
__PWTB_FloatArgumentRegisters = \extraLocals
@@ -200,13 +200,13 @@ C_FUNC(\Name\()_End):
.endm
-// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
+// Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs
- stp d0, d1, [\reg, #(\ofs)]
- stp d2, d3, [\reg, #(\ofs + 16)]
- stp d4, d5, [\reg, #(\ofs + 32)]
- stp d6, d7, [\reg, #(\ofs + 48)]
+ stp q0, q1, [\reg, #(\ofs)]
+ stp q2, q3, [\reg, #(\ofs + 32)]
+ stp q4, q5, [\reg, #(\ofs + 64)]
+ stp q6, q7, [\reg, #(\ofs + 96)]
.endm
@@ -222,10 +222,10 @@ C_FUNC(\Name\()_End):
.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs
- ldp d0, d1, [\reg, #(\ofs)]
- ldp d2, d3, [\reg, #(\ofs + 16)]
- ldp d4, d5, [\reg, #(\ofs + 32)]
- ldp d6, d7, [\reg, #(\ofs + 48)]
+ ldp q0, q1, [\reg, #(\ofs)]
+ ldp q2, q3, [\reg, #(\ofs + 32)]
+ ldp q4, q5, [\reg, #(\ofs + 64)]
+ ldp q6, q7, [\reg, #(\ofs + 96)]
.endm
diff --git a/src/vm/argdestination.h b/src/vm/argdestination.h
index 04968a1aff..439761bec2 100644
--- a/src/vm/argdestination.h
+++ b/src/vm/argdestination.h
@@ -65,20 +65,17 @@ public:
int floatRegCount = m_argLocDescForStructInRegs->m_cFloatReg;
bool typeFloat = m_argLocDescForStructInRegs->m_isSinglePrecision;
- void* dest = this->GetDestinationAddress();
+ UINT64* dest = (UINT64*) this->GetDestinationAddress();
- if (typeFloat)
+ for (int i = 0; i < floatRegCount; ++i)
{
- for (int i = 0; i < floatRegCount; ++i)
- {
- // Copy 4 bytes on 8 bytes alignment
- *((UINT64*)dest + i) = *((UINT32*)src + i);
- }
- }
- else
- {
- // We can just do a memcpy.
- memcpyNoGCRefs(dest, src, fieldBytes);
+ // Copy 4 or 8 bytes from src.
+ UINT64 val = typeFloat ? *((UINT32*)src + i) : *((UINT64*)src + i);
+ // Always store 8 bytes
+ *(dest++) = val;
+ // For now, always zero the next 8 bytes.
+ // (When HVAs are supported we will get the next 8 bytes from src.)
+ *(dest++) = 0;
}
}
diff --git a/src/vm/arm64/CallDescrWorkerARM64.asm b/src/vm/arm64/CallDescrWorkerARM64.asm
index 65c7db6f3f..fe277ceb62 100644
--- a/src/vm/arm64/CallDescrWorkerARM64.asm
+++ b/src/vm/arm64/CallDescrWorkerARM64.asm
@@ -56,10 +56,10 @@ Ldonestack
;; given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LNoFloatingPoint
- ldp d0, d1, [x9]
- ldp d2, d3, [x9, #16]
- ldp d4, d5, [x9, #32]
- ldp d6, d7, [x9, #48]
+ ldp q0, q1, [x9]
+ ldp q2, q3, [x9, #32]
+ ldp q4, q5, [x9, #64]
+ ldp q6, q7, [x9, #96]
LNoFloatingPoint
;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h
index 7d0a9f734b..1acc1b46d7 100644
--- a/src/vm/arm64/asmconstants.h
+++ b/src/vm/arm64/asmconstants.h
@@ -58,7 +58,8 @@ ASMCONSTANTS_C_ASSERT(AppDomain__m_dwId == offsetof(AppDomain, m_dwId));
#define SIZEOF__ArgumentRegisters 0x40
ASMCONSTANTS_C_ASSERT(SIZEOF__ArgumentRegisters == sizeof(ArgumentRegisters))
-#define SIZEOF__FloatArgumentRegisters 0x40
+// There are 8 128-bit registers in FloatArgumentRegisters
+#define SIZEOF__FloatArgumentRegisters 0x80
ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegisters))
#define CallDescrData__pSrc 0x00
diff --git a/src/vm/arm64/asmhelpers.S b/src/vm/arm64/asmhelpers.S
index c0baa92456..ed48d66e58 100644
--- a/src/vm/arm64/asmhelpers.S
+++ b/src/vm/arm64/asmhelpers.S
@@ -121,18 +121,18 @@ LEAF_END HelperMethodFrameRestoreState, _TEXT
// The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
- PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl NDirectImportWorker
mov x12, x0
// pop the stack and restore original register state
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
- EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224
// If we got back from NDirectImportWorker, the MD has been successfully
// linked. Proceed to execute the original DLL call.
@@ -493,9 +493,9 @@ WRITE_BARRIER_END JIT_WriteBarrier
NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
// Save arguments and return address
- PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
// Refer to ZapImportVirtualThunk::Save
// for details on this.
@@ -512,8 +512,8 @@ NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224
PATCH_LABEL VirtualMethodFixupPatchLabel
@@ -723,9 +723,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix
// Save arguments and return address
- PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl C_FUNC(TheUMEntryPrestubWorker)
@@ -735,8 +735,8 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix
// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224
// and tailcall to the actual method
EPILOG_BRANCH_REG x12
diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm
index 37efbaeccd..7d8aad3e48 100644
--- a/src/vm/arm64/asmhelpers.asm
+++ b/src/vm/arm64/asmhelpers.asm
@@ -184,18 +184,18 @@ Done
; The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk
- PROLOG_SAVE_REG_PAIR fp, lr, #-160!
+ PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl NDirectImportWorker
mov x12, x0
; pop the stack and restore original register state
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
- EPILOG_RESTORE_REG_PAIR fp, lr, #160!
+ EPILOG_RESTORE_REG_PAIR fp, lr, #224!
; If we got back from NDirectImportWorker, the MD has been successfully
; linked. Proceed to execute the original DLL call.
@@ -538,9 +538,9 @@ Exit
NESTED_ENTRY VirtualMethodFixupStub
; Save arguments and return address
- PROLOG_SAVE_REG_PAIR fp, lr, #-160!
+ PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
; Refer to ZapImportVirtualThunk::Save
; for details on this.
@@ -557,8 +557,8 @@ Exit
; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR fp, lr, #160!
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR fp, lr, #224!
PATCH_LABEL VirtualMethodFixupPatchLabel
@@ -862,7 +862,10 @@ COMToCLRDispatchHelper_StackLoop
COMToCLRDispatchHelper_RegSetup
- RESTORE_FLOAT_ARGUMENT_REGISTERS x1, -1 * GenericComCallStub_FrameOffset
+ ; We need an aligned offset for restoring float args, so do the subtraction into
+ ; a scratch register
+ sub x5, x1, GenericComCallStub_FrameOffset
+ RESTORE_FLOAT_ARGUMENT_REGISTERS x5, 0
mov lr, x2
mov x12, x3
@@ -892,9 +895,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub,,UMEntryPrestubUnwindFrameChainHandler
; Save arguments and return address
- PROLOG_SAVE_REG_PAIR fp, lr, #-160!
+ PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
- SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
+ SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96
mov x0, x12
bl TheUMEntryPrestubWorker
@@ -904,8 +907,8 @@ COMToCLRDispatchHelper_RegSetup
; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
- RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
- EPILOG_RESTORE_REG_PAIR fp, lr, #160!
+ RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
+ EPILOG_RESTORE_REG_PAIR fp, lr, #224!
; and tailcall to the actual method
EPILOG_BRANCH_REG x12
diff --git a/src/vm/arm64/asmmacros.h b/src/vm/arm64/asmmacros.h
index 291fcf8e70..5c6195b405 100644
--- a/src/vm/arm64/asmmacros.h
+++ b/src/vm/arm64/asmmacros.h
@@ -183,7 +183,7 @@ __PWTB_SAVE_ARGUMENT_REGISTERS_OFFSET SETA 0
MEND
-; Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
+; Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
MACRO
SAVE_FLOAT_ARGUMENT_REGISTERS $reg, $offset
@@ -195,10 +195,10 @@ __PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF
- stp d0, d1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
- stp d2, d3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
- stp d4, d5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
- stp d6, d7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
+ stp q0, q1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
+ stp q2, q3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
+ stp q4, q5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
+ stp q6, q7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND
MACRO
@@ -231,10 +231,10 @@ __PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF
- ldp d0, d1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
- ldp d2, d3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
- ldp d4, d5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
- ldp d6, d7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
+ ldp q0, q1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
+ ldp q2, q3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
+ ldp q4, q5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
+ ldp q6, q7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND
; ------------------------------------------------------------------
diff --git a/src/vm/arm64/calldescrworkerarm64.S b/src/vm/arm64/calldescrworkerarm64.S
index c3ce06aa72..f987d402dd 100644
--- a/src/vm/arm64/calldescrworkerarm64.S
+++ b/src/vm/arm64/calldescrworkerarm64.S
@@ -48,10 +48,10 @@ LOCAL_LABEL(donestack):
// given in x8.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LOCAL_LABEL(NoFloatingPoint)
- ldp d0, d1, [x9]
- ldp d2, d3, [x9, #16]
- ldp d4, d5, [x9, #32]
- ldp d6, d7, [x9, #48]
+ ldp q0, q1, [x9]
+ ldp q2, q3, [x9, #32]
+ ldp q4, q5, [x9, #64]
+ ldp q6, q7, [x9, #96]
LOCAL_LABEL(NoFloatingPoint):
// Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h
index a297a84e17..fd1fbafe96 100644
--- a/src/vm/arm64/cgencpu.h
+++ b/src/vm/arm64/cgencpu.h
@@ -100,7 +100,7 @@ static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE m
//**********************************************************************
//--------------------------------------------------------------------
-// This represents the callee saved (non-volatile) registers saved as
+// This represents the callee saved (non-volatile) integer registers saved as
// of a FramedMethodFrame.
//--------------------------------------------------------------------
typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
@@ -111,7 +111,7 @@ struct CalleeSavedRegisters {
};
//--------------------------------------------------------------------
-// This represents the arguments that are stored in volatile registers.
+// This represents the arguments that are stored in volatile integer registers.
// This should not overlap the CalleeSavedRegisters since those are already
// saved separately and it would be wasteful to save the same register twice.
// If we do use a non-volatile register as an argument, then the ArgIterator
@@ -138,10 +138,10 @@ typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
struct FloatArgumentRegisters {
// armV8 supports 32 floating point registers. Each register is 128bits long.
// It can be accessed as 128-bit value or 64-bit value(d0-d31) or as 32-bit value (s0-s31)
- // or as 16-bit value or as 8-bit values. C# only has two builtin floating datatypes float(32-bit) and
- // double(64-bit). It does not have a quad-precision floating point.So therefore it does not make sense to
- // store full 128-bit values in Frame when the upper 64 bit will not contain any values.
- double d[8]; // d0-d7
+ // or as 16-bit value or as 8-bit values.
+ // Although C# only has two builtin floating datatypes float(32-bit) and double(64-bit),
+ // HW Intrinsics support using the full 128-bit value for passing Vectors.
+ NEON128 q[8]; // q0-q7
};
diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h
index e70e31f3a7..eaabaa42b2 100644
--- a/src/vm/callingconvention.h
+++ b/src/vm/callingconvention.h
@@ -583,8 +583,8 @@ public:
if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset))
{
- // Dividing by 8 as size of each register in FloatArgumentRegisters is 8 bytes.
- pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 8;
+ // Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes.
+ pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 16;
if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA())
{
@@ -1332,7 +1332,8 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
{
if (cFPRegs + m_idxFPReg <= 8)
{
- int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8;
+ // Each floating point register in the argument area is 16 bytes.
+ int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 16;
m_idxFPReg += cFPRegs;
return argOfs;
}