Diffstat (limited to 'src'): 53 files changed, 2892 insertions, 150 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6b967e3107..b811aed911 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,8 +7,10 @@ if(CLR_CMAKE_PLATFORM_ARCH_AMD64) include_directories("debug/inc/amd64") elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) include_directories("debug/inc/arm") +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + include_directories("debug/inc/arm64") else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() include_directories("debug/inc/dump") diff --git a/src/ToolBox/SOS/Strike/util.h b/src/ToolBox/SOS/Strike/util.h index 5b0c40f93c..3e2ca16a9a 100644 --- a/src/ToolBox/SOS/Strike/util.h +++ b/src/ToolBox/SOS/Strike/util.h @@ -1621,7 +1621,7 @@ BOOL IsMiniDumpFile(); void ReportOOM(); BOOL SafeReadMemory (TADDR offset, PVOID lpBuffer, ULONG cb, PULONG lpcbBytesRead); -#if !defined(_TARGET_WIN64_) +#if !defined(_TARGET_WIN64_) && !defined(_ARM64_) // on 64-bit platforms TADDR and CLRDATA_ADDRESS are identical inline BOOL SafeReadMemory (CLRDATA_ADDRESS offset, PVOID lpBuffer, ULONG cb, PULONG lpcbBytesRead) { return SafeReadMemory(TO_TADDR(offset), lpBuffer, cb, lpcbBytesRead); } diff --git a/src/ToolBox/SOS/lldbplugin/CMakeLists.txt b/src/ToolBox/SOS/lldbplugin/CMakeLists.txt index 798c91a142..ce279e9a1c 100644 --- a/src/ToolBox/SOS/lldbplugin/CMakeLists.txt +++ b/src/ToolBox/SOS/lldbplugin/CMakeLists.txt @@ -12,6 +12,9 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DDBG_TARGET_32BIT=1) add_definitions(-DDBG_TARGET_ARM=1) add_definitions(-DDBG_TARGET_WIN32=1) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + message(WARNING "lldb is not supported on linux/aarch64 yet") + return() endif() set(ENABLE_LLDBPLUGIN ${CLR_CMAKE_PLATFORM_UNIX} CACHE BOOL "Enable building the SOS plugin for LLDB.") diff --git a/src/binder/CMakeLists.txt b/src/binder/CMakeLists.txt index b6e9da3f46..b624d9fafa 100644 --- a/src/binder/CMakeLists.txt +++ b/src/binder/CMakeLists.txt @@ -4,8 +4,10 @@ if(CLR_CMAKE_PLATFORM_ARCH_AMD64) include_directories(BEFORE "../vm/amd64") elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) include_directories(BEFORE "../vm/arm") +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + include_directories(BEFORE "../vm/arm64") else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() include_directories(BEFORE "../vm") include_directories(BEFORE "inc") diff --git a/src/classlibnative/CMakeLists.txt b/src/classlibnative/CMakeLists.txt index 00a6941920..683506bba7 100644 --- a/src/classlibnative/CMakeLists.txt +++ b/src/classlibnative/CMakeLists.txt @@ -4,8 +4,10 @@ if(CLR_CMAKE_PLATFORM_ARCH_AMD64) include_directories(BEFORE "../vm/amd64") elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) include_directories(BEFORE "../vm/arm") +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + include_directories(BEFORE "../vm/arm64") else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() include_directories("../debug/inc") include_directories("../debug/inc/dump") @@ -20,8 +22,12 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DDBG_TARGET_32BIT=1) add_definitions(-DDBG_TARGET_ARM=1) add_definitions(-DDBG_TARGET_WIN32=1) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-DDBG_TARGET_64BIT=1) + add_definitions(-DDBG_TARGET_ARM64=1) + add_definitions(-DDBG_TARGET_WIN64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 
is supported") endif() add_subdirectory(bcltype) diff --git a/src/debug/daccess/CMakeLists.txt b/src/debug/daccess/CMakeLists.txt index 0d93d75966..9a61991859 100644 --- a/src/debug/daccess/CMakeLists.txt +++ b/src/debug/daccess/CMakeLists.txt @@ -15,8 +15,14 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DDBG_TARGET_ARM=1) add_definitions(-DDBG_TARGET_WIN32=1) add_definitions(-D_WIN32=1) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-D_TARGET_ARM64_=1) + add_definitions(-DDBG_TARGET_64BIT=1) + add_definitions(-DDBG_TARGET_ARM64=1) + add_definitions(-DDBG_TARGET_WIN64=1) + add_definitions(-D_WIN64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() include_directories(BEFORE ${VM_DIR}) diff --git a/src/debug/daccess/daccess.cpp b/src/debug/daccess/daccess.cpp index b737ecd912..2c8c8323eb 100644 --- a/src/debug/daccess/daccess.cpp +++ b/src/debug/daccess/daccess.cpp @@ -5492,15 +5492,17 @@ ClrDataAccess::Initialize(void) // Determine our platform based on the pre-processor macros set when we were built #ifdef FEATURE_PAL - #if defined(DBG_TARGET_X86) - CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_X86; - #elif defined(DBG_TARGET_AMD64) - CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_AMD64; - #elif defined(DBG_TARGET_ARM) - CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_ARM; - #else - #error Unknown Processor. - #endif + #if defined(DBG_TARGET_X86) + CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_X86; + #elif defined(DBG_TARGET_AMD64) + CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_AMD64; + #elif defined(DBG_TARGET_ARM) + CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_ARM; + #elif defined(DBG_TARGET_ARM64) + CorDebugPlatform hostPlatform = CORDB_PLATFORM_POSIX_ARM64; + #else + #error Unknown Processor. + #endif #else #if defined(DBG_TARGET_X86) CorDebugPlatform hostPlatform = CORDB_PLATFORM_WINDOWS_X86; diff --git a/src/debug/di/CMakeLists.txt b/src/debug/di/CMakeLists.txt index 55b5a44657..1c9125adce 100644 --- a/src/debug/di/CMakeLists.txt +++ b/src/debug/di/CMakeLists.txt @@ -8,8 +8,13 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DDBG_TARGET_32BIT) add_definitions(-DDBG_TARGET_WIN32=1) add_definitions(-DDBG_TARGET_ARM=1) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-D_TARGET_WIN64_=1) + add_definitions(-DDBG_TARGET_64BIT) + add_definitions(-DDBG_TARGET_WIN64=1) + add_definitions(-DDBG_TARGET_ARM64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() add_definitions(-DFEATURE_METADATA_CUSTOM_DATA_SOURCE -DFEATURE_METADATA_DEBUGGEE_DATA_SOURCE -DFEATURE_NO_HOST -DFEATURE_METADATA_LOAD_TRUSTED_IMAGES) @@ -66,4 +71,4 @@ endif(WIN32) add_precompiled_header(stdafx.h stdafx.cpp CORDBDI_SOURCES) -add_library(cordbdi STATIC ${CORDBDI_SOURCES})
\ No newline at end of file +add_library(cordbdi STATIC ${CORDBDI_SOURCES}) diff --git a/src/debug/di/arm64/cordbregisterset.cpp b/src/debug/di/arm64/cordbregisterset.cpp index ccddb6fd52..8cd61cafa8 100644 --- a/src/debug/di/arm64/cordbregisterset.cpp +++ b/src/debug/di/arm64/cordbregisterset.cpp @@ -8,7 +8,6 @@ // //***************************************************************************** -#include "stdafx.h" #include "primitives.h" diff --git a/src/debug/di/arm64/primitives.cpp b/src/debug/di/arm64/primitives.cpp index 66bf4ed17d..f09f32b7d1 100644 --- a/src/debug/di/arm64/primitives.cpp +++ b/src/debug/di/arm64/primitives.cpp @@ -5,6 +5,4 @@ // -#include "stdafx.h" - #include "../../shared/arm64/primitives.cpp" diff --git a/src/debug/di/shimremotedatatarget.cpp b/src/debug/di/shimremotedatatarget.cpp index cd1304be40..7885f6bddf 100644 --- a/src/debug/di/shimremotedatatarget.cpp +++ b/src/debug/di/shimremotedatatarget.cpp @@ -215,8 +215,10 @@ ShimRemoteDataTarget::GetPlatform( *pPlatform = CORDB_PLATFORM_POSIX_X86; #elif defined(DBG_TARGET_AMD64) *pPlatform = CORDB_PLATFORM_POSIX_AMD64; - #elif defined(DBG_TARGET_ARM) + #elif defined(DBG_TARGET_ARM) *pPlatform = CORDB_PLATFORM_POSIX_ARM; + #elif defined(DBG_TARGET_ARM64) + *pPlatform = CORDB_PLATFORM_POSIX_ARM64; #else #error Unknown Processor. #endif diff --git a/src/debug/ee/CMakeLists.txt b/src/debug/ee/CMakeLists.txt index e37e2573f3..66eb054b1d 100644 --- a/src/debug/ee/CMakeLists.txt +++ b/src/debug/ee/CMakeLists.txt @@ -12,8 +12,13 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DDBG_TARGET_32BIT=1) add_definitions(-DDBG_TARGET_ARM=1) add_definitions(-DDBG_TARGET_WIN32=1) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-D_TARGET_ARM64_=1) + add_definitions(-DDBG_TARGET_64BIT=1) + add_definitions(-DDBG_TARGET_ARM64=1) + add_definitions(-DDBG_TARGET_WIN64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() diff --git a/src/debug/ee/wks/CMakeLists.txt b/src/debug/ee/wks/CMakeLists.txt index e2fd7b2bfd..e8ad205fce 100644 --- a/src/debug/ee/wks/CMakeLists.txt +++ b/src/debug/ee/wks/CMakeLists.txt @@ -31,6 +31,8 @@ if(CLR_CMAKE_PLATFORM_ARCH_AMD64) add_library(cordbee_wks ${CORDBEE_SOURCES_WKS} ../amd64/dbghelpers.S) elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_library(cordbee_wks ${CORDBEE_SOURCES_WKS} ../arm/dbghelpers.S) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_library(cordbee_wks ${CORDBEE_SOURCES_WKS}) else() message(FATAL_ERROR "Only ARM and AMD64 is supported") endif() diff --git a/src/inc/clrnt.h b/src/inc/clrnt.h index 97d249ad88..fe6c5a87d2 100644 --- a/src/inc/clrnt.h +++ b/src/inc/clrnt.h @@ -1012,6 +1012,10 @@ RtlVirtualUnwind ( #ifdef _TARGET_ARM64_ +#define UNW_FLAG_NHANDLER 0x0 /* any handler */ +#define UNW_FLAG_EHANDLER 0x1 /* filter handler */ +#define UNW_FLAG_UHANDLER 0x2 /* unwind handler */ + // This function returns the RVA of the end of the function (exclusive, so one byte after the actual end) // using the unwind info on ARM64. 
(see ExternalAPIs\Win9CoreSystem\inc\winnt.h) FORCEINLINE @@ -1044,6 +1048,18 @@ typedef struct _UNWIND_INFO { // dummy } UNWIND_INFO, *PUNWIND_INFO; +PEXCEPTION_ROUTINE +RtlVirtualUnwind( + IN ULONG HandlerType, + IN ULONG64 ImageBase, + IN ULONG64 ControlPc, + IN PRUNTIME_FUNCTION FunctionEntry, + IN OUT PCONTEXT ContextRecord, + OUT PVOID *HandlerData, + OUT PULONG64 EstablisherFrame, + IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL + ); + #endif #endif // CLRNT_H_ diff --git a/src/inc/switches.h b/src/inc/switches.h index b298b5563b..f6fec8309b 100644 --- a/src/inc/switches.h +++ b/src/inc/switches.h @@ -262,7 +262,7 @@ #define FEATURE_STACK_SAMPLING #endif // defined (ALLOW_SXS_JIT) -#if defined(_TARGET_ARM64_) +#if defined(_TARGET_ARM64_) && !defined(FEATURE_CORECLR) #define FEATURE_INTERPRETER #endif // defined(_TARGET_ARM64_) diff --git a/src/inc/volatile.h b/src/inc/volatile.h index 5310a65ff1..8ec42962b0 100644 --- a/src/inc/volatile.h +++ b/src/inc/volatile.h @@ -72,12 +72,12 @@ #error The Volatile type is currently only defined for Visual C++ and GNU C++ #endif -#if defined(__GNUC__) && !defined(_X86_) && !defined(_AMD64_) && !defined(_ARM_) -#error The Volatile type is currently only defined for GCC when targeting x86, AMD64 or ARM CPUs +#if defined(__GNUC__) && !defined(_X86_) && !defined(_AMD64_) && !defined(_ARM_) && !defined(_ARM64_) +#error The Volatile type is currently only defined for GCC when targeting x86, AMD64, ARM or ARM64 CPUs #endif #if defined(__GNUC__) -#if defined(_ARM_) +#if defined(_ARM_) || defined(_ARM64_) // This is functionally equivalent to the MemoryBarrier() macro used on ARM on Windows. #define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb sy" : : : "memory") #else diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt index 2612331d32..96f11b760a 100644 --- a/src/jit/CMakeLists.txt +++ b/src/jit/CMakeLists.txt @@ -79,8 +79,17 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) codegenarm.cpp unwindarm.cpp ) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + set( ARCH_SOURCES + codegenarm64.cpp + emitarm64.cpp + lowerarm64.cpp + targetarm64.cpp + unwindarm.cpp + unwindarm64.cpp + ) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() set( SOURCES diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 1166f793c7..93ad2a3bdf 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -1424,7 +1424,7 @@ void CodeGen::genCodeForBBlist() { VarSetOps::AddElemD(compiler, removedGCVars, varIndex); } -#endif DEBUG +#endif // DEBUG VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); } else if (compiler->lvaIsGCTracked(varDsc)) @@ -1434,7 +1434,7 @@ void CodeGen::genCodeForBBlist() { VarSetOps::AddElemD(compiler, addedGCVars, varIndex); } -#endif DEBUG +#endif // DEBUG VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); } } @@ -3341,6 +3341,8 @@ CodeGen::genLclHeap(GenTreePtr tree) var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; + BasicBlock* loop = nullptr; + unsigned stackAdjustment = 0; #ifdef DEBUG // Verify ESP @@ -3417,7 +3419,7 @@ CodeGen::genLclHeap(GenTreePtr tree) inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); } - unsigned stackAdjustment = 0; + stackAdjustment = 0; #if FEATURE_EH_FUNCLETS // If we have PSPsym, then need to re-locate it after localloc. 
if (hasPspSym) @@ -3507,7 +3509,7 @@ CodeGen::genLclHeap(GenTreePtr tree) genSetRegToIcon(regCnt, amount, ((int)amount == amount)? TYP_INT : TYP_LONG); } - BasicBlock* loop = genCreateTempLabel(); + loop = genCreateTempLabel(); if (compiler->info.compInitMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index 3ae8c4ce35..06e4e73d07 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -2864,6 +2864,9 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) case EA_8BYTE: result = INS_OPTS_1D; break; + default: + // TODO-Cleanup: add unreached() here + break; } } else if (datasize == EA_16BYTE) @@ -2882,6 +2885,9 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) case EA_8BYTE: result = INS_OPTS_2D; break; + default: + // TODO-Cleanup: add unreached() here + break; } } return result; @@ -3095,6 +3101,9 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) case EA_8BYTE: result = (index < 1); break; + default: + // TODO-Cleanup: add unreached() here + break; } } else if (datasize == EA_16BYTE) @@ -3113,6 +3122,9 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) case EA_8BYTE: result = (index < 2); break; + default: + // TODO-Cleanup: add unreached() here + break; } } } @@ -3163,6 +3175,9 @@ void emitter::emitIns_I(instruction ins, assert(!"Instruction cannot be encoded: IF_SI_0A"); } break; + default: + // TODO-Cleanup: add unreached() here + break; } assert(fmt != IF_NONE); @@ -3205,6 +3220,10 @@ void emitter::emitIns_R(instruction ins, fmt = IF_BR_1A; break; + default: + // TODO-Cleanup: add unreached() here + break; + } assert(fmt != IF_NONE); @@ -3463,6 +3482,10 @@ void emitter::emitIns_R_I(instruction ins, } break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(canEncode); @@ -3551,6 +3574,11 @@ void emitter::emitIns_R_F(instruction ins, } } break; + + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(canEncode); @@ -3988,6 +4016,10 @@ void emitter::emitIns_R_R(instruction ins, fmt = IF_DV_2J; break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(fmt != IF_NONE); @@ -4087,6 +4119,10 @@ void emitter::emitIns_R_I_I(instruction ins, } break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(fmt != IF_NONE); @@ -4527,6 +4563,10 @@ void emitter::emitIns_R_R_I(instruction ins, isLdSt = true; break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) if (isLdSt) @@ -4932,6 +4972,10 @@ void emitter::emitIns_R_R_R(instruction ins, emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0); return; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(fmt != IF_NONE); @@ -5099,6 +5143,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, isLdSt = true; break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) if (isLdSt) @@ -5280,6 +5328,10 @@ void emitter::emitIns_R_R_R_Ext(instruction ins, } break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(scale != -1); @@ -5415,6 +5467,10 @@ void emitter::emitIns_R_R_I_I(instruction ins, fmt = IF_DV_2F; break; + default: + // TODO-Cleanup: add unreached() here + break; + } assert(fmt != IF_NONE); @@ -5478,6 +5534,11 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_invalid: 
fmt = IF_NONE; break; + + default: + // TODO-Cleanup: add unreached() here + break; + } assert(fmt != IF_NONE); @@ -5522,6 +5583,10 @@ void emitter::emitIns_R_COND(instruction ins, fmt = IF_DR_1D; break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(fmt != IF_NONE); @@ -5566,6 +5631,9 @@ void emitter::emitIns_R_R_COND(instruction ins, cfi.cond = cond; fmt = IF_DR_2D; break; + default: + // TODO-Cleanup: add unreached() here + break; } // end switch (ins) @@ -5616,6 +5684,10 @@ void emitter::emitIns_R_R_R_COND(instruction ins, fmt = IF_DR_3D; break; + default: + // TODO-Cleanup: add unreached() here + break; + } // end switch (ins) assert(fmt != IF_NONE); @@ -5664,7 +5736,9 @@ void emitter::emitIns_R_R_FLAGS_COND (instruction ins, cfi.cond = cond; fmt = IF_DR_2I; break; - + default: + // TODO-Cleanup: add unreached() here + break; } // end switch (ins) assert(fmt != IF_NONE); @@ -5723,7 +5797,9 @@ void emitter::emitIns_R_I_FLAGS_COND (instruction ins, assert(!"Instruction cannot be encoded: ccmp/ccmn imm5"); } break; - + default: + // TODO-Cleanup: add unreached() here + break; } // end switch (ins) assert(fmt != IF_NONE); @@ -5762,7 +5838,9 @@ void emitter::emitIns_BARR (instruction ins, fmt = IF_SI_0B; imm = (ssize_t) barrier; break; - + default: + // TODO-Cleanup: add unreached() here + break; } // end switch (ins) assert(fmt != IF_NONE); @@ -6295,6 +6373,9 @@ void emitter::emitIns_R_L (instruction ins, case INS_adrp: fmt = IF_DI_1E; break; + default: + // TODO-Cleanup: add unreached() here + break; } assert(fmt == IF_DI_1E); @@ -6406,6 +6487,9 @@ void emitter::emitIns_J(instruction ins, // TODO-ARM64-CQ: fmt = IF_LARGEJMP; /* Assume the jump will be long */ fmt = IF_BI_0B; break; + default: + // TODO-Cleanup: add unreached() here + break; } assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B)); @@ -8564,6 +8648,9 @@ size_t emitter::emitOutputInstr(insGroup *ig, case EA_8BYTE: cmode = 0xE; // 1110 break; + default: + // TODO-Cleanup: add unreached() here + break; } code = emitInsCode(ins, fmt); @@ -9479,7 +9566,7 @@ void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t } else { - printf(operStr[1]); + printf("%c", operStr[1]); } emitDispImm(imm, false); printf("]"); diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index 8486175f10..d1cc16f18e 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -3890,7 +3890,7 @@ void CodeGen::instGen_MemoryBarrier() #elif defined (_TARGET_ARM_) getEmitter()->emitIns_I(INS_dmb, EA_4BYTE, 0xf); #elif defined (_TARGET_ARM64_) - NYI_ARM64("instGen_MemoryBarrier"); + getEmitter()->emitIns_BARR(INS_dmb, INS_BARRIER_ST); #else #error "Unknown _TARGET_" #endif diff --git a/src/jit/registerarm64.h b/src/jit/registerarm64.h index 70e6ead666..62d41fbe5e 100644 --- a/src/jit/registerarm64.h +++ b/src/jit/registerarm64.h @@ -12,7 +12,7 @@ #define REGALIAS(alias, realname) #endif -#define RMASK(x) (1LL << (x)) +#define RMASK(x) (1ULL << (x)) /* REGDEF(name, rnum, mask, xname, wname) */ @@ -57,7 +57,7 @@ REGALIAS(R29, FP) REGALIAS(R30, LR) #define VBASE 32 -#define VMASK(x) (1LL << (VBASE+(x))) +#define VMASK(x) (1ULL << (VBASE+(x))) /* REGDEF(name, rnum, mask, xname, wname) */ diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index a942ed4f63..9c310a5644 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -94,8 +94,10 @@ extern "C" { #define _M_IA64 64100 #elif defined(__x86_64__) && !defined(_M_AMD64) #define _M_AMD64 100 -#elif defined(__ARM_ARCH) && !defined(_M_ARM) +#elif defined(__arm__) && 
!defined(_M_ARM) #define _M_ARM 7 +#elif defined(__aarch64__) && !defined(_M_ARM64) +#define _M_ARM64 1 #endif #if defined(_M_IX86) && !defined(_X86_) @@ -118,6 +120,8 @@ extern "C" { #define _AMD64_ #elif defined(_M_ARM) && !defined(_ARM_) #define _ARM_ +#elif defined(_M_ARM64) && !defined(_ARM64_) +#define _ARM64_ #endif #endif // !_MSC_VER @@ -3133,6 +3137,184 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY { }; } IMAGE_ARM_RUNTIME_FUNCTION_ENTRY, * PIMAGE_ARM_RUNTIME_FUNCTION_ENTRY; +#elif defined(_ARM64_) + +#define CONTEXT_ARM64 0x00400000L + +#define CONTEXT_CONTROL (CONTEXT_ARM64 | 0x1L) +#define CONTEXT_INTEGER (CONTEXT_ARM64 | 0x2L) +#define CONTEXT_FLOATING_POINT (CONTEXT_ARM64 | 0x4L) +#define CONTEXT_DEBUG_REGISTERS (CONTEXT_ARM64 | 0x8L) + +#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) + +#define CONTEXT_ALL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS) + +#define CONTEXT_EXCEPTION_ACTIVE 0x8000000L +#define CONTEXT_SERVICE_ACTIVE 0x10000000L +#define CONTEXT_EXCEPTION_REQUEST 0x40000000L +#define CONTEXT_EXCEPTION_REPORTING 0x80000000L + +// +// This flag is set by the unwinder if it has unwound to a call +// site, and cleared whenever it unwinds through a trap frame. +// It is used by language-specific exception handlers to help +// differentiate exception scopes during dispatching. +// + +#define CONTEXT_UNWOUND_TO_CALL 0x20000000 + +// +// Define initial Cpsr/Fpscr value +// + +#define INITIAL_CPSR 0x10 +#define INITIAL_FPSCR 0 + +// begin_ntoshvp + +// +// Specify the number of breakpoints and watchpoints that the OS +// will track. Architecturally, ARM64 supports up to 16. In practice, +// however, almost no one implements more than 4 of each. +// + +#define ARM64_MAX_BREAKPOINTS 8 +#define ARM64_MAX_WATCHPOINTS 2 + +// +// Context Frame +// +// This frame has a several purposes: 1) it is used as an argument to +// NtContinue, 2) it is used to constuct a call frame for APC delivery, +// and 3) it is used in the user level thread creation routines. +// +// +// The flags field within this record controls the contents of a CONTEXT +// record. +// +// If the context record is used as an input parameter, then for each +// portion of the context record controlled by a flag whose value is +// set, it is assumed that that portion of the context record contains +// valid context. If the context record is being used to modify a threads +// context, then only that portion of the threads context is modified. +// +// If the context record is used as an output parameter to capture the +// context of a thread, then only those portions of the thread's context +// corresponding to set flags will be returned. +// +// CONTEXT_CONTROL specifies Sp, Lr, Pc, and Cpsr +// +// CONTEXT_INTEGER specifies R0-R12 +// +// CONTEXT_FLOATING_POINT specifies Q0-Q15 / D0-D31 / S0-S31 +// +// CONTEXT_DEBUG_REGISTERS specifies up to 16 of DBGBVR, DBGBCR, DBGWVR, +// DBGWCR. +// + +typedef struct _NEON128 { + ULONGLONG Low; + LONGLONG High; +} NEON128, *PNEON128; + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + + // + // Control flags. 
+ // + + /* +0x000 */ DWORD ContextFlags; + + // + // Integer registers + // + + /* +0x004 */ DWORD Cpsr; // NZVF + DAIF + CurrentEL + SPSel + /* +0x008 */ DWORD64 X0; + DWORD64 X1; + DWORD64 X2; + DWORD64 X3; + DWORD64 X4; + DWORD64 X5; + DWORD64 X6; + DWORD64 X7; + DWORD64 X8; + DWORD64 X9; + DWORD64 X10; + DWORD64 X11; + DWORD64 X12; + DWORD64 X13; + DWORD64 X14; + DWORD64 X15; + DWORD64 X16; + DWORD64 X17; + DWORD64 X18; + DWORD64 X19; + DWORD64 X20; + DWORD64 X21; + DWORD64 X22; + DWORD64 X23; + DWORD64 X24; + DWORD64 X25; + DWORD64 X26; + DWORD64 X27; + DWORD64 X28; + /* +0x0f0 */ DWORD64 Fp; + /* +0x0f8 */ DWORD64 Lr; + /* +0x100 */ DWORD64 Sp; + /* +0x108 */ DWORD64 Pc; + + // + // Floating Point/NEON Registers + // + + /* +0x110 */ NEON128 V[32]; + /* +0x310 */ DWORD Fpcr; + /* +0x314 */ DWORD Fpsr; + + // + // Debug registers + // + + /* +0x318 */ DWORD Bcr[ARM64_MAX_BREAKPOINTS]; + /* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; + /* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; + /* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; + /* +0x390 */ + +} CONTEXT, *PCONTEXT, *LPCONTEXT; + +// +// Nonvolatile context pointer record. +// + +typedef struct _KNONVOLATILE_CONTEXT_POINTERS { + + PDWORD64 X19; + PDWORD64 X20; + PDWORD64 X21; + PDWORD64 X22; + PDWORD64 X23; + PDWORD64 X24; + PDWORD64 X25; + PDWORD64 X26; + PDWORD64 X27; + PDWORD64 X28; + PDWORD64 Fp; + PDWORD64 Lr; + + PDWORD64 D8; + PDWORD64 D9; + PDWORD64 D10; + PDWORD64 D11; + PDWORD64 D12; + PDWORD64 D13; + PDWORD64 D14; + PDWORD64 D15; + +} KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; + #else #error Unknown architecture for defining CONTEXT. #endif @@ -3259,6 +3441,8 @@ PAL_GetLogicalProcessorCacheSizeFromOS(); #define PAL_CS_NATIVE_DATA_SIZE 96 #elif defined(__LINUX__) && defined(_ARM_) #define PAL_CS_NATIVE_DATA_SIZE 80 +#elif defined(__LINUX__) && defined(_ARM64_) +#define PAL_CS_NATIVE_DATA_SIZE 116 #else #warning #error PAL_CS_NATIVE_DATA_SIZE is not defined for this architecture diff --git a/src/pal/inc/rt/ntimage.h b/src/pal/inc/rt/ntimage.h index 77a19a9b9f..d6c273f794 100644 --- a/src/pal/inc/rt/ntimage.h +++ b/src/pal/inc/rt/ntimage.h @@ -940,6 +940,25 @@ typedef IMAGE_RELOCATION UNALIGNED *PIMAGE_RELOCATION; #define IMAGE_REL_ARM_SECTION 0x000E // Section table index #define IMAGE_REL_ARM_SECREL 0x000F // Offset within section +// +// ARM64 relocation types +// +#define IMAGE_REL_ARM64_ABSOLUTE 0x0000 +#define IMAGE_REL_ARM64_ADDR32 0x0001 +#define IMAGE_REL_ARM64_ADDR32NB 0x0002 +#define IMAGE_REL_ARM64_BRANCH26 0x0003 +#define IMAGE_REL_ARM64_PAGEBASE_REL21 0x0004 +#define IMAGE_REL_ARM64_REL21 0x0005 +#define IMAGE_REL_ARM64_PAGEOFFSET_12A 0x0006 +#define IMAGE_REL_ARM64_PAGEOFFSET_12L 0x0007 +#define IMAGE_REL_ARM64_SECREL 0x0008 +#define IMAGE_REL_ARM64_SECREL_LOW12A 0x0009 +#define IMAGE_REL_ARM64_SECREL_HIGH12A 0x000A +#define IMAGE_REL_ARM64_SECREL_LOW12L 0x000B +#define IMAGE_REL_ARM64_TOKEN 0x000C +#define IMAGE_REL_ARM64_SECTION 0x000D +#define IMAGE_REL_ARM64_ADDR64 0x000E + #define IMAGE_REL_AM_ABSOLUTE 0x0000 #define IMAGE_REL_AM_ADDR32 0x0001 #define IMAGE_REL_AM_ADDR32NB 0x0002 diff --git a/src/pal/inc/rt/palrt.h b/src/pal/inc/rt/palrt.h index 994aa6c0fa..cee51734fa 100644 --- a/src/pal/inc/rt/palrt.h +++ b/src/pal/inc/rt/palrt.h @@ -1387,6 +1387,7 @@ typedef VOID (__stdcall *WAITORTIMERCALLBACK)(PVOID, BOOLEAN); #define PROCESSOR_ARCHITECTURE_AMD64 9 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 10 #define PROCESSOR_ARCHITECTURE_NEUTRAL 11 +#define PROCESSOR_ARCHITECTURE_ARM64 
12 #define PROCESSOR_ARCHITECTURE_UNKNOWN 0xFFFF @@ -1636,6 +1637,24 @@ typedef struct _DISPATCHER_CONTEXT { DWORD Reserved; } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT; +#elif defined(_ARM64_) + +typedef struct _DISPATCHER_CONTEXT { + ULONG64 ControlPc; + ULONG64 ImageBase; + PRUNTIME_FUNCTION FunctionEntry; + ULONG64 EstablisherFrame; + ULONG64 TargetPc; + PCONTEXT ContextRecord; + PEXCEPTION_ROUTINE LanguageHandler; + PVOID HandlerData; + PUNWIND_HISTORY_TABLE HistoryTable; + ULONG64 ScopeIndex; + BOOLEAN ControlPcIsUnwound; + PBYTE NonVolatileRegisters; + ULONG64 Reserved; +} DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT; + #else typedef struct _DISPATCHER_CONTEXT { diff --git a/src/pal/inc/unixasmmacros.inc b/src/pal/inc/unixasmmacros.inc index f996f9505e..4ccb17dd0a 100644 --- a/src/pal/inc/unixasmmacros.inc +++ b/src/pal/inc/unixasmmacros.inc @@ -60,4 +60,6 @@ #include "unixasmmacrosamd64.inc" #elif defined(_ARM_) #include "unixasmmacrosarm.inc" +#elif defined(_ARM64_) +#include "unixasmmacrosarm64.inc" #endif diff --git a/src/pal/inc/unixasmmacrosarm.inc b/src/pal/inc/unixasmmacrosarm.inc index 40daa7a71c..6418bdcb5d 100644 --- a/src/pal/inc/unixasmmacrosarm.inc +++ b/src/pal/inc/unixasmmacrosarm.inc @@ -222,4 +222,4 @@ C_FUNC(\Name\()_End): EMIT_BREAKPOINT 0: #endif -.endm
\ No newline at end of file +.endm diff --git a/src/pal/inc/unixasmmacrosarm64.inc b/src/pal/inc/unixasmmacrosarm64.inc new file mode 100644 index 0000000000..62df30614c --- /dev/null +++ b/src/pal/inc/unixasmmacrosarm64.inc @@ -0,0 +1,270 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END_MARKED Name, Section + .global C_FUNC(\Name\()_End) +C_FUNC(\Name\()_End): + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL] +.endm + +.macro PROLOG_STACK_ALLOC Size + sub sp, sp, \Size + .cfi_adjust_cfa_offset \Size +.endm + +.macro EPILOG_STACK_FREE Size + add sp, sp, \Size +.endm + +.macro EPILOG_STACK_RESTORE + mov sp, fp +.endm + +.macro PROLOG_SAVE_REG reg, ofs + str \reg, [sp, \ofs] +.endm + +.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs] + .ifc \reg1, fp + mov fp, sp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! + .ifc \reg1, fp + mov fp, sp + .endif +.endm + +.macro EPILOG_RESTORE_REG reg, ofs + ldr \reg, [sp, \ofs] +.endm + +.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs + ldp \reg1, \reg2, [sp, \ofs] +.endm + +.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs + ldp \reg1, \reg2, [sp], \ofs +.endm + +.macro EPILOG_RETURN + ret +.endm + +.macro EMIT_BREAKPOINT + brk #0 +.endm + +//----------------------------------------------------------------------------- +// Define the prolog for a TransitionFrame-based method. This macro should be called first in the method and +// comprises the entire prolog (i.e. don't modify SP after calling this).The locals must be 8 byte aligned +// +.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals, SaveFPArgs + + __PWTB_FloatArgumentRegisters = \extraLocals + + .if ((__PWTB_FloatArgumentRegisters % 16) != 0) + __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 8 + .endif + + __PWTB_TransitionBlock = __PWTB_FloatArgumentRegisters + + .if \SaveFPArgs > 0 + __PWTB_TransitionBlock = __PWTB_TransitionBlock + SIZEOF__FloatArgumentRegisters + .endif + + __PWTB_StackAlloc = __PWTB_TransitionBlock + __PWTB_ArgumentRegisters = __PWTB_StackAlloc + 96 + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-160 + // Spill callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #16 + PROLOG_SAVE_REG_PAIR x21, x22, #32 + PROLOG_SAVE_REG_PAIR x23, x24, #48 + PROLOG_SAVE_REG_PAIR x25, x26, #64 + PROLOG_SAVE_REG_PAIR x27, x28, #80 + + // Allocate space for the rest of the frame + PROLOG_STACK_ALLOC __PWTB_StackAlloc + + // Spill argument registers. 
+ SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + + .if \SaveFPArgs > 0 + SAVE_FLOAT_ARGUMENT_REGISTERS sp, \extraLocals + .endif + +.endm + +//----------------------------------------------------------------------------- +// The Following sets of SAVE_*_REGISTERS expect the memory to be reserved and +// base address to be passed in $reg +// + +// Reserve 64 bytes of memory before calling SAVE_ARGUMENT_REGISTERS +.macro SAVE_ARGUMENT_REGISTERS reg, ofs + + stp x0, x1, [\reg, #(\ofs)] + stp x2, x3, [\reg, #(\ofs + 16)] + stp x4, x5, [\reg, #(\ofs + 32)] + stp x6, x7, [\reg, #(\ofs + 48)] + +.endm + +// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS +.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs + + stp d0, d1, [\reg, #(\ofs)] + stp d2, d3, [\reg, #(\ofs + 16)] + stp d4, d5, [\reg, #(\ofs + 32)] + stp d6, d7, [\reg, #(\ofs + 48)] + +.endm + +.macro RESTORE_ARGUMENT_REGISTERS reg, ofs + + ldp x0, x1, [\reg, #(\ofs)] + ldp x2, x3, [\reg, #(\ofs + 16)] + ldp x4, x5, [\reg, #(\ofs + 32)] + ldp x6, x7, [\reg, #(\ofs + 48)] + +.endm + +.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs + + ldp d0, d1, [\reg, #(\ofs)] + ldp d2, d3, [\reg, #(\ofs + 16)] + ldp d4, d5, [\reg, #(\ofs + 32)] + ldp d6, d7, [\reg, #(\ofs + 48)] + +.endm + +.macro EPILOG_BRANCH_REG reg + + br \reg + +.endm + +//----------------------------------------------------------------------------- +// Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and ends by preparing for tail-calling. +// Since this is a tail call argument registers are restored. +// +.macro EPILOG_WITH_TRANSITION_BLOCK_TAILCALL extraLocals, SaveFPArgs + + __PWTB_FloatArgumentRegisters = \extraLocals + + .if ((__PWTB_FloatArgumentRegisters % 16) != 0) + __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 8 + .endif + + __PWTB_TransitionBlock = __PWTB_FloatArgumentRegisters + + .if \SaveFPArgs > 0 + __PWTB_TransitionBlock = __PWTB_TransitionBlock + SIZEOF__FloatArgumentRegisters + .endif + + __PWTB_StackAlloc = __PWTB_TransitionBlock + __PWTB_ArgumentRegisters = __PWTB_StackAlloc + 96 + + .if \SaveFPArgs > 0 + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters + .endif + + RESTORE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + EPILOG_STACK_FREE __PWTB_StackAlloc + + EPILOG_RESTORE_REG_PAIR x19, x20, #16 + EPILOG_RESTORE_REG_PAIR x21, x22, #32 + EPILOG_RESTORE_REG_PAIR x23, x24, #48 + EPILOG_RESTORE_REG_PAIR x25, x26, #64 + EPILOG_RESTORE_REG_PAIR x27, x28, #80 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #160 + +.endm + +// ------------------------------------------------------------------ +// Macro to generate Redirection Stubs +// +// $reason : reason for redirection +// Eg. GCThreadControl +// NOTE: If you edit this macro, make sure you update GetCONTEXTFromRedirectedStubStackFrame. +// This function is used by both the personality routine and the debugger to retrieve the original CONTEXT. 
+.macro GenerateRedirectedHandledJITCaseStub reason + +#if NOTYET + GBLS __RedirectionStubFuncName + GBLS __RedirectionStubEndFuncName + GBLS __RedirectionFuncName +__RedirectionStubFuncName SETS "RedirectedHandledJITCaseFor":CC:"$reason":CC:"_Stub" +__RedirectionStubEndFuncName SETS "RedirectedHandledJITCaseFor":CC:"$reason":CC:"_StubEnd" +__RedirectionFuncName SETS "|?RedirectedHandledJITCaseFor":CC:"$reason":CC:"@Thread@@CAXXZ|" + + IMPORT $__RedirectionFuncName + + NESTED_ENTRY $__RedirectionStubFuncName + PROLOG_SAVE_REG_PAIR fp, lr, #-16 + sub sp, sp, #16 // stack slot for CONTEXT * and padding + + //REDIRECTSTUB_SP_OFFSET_CONTEXT is defined in asmconstants.h and is used in GetCONTEXTFromRedirectedStubStackFrame + //If CONTEXT is not saved at 0 offset from SP it must be changed as well. + ASSERT REDIRECTSTUB_SP_OFFSET_CONTEXT == 0 + + // Stack alignment. This check is necessary as this function can be + // entered before complete execution of the prolog of another function. + and x8, fp, #15 + sub sp, sp, x8 + + + // + // Save a copy of the redirect CONTEXT*. + // This is needed for the debugger to unwind the stack. + // + bl GetCurrentSavedRedirectContext + str x0, [sp] + + // + // Fetch the interrupted pc and save it as our return address. + // + ldr x1, [x0, #CONTEXT_Pc] + str x1, [fp, #8] + + // + // Call target, which will do whatever we needed to do in the context + // of the target thread, and will RtlRestoreContext when it is done. + // + bl $__RedirectionFuncName + + EMIT_BREAKPOINT // Unreachable + +// Put a label here to tell the debugger where the end of this function is. +$__RedirectionStubEndFuncName + EXPORT $__RedirectionStubEndFuncName + + NESTED_END +#else + EMIT_BREAKPOINT +#endif + +.endm diff --git a/src/pal/src/CMakeLists.txt b/src/pal/src/CMakeLists.txt index 07aa78cce4..0709764a3e 100644 --- a/src/pal/src/CMakeLists.txt +++ b/src/pal/src/CMakeLists.txt @@ -30,6 +30,9 @@ if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL armv7l) set(PAL_CMAKE_PLATFORM_ARCH_ARM 1) add_definitions(-D_ARM_) +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) + set(PAL_CMAKE_PLATFORM_ARCH_ARM64 1) + add_definitions(-D_ARM64_) else() message(FATAL_ERROR "Only ARM and AMD64 is supported") endif() @@ -60,6 +63,9 @@ if(PAL_CMAKE_PLATFORM_ARCH_AMD64) elseif(PAL_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DBIT32=1) add_definitions(-D_WIN32=1) +elseif(PAL_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-DBIT64=1) + add_definitions(-D_WIN64=1) endif() @@ -76,6 +82,11 @@ elseif(PAL_CMAKE_PLATFORM_ARCH_ARM) arch/arm/context2.S arch/arm/processor.cpp ) +elseif(PAL_CMAKE_PLATFORM_ARCH_ARM64) + set(ARCH_SOURCES + arch/arm64/context2.S + arch/arm64/processor.cpp + ) endif() set(SOURCES diff --git a/src/pal/src/arch/arm64/context2.S b/src/pal/src/arch/arm64/context2.S new file mode 100644 index 0000000000..3d9cfb94bd --- /dev/null +++ b/src/pal/src/arch/arm64/context2.S @@ -0,0 +1,301 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// +// Implementation of _CONTEXT_CaptureContext for the ARM platform. +// This function is processor dependent. It is used by exception handling, +// and is always apply to the current thread. 
+// + +#include "unixasmmacros.inc" + +#define CONTEXT_ARM64 0x00400000L + +#define CONTEXT_CONTROL (CONTEXT_ARM64 | 0x1L) +#define CONTEXT_INTEGER (CONTEXT_ARM64 | 0x2L) +#define CONTEXT_FLOATING_POINT (CONTEXT_ARM64 | 0x4L) +#define CONTEXT_DEBUG_REGISTERS (CONTEXT_ARM64 | 0x8L) + +#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) + +#define CONTEXT_ContextFlags 0 +#define CONTEXT_Cpsr CONTEXT_ContextFlags+4 +#define CONTEXT_X0 CONTEXT_Cpsr+4 +#define CONTEXT_X1 CONTEXT_X0+8 +#define CONTEXT_X2 CONTEXT_X1+8 +#define CONTEXT_X3 CONTEXT_X2+8 +#define CONTEXT_X4 CONTEXT_X3+8 +#define CONTEXT_X5 CONTEXT_X4+8 +#define CONTEXT_X6 CONTEXT_X5+8 +#define CONTEXT_X7 CONTEXT_X6+8 +#define CONTEXT_X8 CONTEXT_X7+8 +#define CONTEXT_X9 CONTEXT_X8+8 +#define CONTEXT_X10 CONTEXT_X9+8 +#define CONTEXT_X11 CONTEXT_X10+8 +#define CONTEXT_X12 CONTEXT_X11+8 +#define CONTEXT_X13 CONTEXT_X12+8 +#define CONTEXT_X14 CONTEXT_X13+8 +#define CONTEXT_X15 CONTEXT_X14+8 +#define CONTEXT_X16 CONTEXT_X15+8 +#define CONTEXT_X17 CONTEXT_X16+8 +#define CONTEXT_X18 CONTEXT_X17+8 +#define CONTEXT_X19 CONTEXT_X18+8 +#define CONTEXT_X20 CONTEXT_X19+8 +#define CONTEXT_X21 CONTEXT_X20+8 +#define CONTEXT_X22 CONTEXT_X21+8 +#define CONTEXT_X23 CONTEXT_X22+8 +#define CONTEXT_X24 CONTEXT_X23+8 +#define CONTEXT_X25 CONTEXT_X24+8 +#define CONTEXT_X26 CONTEXT_X25+8 +#define CONTEXT_X27 CONTEXT_X26+8 +#define CONTEXT_X28 CONTEXT_X27+8 +#define CONTEXT_Fp CONTEXT_X28+8 +#define CONTEXT_Lr CONTEXT_Fp+8 +#define CONTEXT_Sp CONTEXT_Lr+8 +#define CONTEXT_Pc CONTEXT_Sp+8 +#define CONTEXT_NEON_OFFSET CONTEXT_Pc+8 +#define CONTEXT_V0 0 +#define CONTEXT_V1 CONTEXT_V0+16 +#define CONTEXT_V2 CONTEXT_V1+16 +#define CONTEXT_V3 CONTEXT_V2+16 +#define CONTEXT_V4 CONTEXT_V3+16 +#define CONTEXT_V5 CONTEXT_V4+16 +#define CONTEXT_V6 CONTEXT_V5+16 +#define CONTEXT_V7 CONTEXT_V6+16 +#define CONTEXT_V8 CONTEXT_V7+16 +#define CONTEXT_V9 CONTEXT_V8+16 +#define CONTEXT_V10 CONTEXT_V9+16 +#define CONTEXT_V11 CONTEXT_V10+16 +#define CONTEXT_V12 CONTEXT_V11+16 +#define CONTEXT_V13 CONTEXT_V12+16 +#define CONTEXT_V14 CONTEXT_V13+16 +#define CONTEXT_V15 CONTEXT_V14+16 +#define CONTEXT_V16 CONTEXT_V15+16 +#define CONTEXT_V17 CONTEXT_V16+16 +#define CONTEXT_V18 CONTEXT_V17+16 +#define CONTEXT_V19 CONTEXT_V18+16 +#define CONTEXT_V20 CONTEXT_V19+16 +#define CONTEXT_V21 CONTEXT_V20+16 +#define CONTEXT_V22 CONTEXT_V21+16 +#define CONTEXT_V23 CONTEXT_V22+16 +#define CONTEXT_V24 CONTEXT_V23+16 +#define CONTEXT_V25 CONTEXT_V24+16 +#define CONTEXT_V26 CONTEXT_V25+16 +#define CONTEXT_V27 CONTEXT_V26+16 +#define CONTEXT_V28 CONTEXT_V27+16 +#define CONTEXT_V29 CONTEXT_V28+16 +#define CONTEXT_V30 CONTEXT_V29+16 +#define CONTEXT_V31 CONTEXT_V30+16 +#define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31 +#define CONTEXT_Fpcr 0 +#define CONTEXT_Fpsr CONTEXT_Fpcr+4 + +// Incoming: +// x0: Context* +// +LEAF_ENTRY CONTEXT_CaptureContext, _TEXT + sub sp, sp, #32 + // save x1, x2 and x3 on stack so we can use them as scratch + stp x1, x2, [sp] + str x3, [sp, 16] + // save the current flags on the stack + mrs x1, nzcv + str x1, [sp, 24] + + ldr w1, [x0, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w2, #0x40, lsl #16 + movk w2, #0x1 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(Done_CONTEXT_CONTROL) + + // save the cpsr + ldr x2, [sp, 24] + str w2, [x0, CONTEXT_Cpsr] + stp fp, lr, [x0, CONTEXT_Fp] + add sp, sp, #32 + mov x2, sp + stp x2, lr, [x0, CONTEXT_Sp] + sub sp, sp, #32 + 
+LOCAL_LABEL(Done_CONTEXT_CONTROL): + // we dont clobber x1 in the CONTEXT_CONTROL case + // ldr w1, [x0, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w2, #0x40, lsl #16 + movk w2, #0x2 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(Done_CONTEXT_INTEGER) + + ldp x1, x2, [sp] + ldr x3, [sp, 16] + + stp x0, x1, [x0, CONTEXT_X0] + stp x2, x3, [x0, CONTEXT_X2] + stp x4, x5, [x0, CONTEXT_X4] + stp x6, x7, [x0, CONTEXT_X6] + stp x8, x9, [x0, CONTEXT_X8] + stp x10, x11, [x0, CONTEXT_X10] + stp x12, x13, [x0, CONTEXT_X12] + stp x14, x15, [x0, CONTEXT_X14] + stp x16, x17, [x0, CONTEXT_X16] + stp x18, x19, [x0, CONTEXT_X18] + stp x20, x21, [x0, CONTEXT_X20] + stp x22, x23, [x0, CONTEXT_X22] + stp x24, x25, [x0, CONTEXT_X24] + stp x26, x27, [x0, CONTEXT_X26] + str x28, [x0, CONTEXT_X28] + + +LOCAL_LABEL(Done_CONTEXT_INTEGER): + ldr w1, [x0, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w2, #0x40, lsl #16 + movk w2, #0x4 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT) + + add x0, x0, CONTEXT_NEON_OFFSET + stp q0, q1, [x0, CONTEXT_V0] + stp q2, q3, [x0, CONTEXT_V2] + stp q4, q5, [x0, CONTEXT_V4] + stp q6, q7, [x0, CONTEXT_V6] + stp q8, q9, [x0, CONTEXT_V8] + stp q10, q11, [x0, CONTEXT_V10] + stp q12, q13, [x0, CONTEXT_V12] + stp q14, q15, [x0, CONTEXT_V14] + stp q16, q17, [x0, CONTEXT_V16] + stp q18, q19, [x0, CONTEXT_V18] + stp q20, q21, [x0, CONTEXT_V20] + stp q22, q23, [x0, CONTEXT_V22] + stp q24, q25, [x0, CONTEXT_V24] + stp q26, q27, [x0, CONTEXT_V26] + stp q28, q29, [x0, CONTEXT_V28] + stp q30, q31, [x0, CONTEXT_V30] + add x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + mrs x1, fpcr + mrs x2, fpsr + sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + stp x1, x2, [x0, CONTEXT_Fpcr] + sub x0, x0, CONTEXT_NEON_OFFSET + +LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): + + add sp, sp, #32 + ret +LEAF_END CONTEXT_CaptureContext, _TEXT + +// Incoming: +// x0: Context* + +LEAF_ENTRY RtlCaptureContext, _TEXT + sub sp, sp, #16 + str x1, [sp] + // same as above, clang doesn't like mov with #imm32 + // keep this in sync if CONTEXT_FULL changes + movz w1, #0x40, lsl #16 + orr w1, w1, #0x1 + orr w1, w1, #0x2 + orr w1, w1, #0x4 + orr w1, w1, #0x8 + str w1, [x0, CONTEXT_ContextFlags] + ldr x1, [sp] + add sp, sp, #16 + b C_FUNC(CONTEXT_CaptureContext) +LEAF_END RtlCaptureContext, _TEXT + +// Incoming: +// x0: Context* +// x1: Exception* +// +LEAF_ENTRY RtlRestoreContext, _TEXT + // aarch64 specifies: + // IP0 and IP1, the Intra-Procedure Call temporary registers, + // are available for use by e.g. veneers or branch islands during a procedure call. + // They are otherwise corruptible. 
+ // Since we cannot control $pc directly, we're going to corrupt x16 and x17 + // so that we can restore control + // since we potentially clobber x0 below, we'll bank it in x16 + mov x16, x0 + + ldr w2, [x16, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w3, #0x40, lsl #16 + movk w3, #0x4 + mov w4, w3 + and w3, w2, w3 + cmp w3, w4 + b.ne LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT) + + add x16, x16, CONTEXT_NEON_OFFSET + ldp q0, q1, [x16, CONTEXT_V0] + ldp q2, q3, [x16, CONTEXT_V2] + ldp q4, q5, [x16, CONTEXT_V4] + ldp q6, q7, [x16, CONTEXT_V6] + ldp q8, q9, [x16, CONTEXT_V8] + ldp q10, q11, [x16, CONTEXT_V10] + ldp q12, q13, [x16, CONTEXT_V12] + ldp q14, q15, [x16, CONTEXT_V14] + ldp q16, q17, [x16, CONTEXT_V16] + ldp q18, q19, [x16, CONTEXT_V18] + ldp q20, q21, [x16, CONTEXT_V20] + ldp q22, q23, [x16, CONTEXT_V22] + ldp q24, q25, [x16, CONTEXT_V24] + ldp q26, q27, [x16, CONTEXT_V26] + ldp q28, q29, [x16, CONTEXT_V28] + ldp q30, q31, [x16, CONTEXT_V30] + ldp x1, x2, [x16, CONTEXT_Fpcr] + msr fpcr, x1 + msr fpsr, x2 + sub x16, x16, CONTEXT_NEON_OFFSET + +LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT): + movz w2, #0x40, lsl #16 + movk w2, #0x2 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(No_Restore_CONTEXT_INTEGER) + + ldp x0, x1, [x16, CONTEXT_X0] + ldp x2, x3, [x16, CONTEXT_X2] + ldp x4, x5, [x16, CONTEXT_X4] + ldp x6, x7, [x16, CONTEXT_X6] + ldp x8, x9, [x16, CONTEXT_X8] + ldp x10, x11, [x16, CONTEXT_X10] + ldp x12, x13, [x16, CONTEXT_X12] + ldp x14, x15, [x16, CONTEXT_X14] + ldp x18, x19, [x16, CONTEXT_X18] + ldp x20, x21, [x16, CONTEXT_X20] + ldp x22, x23, [x16, CONTEXT_X22] + ldp x24, x25, [x16, CONTEXT_X24] + ldp x26, x27, [x16, CONTEXT_X26] + ldr x28, [x16, CONTEXT_X28] + +LOCAL_LABEL(No_Restore_CONTEXT_INTEGER): + movz w2, #0x40, lsl #16 + movk w2, #0x2 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(No_Restore_CONTEXT_CONTROL) + + ldr w17, [x16, CONTEXT_Cpsr] + msr nzcv, x17 + ldp fp, lr, [x16, CONTEXT_Fp] + ldr x17, [x16, CONTEXT_Sp] + mov sp, x17 + ldr x17, [x16, CONTEXT_Pc] + br x17 + +LOCAL_LABEL(No_Restore_CONTEXT_CONTROL): + ret + +LEAF_END RtlRestoreContext, _TEXT diff --git a/src/pal/src/arch/arm64/processor.cpp b/src/pal/src/arch/arm64/processor.cpp new file mode 100644 index 0000000000..3f37374f91 --- /dev/null +++ b/src/pal/src/arch/arm64/processor.cpp @@ -0,0 +1,42 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +/*++ + + + +Module Name: + + processor.cpp + +Abstract: + + Implementation of processor related functions for the ARM64 + platform. These functions are processor dependent. + + + +--*/ + +#include "pal/palinternal.h" + +/*++ +Function: +YieldProcessor + +The YieldProcessor function signals to the processor to give resources +to threads that are waiting for them. This macro is only effective on +processors that support technology allowing multiple threads running +on a single processor, such as Intel's Hyper-Threading technology. 
+ +--*/ +void +PALAPI +YieldProcessor( + VOID) +{ + return; +} + diff --git a/src/pal/src/exception/seh-unwind.cpp b/src/pal/src/exception/seh-unwind.cpp index 9dd025c51d..aee921dd06 100644 --- a/src/pal/src/exception/seh-unwind.cpp +++ b/src/pal/src/exception/seh-unwind.cpp @@ -50,6 +50,22 @@ Abstract: ASSIGN_REG(R13) \ ASSIGN_REG(R14) \ ASSIGN_REG(R15) +#elif defined(_ARM64_) +#define ASSIGN_UNWIND_REGS \ + ASSIGN_REG(Pc) \ + ASSIGN_REG(Sp) \ + ASSIGN_REG(Fp) \ + ASSIGN_REG(Lr) \ + ASSIGN_REG(X19) \ + ASSIGN_REG(X20) \ + ASSIGN_REG(X21) \ + ASSIGN_REG(X22) \ + ASSIGN_REG(X23) \ + ASSIGN_REG(X24) \ + ASSIGN_REG(X25) \ + ASSIGN_REG(X26) \ + ASSIGN_REG(X27) \ + ASSIGN_REG(X28) #else #error unsupported architecture #endif @@ -111,6 +127,21 @@ static void UnwindContextToWinContext(unw_cursor_t *cursor, CONTEXT *winContext) unw_get_reg(cursor, UNW_ARM_R9, (unw_word_t *) &winContext->R9); unw_get_reg(cursor, UNW_ARM_R10, (unw_word_t *) &winContext->R10); unw_get_reg(cursor, UNW_ARM_R11, (unw_word_t *) &winContext->R11); +#elif defined(_ARM64_) + unw_get_reg(cursor, UNW_REG_IP, (unw_word_t *) &winContext->Pc); + unw_get_reg(cursor, UNW_REG_SP, (unw_word_t *) &winContext->Sp); + unw_get_reg(cursor, UNW_AARCH64_X29, (unw_word_t *) &winContext->Fp); + unw_get_reg(cursor, UNW_AARCH64_X30, (unw_word_t *) &winContext->Lr); + unw_get_reg(cursor, UNW_AARCH64_X19, (unw_word_t *) &winContext->X19); + unw_get_reg(cursor, UNW_AARCH64_X20, (unw_word_t *) &winContext->X20); + unw_get_reg(cursor, UNW_AARCH64_X21, (unw_word_t *) &winContext->X21); + unw_get_reg(cursor, UNW_AARCH64_X22, (unw_word_t *) &winContext->X22); + unw_get_reg(cursor, UNW_AARCH64_X23, (unw_word_t *) &winContext->X23); + unw_get_reg(cursor, UNW_AARCH64_X24, (unw_word_t *) &winContext->X24); + unw_get_reg(cursor, UNW_AARCH64_X25, (unw_word_t *) &winContext->X25); + unw_get_reg(cursor, UNW_AARCH64_X26, (unw_word_t *) &winContext->X26); + unw_get_reg(cursor, UNW_AARCH64_X27, (unw_word_t *) &winContext->X27); + unw_get_reg(cursor, UNW_AARCH64_X28, (unw_word_t *) &winContext->X28); #else #error unsupported architecture #endif @@ -152,17 +183,50 @@ static void GetContextPointers(unw_cursor_t *cursor, unw_context_t *unwContext, GetContextPointer(cursor, unwContext, UNW_ARM_R9, &contextPointers->R9); GetContextPointer(cursor, unwContext, UNW_ARM_R10, &contextPointers->R10); GetContextPointer(cursor, unwContext, UNW_ARM_R11, &contextPointers->R11); +#elif defined(_ARM64_) + GetContextPointer(cursor, unwContext, UNW_AARCH64_X19, &contextPointers->X19); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X20, &contextPointers->X20); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X21, &contextPointers->X21); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X22, &contextPointers->X22); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X23, &contextPointers->X23); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X24, &contextPointers->X24); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X25, &contextPointers->X25); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X26, &contextPointers->X26); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X27, &contextPointers->X27); + GetContextPointer(cursor, unwContext, UNW_AARCH64_X28, &contextPointers->X28); #else #error unsupported architecture #endif } +static DWORD64 GetPc(CONTEXT *context) +{ +#if defined(_AMD64_) + return context->Rip; +#elif defined(_ARM64_) + return context->Pc; +#else +#error don't know how to get the program counter for this architecture +#endif +} + +static void 
SetPc(CONTEXT *context, DWORD64 pc) +{ +#if defined(_AMD64_) + context->Rip = pc; +#elif defined(_ARM64_) + context->Pc = pc; +#else +#error don't know how to set the program counter for this architecture +#endif +} + BOOL PAL_VirtualUnwind(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextPointers) { int st; unw_context_t unwContext; unw_cursor_t cursor; -#if defined(__APPLE__) || defined(__FreeBSD__) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_ARM64_) DWORD64 curPc; #endif @@ -187,7 +251,7 @@ BOOL PAL_VirtualUnwind(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextP WinContextToUnwindCursor(context, &cursor); #endif -#if defined(__APPLE__) || defined(__FreeBSD__) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_ARM64_) // OSX and FreeBSD appear to do two different things when unwinding // 1: If it reaches where it cannot unwind anymore, say a // managed frame. It wil return 0, but also update the $pc @@ -196,7 +260,7 @@ BOOL PAL_VirtualUnwind(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextP // The behaviour of libunwind from nongnu.org is to null the PC // So we bank the original PC here, so we can compare it after // the step - curPc = context->Rip; + curPc = GetPc(context); #endif st = unw_step(&cursor); @@ -208,10 +272,10 @@ BOOL PAL_VirtualUnwind(CONTEXT *context, KNONVOLATILE_CONTEXT_POINTERS *contextP // Update the passed in windows context to reflect the unwind // UnwindContextToWinContext(&cursor, context); -#if defined(__APPLE__) || defined(__FreeBSD__) - if (st == 0 && context->Rip == curPc) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_ARM64_) + if (st == 0 && GetPc(context) == curPc) { - context->Rip = 0; + SetPc(context, 0); } #endif @@ -257,7 +321,7 @@ static void RtlpRaiseException(EXCEPTION_RECORD *ExceptionRecord) ExceptionRecord->ExceptionAddress = (void *) ContextRecord.Eip; #elif defined(_AMD64_) ExceptionRecord->ExceptionAddress = (void *) ContextRecord.Rip; -#elif defined(_ARM_) +#elif defined(_ARM_) || defined(_ARM64_) ExceptionRecord->ExceptionAddress = (void *) ContextRecord.Pc; #else #error unsupported architecture diff --git a/src/pal/src/include/pal/context.h b/src/pal/src/include/pal/context.h index b1dc76c420..5f0718725b 100644 --- a/src/pal/src/include/pal/context.h +++ b/src/pal/src/include/pal/context.h @@ -103,6 +103,44 @@ typedef ucontext_t native_context_t; #ifdef BIT64 +#if defined(_ARM64_) +#define MCREG_X0(mc) ((mc).regs[0]) +#define MCREG_X1(mc) ((mc).regs[1]) +#define MCREG_X2(mc) ((mc).regs[2]) +#define MCREG_X3(mc) ((mc).regs[3]) +#define MCREG_X4(mc) ((mc).regs[4]) +#define MCREG_X5(mc) ((mc).regs[5]) +#define MCREG_X6(mc) ((mc).regs[6]) +#define MCREG_X7(mc) ((mc).regs[7]) +#define MCREG_X8(mc) ((mc).regs[8]) +#define MCREG_X9(mc) ((mc).regs[9]) +#define MCREG_X10(mc) ((mc).regs[10]) +#define MCREG_X11(mc) ((mc).regs[11]) +#define MCREG_X12(mc) ((mc).regs[12]) +#define MCREG_X13(mc) ((mc).regs[13]) +#define MCREG_X14(mc) ((mc).regs[14]) +#define MCREG_X15(mc) ((mc).regs[15]) +#define MCREG_X16(mc) ((mc).regs[16]) +#define MCREG_X17(mc) ((mc).regs[17]) +#define MCREG_X18(mc) ((mc).regs[18]) +#define MCREG_X19(mc) ((mc).regs[19]) +#define MCREG_X20(mc) ((mc).regs[20]) +#define MCREG_X21(mc) ((mc).regs[21]) +#define MCREG_X22(mc) ((mc).regs[22]) +#define MCREG_X23(mc) ((mc).regs[23]) +#define MCREG_X24(mc) ((mc).regs[24]) +#define MCREG_X25(mc) ((mc).regs[25]) +#define MCREG_X26(mc) ((mc).regs[26]) +#define MCREG_X27(mc) ((mc).regs[27]) +#define MCREG_X28(mc) ((mc).regs[28]) +#define 
MCREG_Fp(mc) ((mc).regs[29]) +#define MCREG_Lr(mc) ((mc).regs[30]) + +#define MCREG_Sp(mc) ((mc).sp) +#define MCREG_Pc(mc) ((mc).pc) +#define MCREG_PState(mc) ((mc).pstate) +#define MCREG_Cpsr(mc) ((mc).cpsr) +#else // For FreeBSD, as found in x86/ucontext.h #define MCREG_Rbp(mc) ((mc).mc_rbp) #define MCREG_Rip(mc) ((mc).mc_rip) @@ -138,6 +176,7 @@ typedef ucontext_t native_context_t; #define FPREG_Xmm(uc, index) *(M128A*) &(FPSTATE(uc)->sv_xmm[index]) #define FPREG_St(uc, index) *(M128A*) &(FPSTATE(uc)->sv_fp[index].fp_acc) +#endif #else // BIT64 diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp index 3742b5ddf1..050a4ac88a 100644 --- a/src/pal/src/thread/context.cpp +++ b/src/pal/src/thread/context.cpp @@ -41,6 +41,8 @@ extern void CONTEXT_CaptureContext(LPCONTEXT lpContext); #define CONTEXT_ALL_FLOATING CONTEXT_FLOATING_POINT #elif defined(_ARM_) #define CONTEXT_ALL_FLOATING CONTEXT_FLOATING_POINT +#elif defined(_ARM64_) +#define CONTEXT_ALL_FLOATING CONTEXT_FLOATING_POINT #else #error Unexpected architecture. #endif @@ -117,6 +119,45 @@ extern void CONTEXT_CaptureContext(LPCONTEXT lpContext); ASSIGN_REG(R10) \ ASSIGN_REG(R11) \ ASSIGN_REG(R12) +#elif defined(_ARM64_) +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Sp) \ + ASSIGN_REG(Lr) \ + ASSIGN_REG(Pc) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(X0) \ + ASSIGN_REG(X1) \ + ASSIGN_REG(X2) \ + ASSIGN_REG(X3) \ + ASSIGN_REG(X4) \ + ASSIGN_REG(X5) \ + ASSIGN_REG(X6) \ + ASSIGN_REG(X7) \ + ASSIGN_REG(X8) \ + ASSIGN_REG(X9) \ + ASSIGN_REG(X10) \ + ASSIGN_REG(X11) \ + ASSIGN_REG(X12) \ + ASSIGN_REG(X13) \ + ASSIGN_REG(X14) \ + ASSIGN_REG(X15) \ + ASSIGN_REG(X16) \ + ASSIGN_REG(X17) \ + ASSIGN_REG(X18) \ + ASSIGN_REG(X19) \ + ASSIGN_REG(X20) \ + ASSIGN_REG(X21) \ + ASSIGN_REG(X22) \ + ASSIGN_REG(X23) \ + ASSIGN_REG(X24) \ + ASSIGN_REG(X25) \ + ASSIGN_REG(X26) \ + ASSIGN_REG(X27) \ + ASSIGN_REG(X28) + +#else +#error Don't know how to assign registers on this architecture #endif #define ASSIGN_ALL_REGS \ @@ -201,6 +242,10 @@ BOOL CONTEXT_GetRegisters(DWORD processId, ucontext_t *registers) #define ASSIGN_REG(reg) MCREG_##reg(registers->uc_mcontext) = PTREG_##reg(ptrace_registers); #elif HAVE_BSD_REGS_T #define ASSIGN_REG(reg) MCREG_##reg(registers->uc_mcontext) = BSDREG_##reg(ptrace_registers); +#else +#define ASSIGN_REG(reg) + ASSERT("Don't know how to get the context of another process on this platform!"); + return bRet; #endif ASSIGN_ALL_REGS #undef ASSIGN_REG @@ -352,6 +397,10 @@ CONTEXT_SetThreadContext( #define ASSIGN_REG(reg) PTREG_##reg(ptrace_registers) = lpContext->reg; #elif HAVE_BSD_REGS_T #define ASSIGN_REG(reg) BSDREG_##reg(ptrace_registers) = lpContext->reg; +#else +#define ASSIGN_REG(reg) + ASSERT("Don't know how to set the context of another process on this platform!"); + return FALSE; #endif if (lpContext->ContextFlags & CONTEXT_CONTROL) { @@ -517,6 +566,10 @@ LPVOID CONTEXTGetPC(const native_context_t *context) return (LPVOID) MCREG_Eip(context->uc_mcontext); #elif defined(_ARM_) return (LPVOID) MCREG_Pc(context->uc_mcontext); +#elif defined(_ARM64_) + return (LPVOID) MCREG_Pc(context->uc_mcontext); +#else +# error implement me for this architecture #endif } diff --git a/src/pal/tests/CMakeLists.txt b/src/pal/tests/CMakeLists.txt index d3b91d2ac2..1381a98fd7 100644 --- a/src/pal/tests/CMakeLists.txt +++ b/src/pal/tests/CMakeLists.txt @@ -4,6 +4,8 @@ if(CLR_CMAKE_PLATFORM_ARCH_AMD64) set(PAL_CMAKE_PLATFORM_ARCH_AMD64 1) elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) set(PAL_CMAKE_PLATFORM_ARCH_ARM 1) 
+elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + set(PAL_CMAKE_PLATFORM_ARCH_ARM64 1) endif() # Compile options @@ -19,8 +21,11 @@ if(PAL_CMAKE_PLATFORM_ARCH_AMD64) elseif(PAL_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DBIT32=1) add_definitions(-D_WIN32=1) +elseif(PAL_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-DBIT64=1) + add_definitions(-D_WIN64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() add_compile_options(-Wno-empty-body) diff --git a/src/unwinder/CMakeLists.txt b/src/unwinder/CMakeLists.txt index 27e613cd3c..86c935a578 100644 --- a/src/unwinder/CMakeLists.txt +++ b/src/unwinder/CMakeLists.txt @@ -23,8 +23,14 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) list(APPEND UNWINDER_SOURCES arm/unwinder_arm.cpp ) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + include_directories(arm64) + + list(APPEND UNWINDER_SOURCES + arm64/unwinder_arm64.cpp + ) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() convert_to_absolute_path(UNWINDER_SOURCES ${UNWINDER_SOURCES}) @@ -37,4 +43,4 @@ if(CLR_CMAKE_PLATFORM_UNIX) add_subdirectory(wks) endif(CLR_CMAKE_PLATFORM_UNIX) -add_subdirectory(dac)
\ No newline at end of file
+add_subdirectory(dac)
diff --git a/src/unwinder/arm64/unwinder_arm64.cpp b/src/unwinder/arm64/unwinder_arm64.cpp
index a56e42469e..a5177887ab 100644
--- a/src/unwinder/arm64/unwinder_arm64.cpp
+++ b/src/unwinder/arm64/unwinder_arm64.cpp
@@ -215,7 +215,11 @@ Return Value:
     SourceAddress = StartingSp + FIELD_OFFSET(ARM64_KTRAP_FRAME, X);
     for (RegIndex = 0; RegIndex < 18; RegIndex++) {
         UPDATE_CONTEXT_POINTERS(UnwindParams, RegIndex, SourceAddress);
+#ifdef __clang__
+        *(&ContextRecord->X0 + RegIndex) = MEMORY_READ_QWORD(UnwindParams, SourceAddress);
+#else
         ContextRecord->X[RegIndex] = MEMORY_READ_QWORD(UnwindParams, SourceAddress);
+#endif
         SourceAddress += sizeof(ULONG_PTR);
     }
@@ -288,10 +292,14 @@ Return Value:
     // Restore X0-X28, and D0-D31
     //
-    SourceAddress = StartingSp + FIELD_OFFSET(T_CONTEXT, X);
+    SourceAddress = StartingSp + FIELD_OFFSET(T_CONTEXT, X0);
     for (RegIndex = 0; RegIndex < 29; RegIndex++) {
         UPDATE_CONTEXT_POINTERS(UnwindParams, RegIndex, SourceAddress);
+#ifdef __clang__
+        *(&ContextRecord->X0 + RegIndex) = MEMORY_READ_QWORD(UnwindParams, SourceAddress);
+#else
         ContextRecord->X[RegIndex] = MEMORY_READ_QWORD(UnwindParams, SourceAddress);
+#endif
         SourceAddress += sizeof(ULONG_PTR);
     }
@@ -473,7 +481,11 @@ Return Value:
     for (RegIndex = 0; RegIndex < RegisterCount; RegIndex++) {
         UPDATE_CONTEXT_POINTERS(UnwindParams, RegIndex, CurAddress);
+#ifdef __clang__
+        *(&ContextRecord->X0 + FirstRegister + RegIndex) = MEMORY_READ_QWORD(UnwindParams, CurAddress);
+#else
         ContextRecord->X[FirstRegister + RegIndex] = MEMORY_READ_QWORD(UnwindParams, CurAddress);
+#endif
         CurAddress += 8;
     }
     if (SpOffset < 0) {
@@ -1534,7 +1546,7 @@ BOOL OOPStackUnwinderArm64::Unwind(T_CONTEXT * pContext)
     if ((Rfe.UnwindData & 3) != 0)
     {
-        hr = RtlpUnwindFunctionCompact(pContext->Pc - (ULONG)ImageBase,
+        hr = RtlpUnwindFunctionCompact(pContext->Pc - ImageBase,
                                        &Rfe,
                                        pContext,
                                        &DummyEstablisherFrame,
@@ -1545,8 +1557,8 @@ BOOL OOPStackUnwinderArm64::Unwind(T_CONTEXT * pContext)
     }
     else
     {
-        hr = RtlpUnwindFunctionFull(pContext->Pc - (ULONG)ImageBase,
-                                    (ULONG)ImageBase,
+        hr = RtlpUnwindFunctionFull(pContext->Pc - ImageBase,
+                                    ImageBase,
                                     &Rfe,
                                     pContext,
                                     &DummyEstablisherFrame,
@@ -1578,3 +1590,57 @@ BOOL DacUnwindStackFrame(T_CONTEXT *pContext, T_KNONVOLATILE_CONTEXT_POINTERS* p
     return res;
 }
+
+#if defined(FEATURE_PAL)
+//TODO: Fix the context pointers
+PEXCEPTION_ROUTINE
+RtlVirtualUnwind(
+    IN ULONG HandlerType,
+    IN ULONG64 ImageBase,
+    IN ULONG64 ControlPc,
+    IN PRUNTIME_FUNCTION FunctionEntry,
+    IN OUT PCONTEXT ContextRecord,
+    OUT PVOID *HandlerData,
+    OUT PULONG64 EstablisherFrame,
+    IN OUT PKNONVOLATILE_CONTEXT_POINTERS ContextPointers OPTIONAL
+    )
+{
+    PEXCEPTION_ROUTINE handlerRoutine;
+    HRESULT hr;
+
+    DWORD64 startingPc = ControlPc;
+    DWORD64 startingSp = ContextRecord->Sp;
+
+    T_RUNTIME_FUNCTION Rfe;
+
+    Rfe.BeginAddress = FunctionEntry->BeginAddress;
+    Rfe.UnwindData = FunctionEntry->UnwindData;
+
+    if ((Rfe.UnwindData & 3) != 0)
+    {
+        hr = RtlpUnwindFunctionCompact(ControlPc - ImageBase,
+                                       &Rfe,
+                                       ContextRecord,
+                                       EstablisherFrame,
+                                       &handlerRoutine,
+                                       HandlerData,
+                                       NULL);
+
+    }
+    else
+    {
+        hr = RtlpUnwindFunctionFull(ControlPc - ImageBase,
+                                    ImageBase,
+                                    &Rfe,
+                                    ContextRecord,
+                                    EstablisherFrame,
+                                    &handlerRoutine,
+                                    HandlerData,
+                                    NULL);
+    }
+
+    _ASSERTE(SUCCEEDED(hr));
+
+    return handlerRoutine;
+}
+#endif
diff --git a/src/unwinder/dac/CMakeLists.txt b/src/unwinder/dac/CMakeLists.txt
index 12163af12a..75d7050c6f 100644
--- 
a/src/unwinder/dac/CMakeLists.txt +++ b/src/unwinder/dac/CMakeLists.txt @@ -14,8 +14,14 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-DDBG_TARGET_ARM=1) add_definitions(-DDBG_TARGET_WIN32=1) add_definitions(-D_WIN32=1) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-D_TARGET_ARM64_=1) + add_definitions(-DDBG_TARGET_64BIT=1) + add_definitions(-DDBG_TARGET_ARM64=1) + add_definitions(-DDBG_TARGET_WIN64=1) + add_definitions(-D_WIN64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() add_library(unwinder_dac ${UNWINDER_SOURCES}) diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt index fcc16acf78..9e5569f1f8 100644 --- a/src/vm/CMakeLists.txt +++ b/src/vm/CMakeLists.txt @@ -1,10 +1,5 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) -# WINTODO: Conditionalize the next check -# AMD64 specific sources subdirectory -set(AMD64_SOURCES_DIR amd64) -set(ARM_SOURCES_DIR arm) - # Needed due to the cmunged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) @@ -15,17 +10,22 @@ if(CLR_CMAKE_PLATFORM_ARCH_AMD64) add_definitions(-DDBG_TARGET_64BIT=1) add_definitions(-DDBG_TARGET_AMD64=1) add_definitions(-DDBG_TARGET_WIN64=1) - include_directories(${AMD64_SOURCES_DIR}) elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) add_definitions(-D_TARGET_ARM_=1) add_definitions(-DDBG_TARGET_32BIT=1) add_definitions(-DDBG_TARGET_ARM=1) add_definitions(-DDBG_TARGET_WIN32=1) - include_directories(${ARM_SOURCES_DIR}) +elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + add_definitions(-D_TARGET_ARM64_=1) + add_definitions(-DDBG_TARGET_64BIT=1) + add_definitions(-DDBG_TARGET_ARM64=1) + add_definitions(-DDBG_TARGET_WIN64=1) else() - message(FATAL_ERROR "Only ARM and AMD64 is supported") + message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported") endif() +include_directories(${ARCH_SOURCES_DIR}) + add_definitions(-DFEATURE_LEAVE_RUNTIME_HOLDER=1) add_definitions(-DUNICODE) @@ -314,51 +314,58 @@ list(APPEND VM_SOURCES_DAC # AMD64 specific asm sources set(VM_SOURCES_WKS_ARCH_ASM - ${AMD64_SOURCES_DIR}/AsmHelpers.asm - ${AMD64_SOURCES_DIR}/CallDescrWorkerAMD64.asm - ${AMD64_SOURCES_DIR}/ComCallPreStub.asm - ${AMD64_SOURCES_DIR}/CrtHelpers.asm - ${AMD64_SOURCES_DIR}/GenericComCallStubs.asm - ${AMD64_SOURCES_DIR}/GenericComPlusCallStubs.asm - ${AMD64_SOURCES_DIR}/getstate.asm - ${AMD64_SOURCES_DIR}/InstantiatingStub.asm - ${AMD64_SOURCES_DIR}/JitHelpers_Fast.asm - ${AMD64_SOURCES_DIR}/JitHelpers_FastWriteBarriers.asm - ${AMD64_SOURCES_DIR}/JitHelpers_InlineGetAppDomain.asm - ${AMD64_SOURCES_DIR}/JitHelpers_InlineGetThread.asm - ${AMD64_SOURCES_DIR}/JitHelpers_Slow.asm - ${AMD64_SOURCES_DIR}/PInvokeStubs.asm - ${AMD64_SOURCES_DIR}/RedirectedHandledJITCase.asm - ${AMD64_SOURCES_DIR}/ThePreStubAMD64.asm - ${AMD64_SOURCES_DIR}/ExternalMethodFixupThunk.asm - ${AMD64_SOURCES_DIR}/TlsGetters.asm # Condition="'$(FeatureImplicitTls)' != 'true' - ${AMD64_SOURCES_DIR}/UMThunkStub.asm - ${AMD64_SOURCES_DIR}/VirtualCallStubAMD64.asm + ${ARCH_SOURCES_DIR}/AsmHelpers.asm + ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm + ${ARCH_SOURCES_DIR}/ComCallPreStub.asm + ${ARCH_SOURCES_DIR}/CrtHelpers.asm + ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm + ${ARCH_SOURCES_DIR}/GenericComPlusCallStubs.asm + ${ARCH_SOURCES_DIR}/getstate.asm + ${ARCH_SOURCES_DIR}/InstantiatingStub.asm + ${ARCH_SOURCES_DIR}/JitHelpers_Fast.asm + ${ARCH_SOURCES_DIR}/JitHelpers_FastWriteBarriers.asm + 
${ARCH_SOURCES_DIR}/JitHelpers_InlineGetAppDomain.asm + ${ARCH_SOURCES_DIR}/JitHelpers_InlineGetThread.asm + ${ARCH_SOURCES_DIR}/JitHelpers_Slow.asm + ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm + ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm + ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm + ${ARCH_SOURCES_DIR}/TlsGetters.asm # Condition="'$(FeatureImplicitTls)' != 'true' + ${ARCH_SOURCES_DIR}/UMThunkStub.asm + ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm ) else() if(CLR_CMAKE_PLATFORM_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM - ${AMD64_SOURCES_DIR}/calldescrworkeramd64.S - ${AMD64_SOURCES_DIR}/crthelpers.S - ${AMD64_SOURCES_DIR}/externalmethodfixupthunk.S - ${AMD64_SOURCES_DIR}/getstate.S - ${AMD64_SOURCES_DIR}/jithelpers_fast.S - ${AMD64_SOURCES_DIR}/jithelpers_fastwritebarriers.S - ${AMD64_SOURCES_DIR}/jithelpers_slow.S - ${AMD64_SOURCES_DIR}/theprestubamd64.S - ${AMD64_SOURCES_DIR}/unixasmhelpers.S - ${AMD64_SOURCES_DIR}/umthunkstub.S - ${AMD64_SOURCES_DIR}/virtualcallstubamd64.S + ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S + ${ARCH_SOURCES_DIR}/crthelpers.S + ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S + ${ARCH_SOURCES_DIR}/getstate.S + ${ARCH_SOURCES_DIR}/jithelpers_fast.S + ${ARCH_SOURCES_DIR}/jithelpers_fastwritebarriers.S + ${ARCH_SOURCES_DIR}/jithelpers_slow.S + ${ARCH_SOURCES_DIR}/theprestubamd64.S + ${ARCH_SOURCES_DIR}/unixasmhelpers.S + ${ARCH_SOURCES_DIR}/umthunkstub.S + ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S ) elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) set(VM_SOURCES_WKS_ARCH_ASM - ${ARM_SOURCES_DIR}/asmhelpers.S - ${ARM_SOURCES_DIR}/crthelpers.S - ${ARM_SOURCES_DIR}/ehhelpers.S - ${ARM_SOURCES_DIR}/memcpy.S - ${ARM_SOURCES_DIR}/patchedcode.S + ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/crthelpers.S + ${ARCH_SOURCES_DIR}/ehhelpers.S + ${ARCH_SOURCES_DIR}/memcpy.S + ${ARCH_SOURCES_DIR}/patchedcode.S + ) + elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S + ${ARCH_SOURCES_DIR}/crthelpers.S + ${ARCH_SOURCES_DIR}/pinvokestubs.S ) endif() @@ -367,47 +374,42 @@ endif(WIN32) if(CLR_CMAKE_PLATFORM_ARCH_AMD64) set(VM_SOURCES_DAC_AND_WKS_ARCH - ${AMD64_SOURCES_DIR}/cgenamd64.cpp - ${AMD64_SOURCES_DIR}/excepamd64.cpp - ${AMD64_SOURCES_DIR}/gmsamd64.cpp - ${AMD64_SOURCES_DIR}/stublinkeramd64.cpp + ${ARCH_SOURCES_DIR}/cgenamd64.cpp + ${ARCH_SOURCES_DIR}/excepamd64.cpp + ${ARCH_SOURCES_DIR}/gmsamd64.cpp + ${ARCH_SOURCES_DIR}/stublinkeramd64.cpp ) set(VM_SOURCES_WKS_ARCH - ${AMD64_SOURCES_DIR}/jithelpersamd64.cpp - ${AMD64_SOURCES_DIR}/jitinterfaceamd64.cpp - ${AMD64_SOURCES_DIR}/profiler.cpp + ${ARCH_SOURCES_DIR}/jithelpersamd64.cpp + ${ARCH_SOURCES_DIR}/jitinterfaceamd64.cpp + ${ARCH_SOURCES_DIR}/profiler.cpp jitinterfacegen.cpp ) - - if(CLR_CMAKE_PLATFORM_UNIX) - - list(APPEND VM_SOURCES_WKS_ARCH - ${AMD64_SOURCES_DIR}/unixstubs.cpp - ) - - endif(CLR_CMAKE_PLATFORM_UNIX) elseif(CLR_CMAKE_PLATFORM_ARCH_ARM) set(VM_SOURCES_DAC_AND_WKS_ARCH - ${ARM_SOURCES_DIR}/exceparm.cpp - ${ARM_SOURCES_DIR}/stubs.cpp - ${ARM_SOURCES_DIR}/armsinglestepper.cpp + ${ARCH_SOURCES_DIR}/exceparm.cpp + ${ARCH_SOURCES_DIR}/stubs.cpp + ${ARCH_SOURCES_DIR}/armsinglestepper.cpp ) set(VM_SOURCES_WKS_ARCH - ${ARM_SOURCES_DIR}/jithelpersarm.cpp - ${ARM_SOURCES_DIR}/profiler.cpp + ${ARCH_SOURCES_DIR}/jithelpersarm.cpp + ${ARCH_SOURCES_DIR}/profiler.cpp ) - - if(CLR_CMAKE_PLATFORM_UNIX) - - list(APPEND VM_SOURCES_WKS_ARCH - ${ARM_SOURCES_DIR}/unixstubs.cpp 
+elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64) + set(VM_SOURCES_DAC_AND_WKS_ARCH + ${ARCH_SOURCES_DIR}/cgenarm64.cpp + ${ARCH_SOURCES_DIR}/stubs.cpp ) - - endif(CLR_CMAKE_PLATFORM_UNIX) endif() +if(CLR_CMAKE_PLATFORM_UNIX) + list(APPEND VM_SOURCES_WKS_ARCH + ${ARCH_SOURCES_DIR}/unixstubs.cpp + ) +endif(CLR_CMAKE_PLATFORM_UNIX) + set(VM_SOURCES_DAC_ARCH gcinfodecoder.cpp dbggcinfodecoder.cpp diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h index 04018bdb9f..6304d2c171 100644 --- a/src/vm/arm64/asmconstants.h +++ b/src/vm/arm64/asmconstants.h @@ -12,7 +12,7 @@ // #error this file should only be used on an ARM platform // #endif // _ARM64_ -#include "..\..\inc\switches.h" +#include "../../inc/switches.h" //----------------------------------------------------------------------------- @@ -24,6 +24,10 @@ #define ASMCONSTANTS_RUNTIME_ASSERT(cond) #endif +#define DynamicHelperFrameFlags_Default 0 +#define DynamicHelperFrameFlags_ObjectArg 1 +#define DynamicHelperFrameFlags_ObjectArg2 2 + #define Thread__m_fPreemptiveGCDisabled 0x0C #define Thread__m_pFrame 0x10 diff --git a/src/vm/arm64/asmhelpers.S b/src/vm/arm64/asmhelpers.S new file mode 100644 index 0000000000..9b2f028250 --- /dev/null +++ b/src/vm/arm64/asmhelpers.S @@ -0,0 +1,844 @@ +// +// Copyright (c) Geoff Norton. All rights reserved. +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// LPVOID __stdcall GetCurrentIP(void)// +LEAF_ENTRY GetCurrentIP, _TEXT + mov x0, lr + ret lr +LEAF_END GetCurrentIP, _TEXT + +// LPVOID __stdcall GetCurrentSP(void)// +LEAF_ENTRY GetCurrentSP, _TEXT + mov x0, sp + ret lr +LEAF_END GetCurrentSP, _TEXT + +//----------------------------------------------------------------------------- +// This routine captures the machine state. It is used by helper method frame +//----------------------------------------------------------------------------- +//void LazyMachStateCaptureState(struct LazyMachState *pState)// +LEAF_ENTRY LazyMachStateCaptureState, _TEXT + // marks that this is not yet valid + mov w1, #0 + str w1, [x0, #MachState__isValid] + + str lr, [x0, #LazyMachState_captureIp] + + str fp, [x0, #LazyMachState_captureFp] + + // str instruction does not save sp register directly so move to temp register + mov x1, sp + str x1, [x0, #LazyMachState_captureSp] + + // save non-volatile registers that can contain object references + add x1, x0, #LazyMachState_captureX19_X28 + stp x19, x20, [x1, #(16*0)] + stp x21, x22, [x1, #(16*1)] + stp x23, x24, [x1, #(16*2)] + stp x25, x26, [x1, #(16*3)] + stp x27, x28, [x1, #(16*4)] + + ret lr +LEAF_END LazyMachStateCaptureState, _TEXT + +// +// If a preserved register were pushed onto the stack between +// the managed caller and the H_M_F, ptrX19_X28 will point to its +// location on the stack and it would have been updated on the +// stack by the GC already and it will be popped back into the +// appropriate register when the appropriate epilog is run. +// +// Otherwise, the register is preserved across all the code +// in this HCALL or FCALL, so we need to update those registers +// here because the GC will have updated our copies in the +// frame. +// +// So, if ptrX19_X28 points into the MachState, we need to update +// the register here. That's what this macro does. 
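As a rough C++ rendering of the check just described (field names here mirror the assembly, not the VM's actual MachState declaration), the macro below only reloads a callee-saved register when the frame's preserved-register pointer still targets the MachState's own capture slot:

    #include <cstdint>

    // Illustrative sketch only; layout is assumed for the example.
    struct MachStateSketch {
        uint64_t  captureX19_X28[10];   // values captured at frame entry (x19..x28)
        uint64_t* ptrX19_X28[10];       // where each register's current value lives
    };

    // RestoreRegMS regIndex, reg: reload the register only if its pointer still
    // points at the capture slot itself (i.e. the GC updated the copy in the frame).
    inline void RestoreRegSketch(const MachStateSketch& ms, int regIndex, uint64_t& reg) {
        const uint64_t* slot = &ms.captureX19_X28[regIndex - 19];
        if (ms.ptrX19_X28[regIndex - 19] == slot)
            reg = *slot;
        // otherwise the value was spilled to the stack and the epilog will pop it
    }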
+//
+.macro RestoreRegMS regIndex, reg
+    // Incoming:
+    //
+    // x0 = address of MachState
+    //
+    // $regIndex: Index of the register (x19-x28). For x19, index is 19.
+    //            For x20, index is 20, and so on.
+    //
+    // $reg: Register name (e.g. x19, x20, etc)
+    //
+    // Get the address of the specified captured register from machine state
+    add x2, x0, #(MachState__captureX19_X28 + ((\regIndex-19)*8))
+
+    // Get the content of specified preserved register pointer from machine state
+    ldr x3, [x0, #(MachState__ptrX19_X28 + ((\regIndex-19)*8))]
+
+    cmp x2, x3
+    bne LOCAL_LABEL(NoRestore_\reg)
+    ldr \reg, [x2]
+LOCAL_LABEL(NoRestore_\reg):
+
+.endmacro
+
+// EXTERN_C int __fastcall HelperMethodFrameRestoreState(
+//         INDEBUG_COMMA(HelperMethodFrame *pFrame)
+//         MachState *pState
+//         )
+LEAF_ENTRY HelperMethodFrameRestoreState, _TEXT
+
+    #ifdef _DEBUG
+    mov x0, x1
+    #endif
+
+    // If machine state is invalid, then simply exit
+    ldr x1, [x0, #MachState__isValid]
+    cmp x1, #0
+    beq LOCAL_LABEL(Done)
+
+    RestoreRegMS 19, X19
+    RestoreRegMS 20, X20
+    RestoreRegMS 21, X21
+    RestoreRegMS 22, X22
+    RestoreRegMS 23, X23
+    RestoreRegMS 24, X24
+    RestoreRegMS 25, X25
+    RestoreRegMS 26, X26
+    RestoreRegMS 27, X27
+    RestoreRegMS 28, X28
+LOCAL_LABEL(Done):
+    // It's imperative that the return value of HelperMethodFrameRestoreState is zero
+    // as it is used in the state machine to loop until it becomes zero.
+    // Refer to HELPER_METHOD_FRAME_END macro for details.
+    mov x0,#0
+    ret lr
+
+LEAF_END HelperMethodFrameRestoreState, _TEXT
+
+// ------------------------------------------------------------------
+// The call in ndirect import precode points to this function.
+NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
+
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-144
+    SAVE_ARGUMENT_REGISTERS sp, 16
+    SAVE_FLOAT_ARGUMENT_REGISTERS sp, 80
+
+    mov x0, x12
+    bl NDirectImportWorker
+    mov x12, x0
+
+    // pop the stack and restore original register state
+    RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 80
+    RESTORE_ARGUMENT_REGISTERS sp, 16
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #144
+
+    // If we got back from NDirectImportWorker, the MD has been successfully
+    // linked. Proceed to execute the original DLL call.
+    EPILOG_BRANCH_REG x12
+
+NESTED_END NDirectImportThunk, _TEXT
+
+// ------------------------------------------------------------------
+// ARM64TODO: Implement PrecodeFixupThunk when PreCode is Enabled
+NESTED_ENTRY PrecodeFixupThunk, _TEXT, NoHandler
+    brk #0
+NESTED_END PrecodeFixupThunk, _TEXT
+// ------------------------------------------------------------------
+
+NESTED_ENTRY ThePreStub, _TEXT, NoHandler
+
+    PROLOG_WITH_TRANSITION_BLOCK 0, 0
+
+    add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock
+    mov x1, METHODDESC_REGISTER // pMethodDesc
+
+    bl PreStubWorker
+
+    mov x9, x0
+
+    EPILOG_WITH_TRANSITION_BLOCK_TAILCALL 0, 0
+    EPILOG_BRANCH_REG x9
+
+NESTED_END ThePreStub, _TEXT
+
+// ------------------------------------------------------------------
+// ThePreStubPatch()
+
+LEAF_ENTRY ThePreStubPatch, _TEXT
+    nop
+.globl C_FUNC(ThePreStubPatchLabel)
+C_FUNC(ThePreStubPatchLabel):
+    ret lr
+LEAF_END ThePreStubPatch, _TEXT
+
+
+// ------------------------------------------------------------------
+// void ResolveWorkerAsmStub(args in regs x0-x7 & stack, x11:IndirectionCellAndFlags, x12:DispatchToken)
+//
+// The stub dispatch thunk which transfers control to VSD_ResolveWorker.
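The stub below receives the indirection cell address and its flags packed into the single register x11; the two `and` instructions split them apart. A hedged C++ equivalent of that untagging, with an assumed type name:

    #include <cstdint>

    // Hypothetical helper mirroring "and x1, x11, #-4" / "and x3, x11, #3".
    struct IndirectionCellAndFlags {
        uintptr_t raw;
        uintptr_t cell()  const { return raw & ~uintptr_t(3); }  // clear low two bits
        unsigned  flags() const { return unsigned(raw & 3); }    // keep low two bits
    };

Packing flags into the low bits is safe here because the cell is at least 4-byte aligned, so those bits are always zero in the real address.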
+NESTED_ENTRY ResolveWorkerAsmStub, _TEXT, NoHandler
+
+    PROLOG_WITH_TRANSITION_BLOCK 0, 0
+
+    add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock
+    and x1, x11, #-4 // Indirection cell
+    mov x2, x12 // DispatchToken
+    and x3, x11, #3 // flag
+    bl C_FUNC(VSD_ResolveWorker)
+    mov x9, x0
+
+    EPILOG_WITH_TRANSITION_BLOCK_TAILCALL 0, 0
+
+    EPILOG_BRANCH_REG x9
+
+NESTED_END ResolveWorkerAsmStub, _TEXT
+
+NESTED_ENTRY ResolveWorkerChainLookupAsmStub, _TEXT, NoHandler
+
+    // ARMSTUB TODO: implement chained lookup
+    b C_FUNC(ResolveWorkerAsmStub)
+
+NESTED_END ResolveWorkerChainLookupAsmStub, _TEXT
+
+//-----------------------------------------------------------------------------
+// The following macros help in WRITE_BARRIER implementations
+// WRITE_BARRIER_ENTRY
+//
+// Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way
+// to declare a write barrier function.
+//
+.macro WRITE_BARRIER_ENTRY name
+    LEAF_ENTRY \name, _TEXT
+.endmacro
+
+// WRITE_BARRIER_END
+//
+// The partner to WRITE_BARRIER_ENTRY, used like NESTED_END.
+//
+.macro WRITE_BARRIER_END name
+    LEAF_END_MARKED \name, _TEXT
+.endmacro
+
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   x13 : the source address (points to object reference to write)
+//   x14 : the destination address (object reference written here)
+//
+// On exit:
+//   x12 : trashed
+//   x13 : incremented by 8
+//   x14 : incremented by 8
+//   x15 : trashed
+//
+WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier
+
+    ldr x15, [x13], 8
+    b C_FUNC(JIT_CheckedWriteBarrier)
+
+WRITE_BARRIER_END JIT_ByRefWriteBarrier
+
+//-----------------------------------------------------------------------------
+// Simple WriteBarriers
+// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
+// On entry:
+//   x14 : the destination address (LHS of the assignment)
+//   x15 : the object reference (RHS of the assignment)
+//
+// On exit:
+//   x12 : trashed
+//   x14 : incremented by 8
+//   x15 : trashed
+//
+WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier
+    // ARM64TODO: Temporary indirect access till support for :lo12:symbol is added
+    ldr x12, =g_lowest_address
+    ldr x12, [x12]
+    cmp x14, x12
+    blt LOCAL_LABEL(NotInHeap)
+
+    // ARM64TODO: Temporary indirect access till support for :lo12:symbol is added
+    ldr x12, =g_highest_address
+    ldr x12, [x12]
+    cmp x14, x12
+    blt C_FUNC(JIT_WriteBarrier)
+
+LOCAL_LABEL(NotInHeap):
+    str x15, [x14], 8
+    ret lr
+WRITE_BARRIER_END JIT_CheckedWriteBarrier
+
+// void JIT_WriteBarrier(Object** dst, Object* src)
+// On entry:
+//   x14 : the destination address (LHS of the assignment)
+//   x15 : the object reference (RHS of the assignment)
+//
+// On exit:
+//   x12 : trashed
+//   x14 : incremented by 8
+//   x15 : trashed
+//
+WRITE_BARRIER_ENTRY JIT_WriteBarrier
+    dmb ST
+    str x15, [x14], 8
+
+    // Branch to Exit if the reference is not in the Gen0 heap
+    //
+    // ARM64TODO: Temporary indirect access till support for :lo12:symbol is added
+    ldr x12, =g_ephemeral_low
+    ldr x12, [x12]
+    cmp x15, x12
+    blt LOCAL_LABEL(Exit)
+
+    // ARM64TODO: Temporary indirect access till support for :lo12:symbol is added
+    ldr x12, =g_ephemeral_high
+    ldr x12, [x12]
+    cmp x15, x12
+    bgt LOCAL_LABEL(Exit)
+
+    // Check if we need to update the card table
+    // ARM64TODO: Temporary indirect access till support for :lo12:symbol is added
+    ldr x12, =g_card_table
+    ldr x12, [x12]
+    add x15, x12, x14, lsr #11
+    ldrb w12, [x15]
+    cmp x12, 0xFF
+    beq LOCAL_LABEL(Exit)
+
+LOCAL_LABEL(UpdateCardTable):
+    mov x12, 0xFF
+    strb w12, [x15]
+LOCAL_LABEL(Exit):
+    ret lr
+WRITE_BARRIER_END JIT_WriteBarrier
+
+// ------------------------------------------------------------------
+// Start of the writeable code region
+LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
+    ret lr
+LEAF_END JIT_PatchedCodeStart, _TEXT
+
+// ------------------------------------------------------------------
+// End of the writeable code region
+LEAF_ENTRY JIT_PatchedCodeLast, _TEXT
+    ret lr
+LEAF_END JIT_PatchedCodeLast, _TEXT
+
+//------------------------------------------------
+// VirtualMethodFixupStub
+//
+// In NGEN images, virtual slots inherited from cross-module dependencies
+// point to a jump thunk that calls into the following function that will
+// call into a VM helper. The VM helper is responsible for patching up the
+// thunk, upon executing the precode, so that all subsequent calls go directly
+// to the actual method body.
+//
+// This is done lazily for performance reasons.
+//
+// On entry:
+//
+// x0 = "this" pointer
+// x12 = Address of thunk
+
+NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
+
+    // Save arguments and return address
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-144
+    SAVE_ARGUMENT_REGISTERS sp, 16
+    SAVE_FLOAT_ARGUMENT_REGISTERS sp, 80
+
+    // Refer to ZapImportVirtualThunk::Save
+    // for details on this.
+    //
+    // Move the thunk start address in x1
+    mov x1, x12
+
+    // Call the helper in the VM to perform the actual fixup
+    // and tell us where to tail call. x0 already contains
+    // the this pointer.
+    bl C_FUNC(VirtualMethodFixupWorker)
+    // On return, x0 contains the target to tailcall to
+    mov x12, x0
+
+    // pop the stack and restore original register state
+    RESTORE_ARGUMENT_REGISTERS sp, 16
+    RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 80
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #144
+
+    PATCH_LABEL VirtualMethodFixupPatchLabel
+
+    // and tailcall to the actual method
+    EPILOG_BRANCH_REG x12
+
+NESTED_END VirtualMethodFixupStub, _TEXT
+
+//------------------------------------------------
+// ExternalMethodFixupStub
+//
+// In NGEN images, calls to cross-module external methods initially
+// point to a jump thunk that calls into the following function that will
+// call into a VM helper. The VM helper is responsible for patching up the
+// thunk, upon executing the precode, so that all subsequent calls go directly
+// to the actual method body.
+//
+// This is done lazily for performance reasons.
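Both fixup stubs here follow the same lazy-patching shape: the first call enters the VM worker, the worker patches the thunk, and every later call branches straight to the method body. A minimal sketch of that contract (the types and resolver callback are assumptions for illustration, not the VM's real signatures):

    #include <atomic>
    #include <cstdint>

    using PCODE = uintptr_t;

    // First call: slot is empty, so ask the resolver and patch it. Later calls
    // skip the VM entirely; the asm equivalent of the return is EPILOG_BRANCH_REG x12.
    inline PCODE ResolveLazily(std::atomic<PCODE>& slot,
                               PCODE (*resolver)(void* self, void* thunk),
                               void* self, void* thunk) {
        PCODE target = slot.load(std::memory_order_acquire);
        if (target == 0) {
            target = resolver(self, thunk);                // cf. VirtualMethodFixupWorker
            slot.store(target, std::memory_order_release); // patch the thunk once
        }
        return target;
    }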
+// +// On entry: +// +// x12 = Address of thunk + +NESTED_ENTRY ExternalMethodFixupStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK 0, 0 + + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov x1, x12 // pThunk + + bl C_FUNC(ExternalMethodFixupWorker) + + // mov the address we patched to in x12 so that we can tail call to it + mov x12, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL 0, 0 + PATCH_LABEL ExternalMethodFixupPatchLabel + EPILOG_BRANCH_REG x12 + +NESTED_END ExternalMethodFixupStub, _TEXT + +// void SinglecastDelegateInvokeStub(Delegate *pThis) +LEAF_ENTRY SinglecastDelegateInvokeStub, _TEXT + cmp x0, #0 + beq LOCAL_LABEL(LNullThis) + + ldr x16, [x0, #DelegateObject___methodPtr] + ldr x0, [x0, #DelegateObject___target] + + br x16 + +LOCAL_LABEL(LNullThis): + mov x0, #CORINFO_NullReferenceException_ASM + b C_FUNC(JIT_InternalThrow) + +LEAF_END SinglecastDelegateInvokeStub, _TEXT + +// +// x12 = UMEntryThunk* +// +NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix + + // Save arguments and return address + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-144 + SAVE_ARGUMENT_REGISTERS sp, 16 + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 80 + + mov x0, x12 + bl C_FUNC(TheUMEntryPrestubWorker) + + // save real target address in x12. + mov x12, x0 + + // pop the stack and restore original register state + RESTORE_ARGUMENT_REGISTERS sp, 16 + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 80 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #144 + + // and tailcall to the actual method + EPILOG_BRANCH_REG x12 + +NESTED_END TheUMEntryPrestub, _TEXT + +// +// x12 = UMEntryThunk* +// +NESTED_ENTRY UMThunkStub, _TEXT, UnhandledExceptionHandlerUnix + + // Save arguments and return address + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-96 // 64 for regArgs, 8 for x19 & 8 for x12 + // save callee saved reg x19. 
x19 is used in the method to store thread* + PROLOG_SAVE_REG x19, #88 + + SAVE_ARGUMENT_REGISTERS sp, 16 + +#define UMThunkStub_HiddenArg 80 // offset of saved UMEntryThunk * +#define UMThunkStub_StackArgs 96 // offset of original stack args (total size of UMThunkStub frame) + + // save UMEntryThunk* + str x12, [sp, #UMThunkStub_HiddenArg] + + // assuming GetThread does not clobber FP Args + bl C_FUNC(GetThread) + cbz x0, LOCAL_LABEL(UMThunkStub_DoThreadSetup) + +LOCAL_LABEL(UMThunkStub_HaveThread): + mov x19, x0 // x19 = Thread * + + mov x9, 1 + // m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant + str w9, [x19, #Thread__m_fPreemptiveGCDisabled] + + ldr x2, =g_TrapReturningThreads + ldr x3, [x2] + // assuming x0 contains Thread* before jumping to UMThunkStub_DoTrapReturningThreads + cbnz x3, LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads) + +LOCAL_LABEL(UMThunkStub_InCooperativeMode): + ldr x12, [fp, #UMThunkStub_HiddenArg] // x12 = UMEntryThunk* + + ldr x0, [x19, #Thread__m_pDomain] + + // m_dwDomainId is 4 bytes so using 32-bit variant + ldr w1, [x12, #UMEntryThunk__m_dwDomainId] + ldr w0, [x0, #AppDomain__m_dwId] + cmp w0, w1 + bne LOCAL_LABEL(UMThunkStub_WrongAppDomain) + + ldr x3, [x12, #UMEntryThunk__m_pUMThunkMarshInfo] // x3 = m_pUMThunkMarshInfo + + // m_cbActualArgSize is UINT32 and hence occupies 4 bytes + ldr w2, [x3, #UMThunkMarshInfo__m_cbActualArgSize] // w2 = Stack arg bytes + cbz w2, LOCAL_LABEL(UMThunkStub_RegArgumentsSetup) + + // extend to 64-bits + uxtw x2, w2 + + // Source pointer + add x0, fp, #UMThunkStub_StackArgs + + // move source pointer to end of Stack Args + add x0, x0, x2 + + // Count of stack slot pairs to copy (divide by 16) + lsr x1, x2, #4 + + // Is there an extra stack slot (can happen when stack arg bytes not multiple of 16) + and x2, x2, #8 + + // If yes then start source pointer from 16 byte aligned stack slot + add x0, x0, x2 + + // increment stack slot pair count by 1 if x2 is not zero + add x1, x1, x2, LSR #3 + +LOCAL_LABEL(UMThunkStub_StackLoop): + ldp x4, x5, [x0, #-16]! // pre-Index + stp x4, x5, [sp, #-16]! 
// pre-Index + subs x1, x1, #1 + bne LOCAL_LABEL(UMThunkStub_StackLoop) + +LOCAL_LABEL(UMThunkStub_RegArgumentsSetup): + ldr x16, [x3, #UMThunkMarshInfo__m_pILStub] + + RESTORE_ARGUMENT_REGISTERS fp, 16 + + blr x16 + +LOCAL_LABEL(UMThunkStub_PostCall): + mov x4, 0 + // m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant + str w4, [x19, #Thread__m_fPreemptiveGCDisabled] + + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG x19, #88 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #96 + + EPILOG_RETURN + +LOCAL_LABEL(UMThunkStub_DoThreadSetup): + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + bl C_FUNC(CreateThreadBlockThrow) + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + add sp, sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_HaveThread) + +LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads): + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + // x0 already contains Thread* pThread + // UMEntryThunk* pUMEntry + ldr x1, [fp, #UMThunkStub_HiddenArg] + bl C_FUNC(UMThunkStubRareDisableWorker) + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + add sp, sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_InCooperativeMode) + +LOCAL_LABEL(UMThunkStub_WrongAppDomain): + // Saving FP Args as this is read by UM2MThunk_WrapperHelper + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + + // UMEntryThunk* pUMEntry + ldr x0, [fp, #UMThunkStub_HiddenArg] + + // void * pArgs + add x2, fp, #16 + + // remaining arguments are unused + bl C_FUNC(UM2MDoADCallBack) + + // restore integral return value + ldr x0, [fp, #16] + + // restore FP or HFA return value + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + + b LOCAL_LABEL(UMThunkStub_PostCall) + +NESTED_END UMThunkStub, _TEXT + + +// UM2MThunk_WrapperHelper(void *pThunkArgs, // x0 +// int cbStackArgs, // x1 (unused) +// void *pAddr, // x2 (unused) +// UMEntryThunk *pEntryThunk,// x3 +// Thread *pThread) // x4 + +// pThunkArgs points to the argument registers pushed on the stack by UMThunkStub + +NESTED_ENTRY UM2MThunk_WrapperHelper, _TEXT, NoHandler + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32 + PROLOG_SAVE_REG x19, #16 + + + // save pThunkArgs in non-volatile reg. It is required after return from call to ILStub + mov x19, x0 + + // ARM64TODO - Is this required by ILStub + mov x12, x3 //// x12 = UMEntryThunk * + + // + // Note that layout of the arguments is given by UMThunkStub frame + // + ldr x3, [x3, #UMEntryThunk__m_pUMThunkMarshInfo] + + // m_cbActualArgSize is 4-byte field + ldr w2, [x3, #UMThunkMarshInfo__m_cbActualArgSize] + cbz w2, LOCAL_LABEL(UM2MThunk_WrapperHelper_RegArgumentsSetup) + + // extend to 64- bits + uxtw x2, w2 + + // Source pointer. Subtracting 16 bytes due to fp & lr + add x6, x0, #(UMThunkStub_StackArgs-16) + + // move source ptr to end of Stack Args + add x6, x6, x2 + + // Count of stack slot pairs to copy (divide by 16) + lsr x1, x2, #4 + + // Is there an extra stack slot? (can happen when stack arg bytes not multiple of 16) + and x2, x2, #8 + + // If yes then start source pointer from 16 byte aligned stack slot + add x6, x6, x2 + + // increment stack slot pair count by 1 if x2 is not zero + add x1, x1, x2, LSR #3 + +LOCAL_LABEL(UM2MThunk_WrapperHelper_StackLoop): + ldp x4, x5, [x6, #-16]! + stp x4, x5, [sp, #-16]! 
+ subs x1, x1, #1 + bne LOCAL_LABEL(UM2MThunk_WrapperHelper_StackLoop) + +LOCAL_LABEL(UM2MThunk_WrapperHelper_RegArgumentsSetup): + ldr x16, [x3, #(UMThunkMarshInfo__m_pILStub)] + + // reload floating point registers + RESTORE_FLOAT_ARGUMENT_REGISTERS x0, -1 * (SIZEOF__FloatArgumentRegisters + 16) + + // reload argument registers + RESTORE_ARGUMENT_REGISTERS x0, 0 + + blr x16 + + // save integral return value + str x0, [x19] + // save FP/HFA return values + SAVE_FLOAT_ARGUMENT_REGISTERS x19, -1 * (SIZEOF__FloatArgumentRegisters + 16) + + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG x19, #16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32 + EPILOG_RETURN + +NESTED_END UM2MThunk_WrapperHelper, _TEXT + + + +// ------------------------------------------------------------------ +// Redirection Stub for GC in fully interruptible method +//GenerateRedirectedHandledJITCaseStub GCThreadControl +// ------------------------------------------------------------------ +//GenerateRedirectedHandledJITCaseStub DbgThreadControl +// ------------------------------------------------------------------ +//GenerateRedirectedHandledJITCaseStub UserSuspend +// ------------------------------------------------------------------ +//GenerateRedirectedHandledJITCaseStub YieldTask + +#ifdef _DEBUG +// ------------------------------------------------------------------ +// Redirection Stub for GC Stress +GenerateRedirectedHandledJITCaseStub GCStress +#endif + + +// ------------------------------------------------------------------ + +// This helper enables us to call into a funclet after restoring Fp register +NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler + + // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-16 + // is intentional. Above statement would also emit instruction to save + // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + // of method. However, this method needs to be able to change fp before calling funclet. + // This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_INDEXED x19,x20, #-16 + PROLOG_SAVE_REG fp, #0 + PROLOG_SAVE_REG lr, #8 + + // On entry: + // + // X0 = throwable + // X1 = PC to invoke + // X2 = address of X19 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame + // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // + // Save the SP of this function + str fp, [x3] + + ldr fp, [x2, #80] // offset of fp in CONTEXT relative to X19 + + // Invoke the funclet + blr x1 + nop + + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #16 + EPILOG_RETURN + +NESTED_END CallEHFunclet, _TEXT + +// This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the +// frame pointer for accessing the locals in the parent method. +NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-16 + + // On entry: + // + // X0 = throwable + // X1 = SP of the caller of the method/funclet containing the filter + // X2 = PC to invoke + // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+    //
+    // Save the SP of this function
+    str fp, [x3]
+    // Invoke the filter funclet
+    blr x2
+
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #16
+    EPILOG_RETURN
+
+NESTED_END CallEHFilterFunclet, _TEXT
+
+#define FaultingExceptionFrame_StackAlloc (SIZEOF__GSCookie + SIZEOF__FaultingExceptionFrame)
+#define FaultingExceptionFrame_FrameOffset SIZEOF__GSCookie
+
+.macro GenerateRedirectedStubWithFrame stub, target
+
+    //
+    // This is the primary function to which execution will be redirected.
+    //
+    NESTED_ENTRY \stub, _TEXT, NoHandler
+
+    //
+    // IN: lr: original IP before redirect
+    //
+
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-16
+    PROLOG_STACK_ALLOC FaultingExceptionFrame_StackAlloc
+
+    // At this point, the stack may be misaligned if the thread abort was asynchronously
+    // triggered in the prolog or epilog of the managed method. For such a case, we must
+    // align the stack before calling into the VM.
+    //
+    // Runtime check for 16-byte alignment.
+    mov x0, sp
+    and x0, x0, #15
+    sub sp, sp, x0
+
+    // Save pointer to FEF for GetFrameFromRedirectedStubStackFrame
+    add x19, sp, #FaultingExceptionFrame_FrameOffset
+
+    // Prepare to initialize to NULL
+    mov x1,#0
+    str x1, [x19] // Initialize vtbl (it is not strictly necessary)
+    str x1, [x19, #FaultingExceptionFrame__m_fFilterExecuted] // Initialize BOOL for personality routine
+
+    mov x0, x19 // move the ptr to FEF in X0
+
+    bl C_FUNC(\target)
+
+    // Target should not return.
+    EMIT_BREAKPOINT
+
+    NESTED_END \stub, _TEXT
+
+.endmacro
+
+
+// ------------------------------------------------------------------
+//
+// Helpers for async (NullRef, AccessViolation) exceptions
+//
+
+NESTED_ENTRY NakedThrowHelper2, _TEXT, FixContextHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, #-16
+
+    // On entry:
+    //
+    // X0 = Address of FaultingExceptionFrame
+    bl C_FUNC(LinkFrameAndThrow)
+
+    // Target should not return.
+    EMIT_BREAKPOINT
+
+NESTED_END NakedThrowHelper2, _TEXT
+
+GenerateRedirectedStubWithFrame NakedThrowHelper, NakedThrowHelper2
+
+#ifdef FEATURE_READYTORUN
+
+NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler
+DelayLoad_MethodCall:
+    .global DelayLoad_MethodCall
+
+    EMIT_BREAKPOINT
+NESTED_END DelayLoad_MethodCall_FakeProlog, _TEXT
+
+
+.macro DynamicHelper frameFlags, suffix
+    NESTED_ENTRY DelayLoad_Helper\suffix\()_FakeProlog, _TEXT, NoHandler
+DelayLoad_Helper\suffix:
+    .global DelayLoad_Helper\suffix
+
+    EMIT_BREAKPOINT
+
+    NESTED_END DelayLoad_Helper\suffix\()_FakeProlog, _TEXT
+.endm
+
+DynamicHelper DynamicHelperFrameFlags_Default
+DynamicHelper DynamicHelperFrameFlags_ObjectArg, _Obj
+DynamicHelper DynamicHelperFrameFlags_ObjectArg | DynamicHelperFrameFlags_ObjectArg2, _ObjObj
+
+#endif
+
+LEAF_ENTRY StartUnwindingNativeFrames, _TEXT
+    EMIT_BREAKPOINT
+LEAF_END StartUnwindingNativeFrames, _TEXT
+
diff --git a/src/vm/arm64/calldescrworkerarm64.S b/src/vm/arm64/calldescrworkerarm64.S
new file mode 100644
index 0000000000..2ff42f43dc
--- /dev/null
+++ b/src/vm/arm64/calldescrworkerarm64.S
@@ -0,0 +1,130 @@
+//
+// Copyright (c) Geoff Norton. All rights reserved.
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#include "unixasmmacros.inc"
+#include "asmconstants.h"
+
+//-----------------------------------------------------------------------------
+// This helper routine enregisters the appropriate arguments and makes the
+// actual call.
+//-----------------------------------------------------------------------------
+//void CallDescrWorkerInternal(CallDescrData * pCallDescrData);
+
+NESTED_ENTRY CallDescrWorkerInternal, _TEXT, UnhandledExceptionHandlerUnix
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32
+    PROLOG_SAVE_REG x19, #16 // the stack slot at sp+24 is empty for 16 byte alignment
+
+    mov x19, x0 // save pCallDescrData in x19
+
+    ldr w1, [x19,#CallDescrData__numStackSlots]
+    cbz w1, LOCAL_LABEL(donestack)
+
+    // Add frame padding to ensure frame size is a multiple of 16 (a requirement of the OS ABI).
+    // We push two registers (above) and numStackSlots arguments (below). If this comes to an odd number
+    // of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set,
+    // extend the stack another eight bytes".
+    ldr x0, [x19,#CallDescrData__pSrc]
+    add x0, x0, x1, lsl #3 // pSrcEnd=pSrc+8*numStackSlots
+    ands x2, x1, #1
+    beq LOCAL_LABEL(stackloop)
+
+    // This loop copies numStackSlots words
+    // from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...]
+
+    // pad and store one stack slot as number of slots are odd
+    ldr x4, [x0,#-8]!
+    str x4, [sp,#-16]!
+    subs x1, x1, #1
+    beq LOCAL_LABEL(donestack)
+LOCAL_LABEL(stackloop):
+    ldp x2, x4, [x0,#-16]!
+    stp x2, x4, [sp,#-16]!
+    subs x1, x1, #2
+    bne LOCAL_LABEL(stackloop)
+LOCAL_LABEL(donestack):
+
+    // If FP arguments are supplied in registers (x8 != NULL) then initialize all of them from the pointer
+    // given in x8.
+    ldr x8, [x19,#CallDescrData__pFloatArgumentRegisters]
+    cbz x8, LOCAL_LABEL(NoFloatingPoint)
+    ldp d0, d1, [x8]
+    ldp d2, d3, [x8, #16]
+    ldp d4, d5, [x8, #32]
+    ldp d6, d7, [x8, #48]
+LOCAL_LABEL(NoFloatingPoint):
+
+    // Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
+    // into x0, ..., x7
+
+    ldr x8, [x19,#CallDescrData__pArgumentRegisters]
+    ldp x0, x1, [x8]
+    ldp x2, x3, [x8, #16]
+    ldp x4, x5, [x8, #32]
+    ldp x6, x7, [x8, #48]
+
+    // ARM64TODO: => see if anything special needs to be done for remoting
+    // call pTarget
+    ldr x8, [x19,#CallDescrData__pTarget]
+    blr x8
+
+    ldr w3, [x19,#CallDescrData__fpReturnSize]
+
+    // Int return case
+    cbz w3, LOCAL_LABEL(IntReturn)
+
+    // Float return case
+    cmp w3, #4
+    beq LOCAL_LABEL(FloatReturn)
+
+    // Double return case
+    cmp w3, #8
+    bne LOCAL_LABEL(NoDoubleReturn)
+
+LOCAL_LABEL(FloatReturn):
+    str d0, [x19, #(CallDescrData__returnValue + 0)]
+    b LOCAL_LABEL(ReturnDone)
+
+LOCAL_LABEL(NoDoubleReturn):
+
+    //FloatHFAReturn return case
+    cmp w3, #16
+    bne LOCAL_LABEL(NoFloatHFAReturn)
+
+    stp s0, s1, [x19, #(CallDescrData__returnValue + 0)]
+    stp s2, s3, [x19, #(CallDescrData__returnValue + 0x08)]
+    b LOCAL_LABEL(ReturnDone)
+LOCAL_LABEL(NoFloatHFAReturn):
+
+    //DoubleHFAReturn return case
+    cmp w3, #32
+    bne LOCAL_LABEL(NoDoubleHFAReturn)
+
+    stp d0, d1, [x19, #(CallDescrData__returnValue + 0)]
+    stp d2, d3, [x19, #(CallDescrData__returnValue + 0x10)]
+    b LOCAL_LABEL(ReturnDone)
+
+LOCAL_LABEL(NoDoubleHFAReturn):
+
+    EMIT_BREAKPOINT // Unreachable
+
+LOCAL_LABEL(IntReturn):
+    // Save return value into retbuf for int
+    str x0, [x19, #(CallDescrData__returnValue + 0)]
+
+LOCAL_LABEL(ReturnDone):
+
+#ifdef _DEBUG
+    // trash the floating point registers to ensure that the HFA return values
+    // won't survive by accident
+    ldp d0, d1, [sp]
+    ldp d2, d3, [sp, #16]
+#endif
+
+    EPILOG_STACK_RESTORE
+    EPILOG_RESTORE_REG x19, #16 // the stack slot at sp+24 is empty for 16 byte alignment
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32
+    EPILOG_RETURN
+NESTED_END CallDescrWorkerInternal, _TEXT
diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h
index 28d4474fa4..ac9921a23c 100644
--- a/src/vm/arm64/cgencpu.h
+++ b/src/vm/arm64/cgencpu.h
@@ -407,7 +407,7 @@ extern "C" void SinglecastDelegateInvokeStub();
 #define DATA_ALIGNMENT 8
-DECLSPEC_ALIGN(16) struct UMEntryThunkCode
+struct DECLSPEC_ALIGN(16) UMEntryThunkCode
 {
     DWORD m_code[4];
@@ -490,8 +490,8 @@ struct StubPrecode {
         CONTRACTL_END;
         EnsureWritableExecutablePages(&m_pTarget);
-        return (TADDR)InterlockedCompareExchange(
-            (TADDR*)&m_pTarget, (TADDR)target, (TADDR)expected) == expected;
+        return (TADDR)InterlockedCompareExchange64(
+            (LONGLONG*)&m_pTarget, (TADDR)target, (TADDR)expected) == expected;
     }
 #ifdef FEATURE_PREJIT
diff --git a/src/vm/arm64/crthelpers.S b/src/vm/arm64/crthelpers.S
new file mode 100644
index 0000000000..7d92917e8c
--- /dev/null
+++ b/src/vm/arm64/crthelpers.S
@@ -0,0 +1,292 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+// ==++==
+//
+
+#include "unixasmmacros.inc"
+
+//
+// ==--==
+
+// Calls to JIT_MemSet are emitted by the JIT for initialization of large structs.
+// We need to provide our own implementation of memset instead of using the one in the crt because the crt implementation does not guarantee
+// that aligned 8/4/2-byte memory will be written atomically. This is required because members in a struct can be read atomically
+// and their values should be written atomically.
+//
+//
+//void JIT_MemSet(void *dst, int val, SIZE_T count)
+//
+//    uintptr_t valEx = (char)val;
+//    valEx = valEx | valEx << 8;
+//    valEx = valEx | valEx << 16;
+//    valEx = valEx | valEx << 32;
+//
+//    // If not aligned then make it 8-byte aligned
+//    if(((uintptr_t)dst&0x7) != 0)
+//    {
+//        if(((uintptr_t)dst&0x3) == 0)
+//        {
+//            *(UINT*)dst = (UINT)valEx;
+//            dst = (UINT*)dst + 1;
+//            count-=4;
+//        }
+//        else if(((uintptr_t)dst&0x1) == 0)
+//        {
+//            while(count > 0 && ((uintptr_t)dst&0x7) != 0)
+//            {
+//                *(short*)dst = (short)valEx;
+//                dst = (short*)dst + 1;
+//                count-=2;
+//            }
+//        }
+//        else
+//        {
+//            while(count > 0 && ((uintptr_t)dst&0x7) != 0)
+//            {
+//                *(char*)dst = (char)valEx;
+//                dst = (char*)dst + 1;
+//                count--;
+//            }
+//        }
+//    }
+//
+//    while(count > 8)
+//    {
+//        *(uintptr_t*)dst = valEx;
+//        dst = (uintptr_t*)dst + 1;
+//        count-=8;
+//    }
+//
+//    if(count & 4)
+//    {
+//        *(UINT*)dst = (UINT)valEx;
+//        dst = (UINT*)dst + 1;
+//    }
+//
+//    if(count & 2)
+//    {
+//        *(short*)dst = (short)valEx;
+//        dst = (short*)dst + 1;
+//    }
+//
+//    if(count & 1)
+//    {
+//        *(char*)dst = (char)valEx;
+//    }
+//
+//
+
+// Assembly code corresponding to the above C++ method. JIT_MemSet can AV, and the CLR exception personality routine needs to
+// determine if the exception has taken place inside JIT_MemSet in order to throw the corresponding managed exception.
+// Determining this is slow if the method were implemented as a C++ method (using unwind info). In the .asm file, by adding a JIT_MemSet_End
+// marker it can be easily determined if the exception happened in JIT_MemSet. Therefore, JIT_MemSet has been written in assembly instead of
+// as a C++ method.
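For readers who want to step through the logic, here is a compilable C++ rendering of the pseudo-code above. It is a sketch for reference only (the shipped helper is the assembly that follows, precisely so the personality routine can bracket faults against the JIT_MemSet_End marker). It assumes, as the JIT guarantees for large structs, that count covers the alignment stores; it also uses an unsigned byte and >= in the main loop so the replicated pattern and an exact multiple of eight bytes come out right:

    #include <cstdint>
    #include <cstddef>

    void JIT_MemSet_sketch(void* dst, int val, size_t count) {
        uintptr_t valEx = (unsigned char)val;   // unsigned avoids sign-extension
        valEx |= valEx << 8;                    // replicate the byte across 64 bits
        valEx |= valEx << 16;
        valEx |= valEx << 32;

        // If not aligned, make dst 8-byte aligned using the widest atomic store possible.
        if (((uintptr_t)dst & 0x7) != 0) {
            if (((uintptr_t)dst & 0x3) == 0) {
                *(uint32_t*)dst = (uint32_t)valEx;
                dst = (uint32_t*)dst + 1;
                count -= 4;
            } else if (((uintptr_t)dst & 0x1) == 0) {
                while (count > 0 && ((uintptr_t)dst & 0x7) != 0) {
                    *(uint16_t*)dst = (uint16_t)valEx;
                    dst = (uint16_t*)dst + 1;
                    count -= 2;
                }
            } else {
                while (count > 0 && ((uintptr_t)dst & 0x7) != 0) {
                    *(char*)dst = (char)valEx;
                    dst = (char*)dst + 1;
                    count--;
                }
            }
        }

        while (count >= 8) {                    // aligned 8-byte stores
            *(uintptr_t*)dst = valEx;
            dst = (uintptr_t*)dst + 1;
            count -= 8;
        }
        if (count & 4) { *(uint32_t*)dst = (uint32_t)valEx; dst = (uint32_t*)dst + 1; }
        if (count & 2) { *(uint16_t*)dst = (uint16_t)valEx; dst = (uint16_t*)dst + 1; }
        if (count & 1) { *(char*)dst = (char)valEx; }
    }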
+ +LEAF_ENTRY JIT_MemSet, _TEXT + sxtb w8,w1 + sxtw x8,w8 + orr x8,x8,x8, lsl #8 + orr x8,x8,x8, lsl #0x10 + orr x9,x8,x8, lsl #0x20 + and x8,x0,#7 + cbz x8,LOCAL_LABEL(JIT_MemSet_0x7c) + and x8,x0,#3 + cbnz x8,LOCAL_LABEL(JIT_MemSet_0x38) + str w9,[x0] + add x0,x0,#4 + mov x8,#-4 + add x2,x2,x8 + b LOCAL_LABEL(JIT_MemSet_0x7c) +LOCAL_LABEL(JIT_MemSet_0x38): + cbz x2,LOCAL_LABEL(JIT_MemSet_0x7c) + tbnz x0,#0,LOCAL_LABEL(JIT_MemSet_0x60) +LOCAL_LABEL(JIT_MemSet_0x40): + and x8,x0,#7 + cbz x8,LOCAL_LABEL(JIT_MemSet_0x7c) + strh w9,[x0] + add x0,x0,#2 + mov x8,#-2 + add x2,x2,x8 + cbnz x2,LOCAL_LABEL(JIT_MemSet_0x40) + b LOCAL_LABEL(JIT_MemSet_0x7c) +LOCAL_LABEL(JIT_MemSet_0x60): + and x8,x0,#7 + cbz x8,LOCAL_LABEL(JIT_MemSet_0x7c) + strb w9,[x0] + add x0,x0,#1 + mov x8,#-1 + add x2,x2,x8 + cbnz x2,LOCAL_LABEL(JIT_MemSet_0x60) +LOCAL_LABEL(JIT_MemSet_0x7c): + cmp x2,#8 + bls LOCAL_LABEL(JIT_MemSet_0xb8) + mov x8,#-9 + add x8,x2,x8 + lsr x8,x8,#3 + add x11,x8,#1 + mov x10,x0 + add x8,x10,x11, lsl #3 +LOCAL_LABEL(JIT_MemSet_0x9c): + cmp x10,x8 + beq LOCAL_LABEL(JIT_MemSet_0xac) + str x9,[x10],#8 + b LOCAL_LABEL(JIT_MemSet_0x9c) +LOCAL_LABEL(JIT_MemSet_0xac): + mov x8,#-8 + madd x2,x11,x8,x2 + add x0,x0,x11, lsl #3 +LOCAL_LABEL(JIT_MemSet_0xb8): + tbz x2,#2,LOCAL_LABEL(JIT_MemSet_0xc4) + str w9,[x0] + add x0,x0,#4 +LOCAL_LABEL(JIT_MemSet_0xc4): + tbz x2,#1,LOCAL_LABEL(JIT_MemSet_0xd0) + strh w9,[x0] + add x0,x0,#2 +LOCAL_LABEL(JIT_MemSet_0xd0): + tbz x2,#0,LOCAL_LABEL(JIT_MemSet_0xd8) + strb w9,[x0] +LOCAL_LABEL(JIT_MemSet_0xd8): + ret lr +LEAF_END JIT_MemSet, _TEXT + +// See comments above for JIT_MemSet + +//void JIT_MemCpy(void *dst, const void *src, SIZE_T count) +// +// // If not aligned then make it 8-byte aligned +// if(((uintptr_t)dst&0x7) != 0) +// { +// if(((uintptr_t)dst&0x3) == 0) +// { +// *(UINT*)dst = *(UINT*)src; +// dst = (UINT*)dst + 1; +// src = (UINT*)src + 1; +// count-=4; +// } +// else if(((uintptr_t)dst&0x1) == 0) +// { +// while(count > 0 && ((uintptr_t)dst&0x7) != 0) +// { +// *(short*)dst = *(short*)src; +// dst = (short*)dst + 1; +// src = (short*)src + 1; +// count-=2; +// } +// } +// else +// { +// while(count > 0 && ((uintptr_t)dst&0x7) != 0) +// { +// *(char*)dst = *(char*)src; +// dst = (char*)dst + 1; +// src = (char*)src + 1; +// count--; +// } +// } +// } +// +// while(count > 8) +// { +// *(uintptr_t*)dst = *(uintptr_t*)src; +// dst = (uintptr_t*)dst + 1; +// src = (uintptr_t*)src + 1; +// count-=8; +// } +// +// if(count & 4) +// { +// *(UINT*)dst = *(UINT*)src; +// dst = (UINT*)dst + 1; +// src = (UINT*)src + 1; +// } +// +// if(count & 2) +// { +// *(short*)dst = *(short*)src; +// dst = (short*)dst + 1; +// src = (short*)src + 1; +// } +// +// if(count & 1) +// { +// *(char*)dst = *(char*)src; +// } +// +// + +// Assembly code corresponding to above C++ method. 
+// See comments above for JIT_MemSet method +LEAF_ENTRY JIT_MemCpy, _TEXT + and x8,x0,#7 + cbz x8,LOCAL_LABEL(JIT_MemCpy_0x80) + and x8,x0,#3 + cbnz x8,LOCAL_LABEL(JIT_MemCpy_0x2c) + ldr w8,[x1] + str w8,[x0] + add x0,x0,#4 + add x1,x1,#4 + mov x8,#-4 + add x2,x2,x8 + b LOCAL_LABEL(JIT_MemCpy_0x80) +LOCAL_LABEL(JIT_MemCpy_0x2c): + cbz x2,LOCAL_LABEL(JIT_MemCpy_0x80) + tbnz x0,#0,LOCAL_LABEL(JIT_MemCpy_0x5c) +LOCAL_LABEL(JIT_MemCpy_0x34): + and x8,x0,#7 + cbz x8,LOCAL_LABEL(JIT_MemCpy_0x80) + ldrsh w8,[x1] + strh w8,[x0] + add x0,x0,#2 + add x1,x1,#2 + mov x8,#-2 + add x2,x2,x8 + cbnz x2,LOCAL_LABEL(JIT_MemCpy_0x34) + b LOCAL_LABEL(JIT_MemCpy_0x80) +LOCAL_LABEL(JIT_MemCpy_0x5c): + and x8,x0,#7 + cbz x8,LOCAL_LABEL(JIT_MemCpy_0x80) + ldrsb w8,[x1] + strb w8,[x0] + add x0,x0,#1 + add x1,x1,#1 + mov x8,#-1 + add x2,x2,x8 + cbnz x2,LOCAL_LABEL(JIT_MemCpy_0x5c) +LOCAL_LABEL(JIT_MemCpy_0x80): + cmp x2,#8 + bls LOCAL_LABEL(JIT_MemCpy_0xb4) + mov x8,#-9 + add x8,x2,x8 + lsr x8,x8,#3 + add x9,x8,#1 + mov x8,#-8 + madd x2,x9,x8,x2 +LOCAL_LABEL(JIT_MemCpy_0xa0): + ldr x8,[x1],#8 + str x8,[x0],#8 + mov x8,#-1 + add x9,x9,x8 + cbnz x9,LOCAL_LABEL(JIT_MemCpy_0xa0) +LOCAL_LABEL(JIT_MemCpy_0xb4): + tbz x2,#2,LOCAL_LABEL(JIT_MemCpy_0xc8) + ldr w8,[x1] + str w8,[x0] + add x0,x0,#4 + add x1,x1,#4 +LOCAL_LABEL(JIT_MemCpy_0xc8): + tbz x2,#1,LOCAL_LABEL(JIT_MemCpy_0xdc) + ldrsh w8,[x1] + strh w8,[x0] + add x0,x0,#2 + add x1,x1,#2 +LOCAL_LABEL(JIT_MemCpy_0xdc): + tbz x2,#0,LOCAL_LABEL(JIT_MemCpy_0xe8) + ldrsb w8,[x1] + strb w8,[x0] +LOCAL_LABEL(JIT_MemCpy_0xe8): + ret lr +LEAF_END JIT_MemCpy, _TEXT diff --git a/src/vm/arm64/pinvokestubs.S b/src/vm/arm64/pinvokestubs.S new file mode 100644 index 0000000000..b1dd5c4c35 --- /dev/null +++ b/src/vm/arm64/pinvokestubs.S @@ -0,0 +1,125 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +// ==++== +// + +// +// ==--== +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// ------------------------------------------------------------------ +// Macro to generate PInvoke Stubs. +// $__PInvokeStubFuncName : function which calls the actual stub obtained from VASigCookie +// $__PInvokeGenStubFuncName : function which generates the IL stubs for PInvoke +// +// Params :- +// $FuncPrefix : prefix of the function name for the stub +// Eg. VarargPinvoke, GenericPInvokeCalli +// $VASigCookieReg : register which contains the VASigCookie +// $SaveFPArgs : "Yes" or "No" . 
For variadic functions FP Args are not present in FP regs
+//                          So we need not save FP Args registers for vararg PInvoke
+.macro PINVOKE_STUB FuncPrefix,VASigCookieReg,HiddenArg,SaveFPArgs
+
+#if NOTYET
+    GBLS __PInvokeStubFuncName
+    GBLS __PInvokeGenStubFuncName
+    GBLS __PInvokeStubWorkerName
+
+    IF "\FuncPrefix" == "GenericPInvokeCalli"
+__PInvokeStubFuncName SETS "\FuncPrefix":CC:"Helper"
+    ELSE
+__PInvokeStubFuncName SETS "\FuncPrefix":CC:"Stub"
+    ENDIF
+__PInvokeGenStubFuncName SETS "\FuncPrefix":CC:"GenILStub"
+__PInvokeStubWorkerName SETS "\FuncPrefix":CC:"StubWorker"
+
+    IF "\VASigCookieReg" == "x1"
+__PInvokeStubFuncName SETS "\__PInvokeStubFuncName":CC:"_RetBuffArg"
+__PInvokeGenStubFuncName SETS "\__PInvokeGenStubFuncName":CC:"_RetBuffArg"
+    ENDIF
+
+    NESTED_ENTRY \__PInvokeStubFuncName
+
+    // get the stub
+    ldr x9, [\VASigCookieReg, #VASigCookie__pNDirectILStub]
+
+    // if null goto stub generation
+    cbz x9, %0
+
+
+    EPILOG_BRANCH_REG x9
+
+0
+    EPILOG_BRANCH \__PInvokeGenStubFuncName
+
+    NESTED_END
+
+
+    NESTED_ENTRY \__PInvokeGenStubFuncName
+
+    PROLOG_WITH_TRANSITION_BLOCK 0, \SaveFPArgs
+
+    // x2 = Unmanaged Target\MethodDesc
+    mov x2, \HiddenArg
+
+    // x1 = VaSigCookie
+    IF "\VASigCookieReg" != "x1"
+    mov x1, \VASigCookieReg
+    ENDIF
+
+    // x0 = pTransitionBlock
+    add x0, sp, #__PWTB_TransitionBlock
+
+    // save hidden arg
+    mov x19, \HiddenArg
+
+    bl \__PInvokeStubWorkerName
+
+    // restore hidden arg (method desc or unmanaged target)
+    mov \HiddenArg , x19
+
+
+    EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
+
+    EPILOG_BRANCH \__PInvokeStubFuncName
+
+    NESTED_END
+#else
+    EMIT_BREAKPOINT
+#endif
+.endm
+
+// ------------------------------------------------------------------
+// VarargPInvokeStub & VarargPInvokeGenILStub
+// There is a separate stub when the method has a hidden return buffer arg.
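In C++ terms, the fast path of the macro above amounts to a cached-stub lookup; a hedged sketch (the type and generator callback are illustrative, not the VM's actual declarations):

    #include <cstdint>

    using PCODE = uintptr_t;
    struct VASigCookieSketch { PCODE pNDirectILStub; };  // assumed field, cf. VASigCookie__pNDirectILStub

    // ldr x9, [cookie, #pNDirectILStub]; cbz x9 -> generate; else branch to x9.
    inline PCODE GetOrGenerateILStub(VASigCookieSketch* cookie,
                                     PCODE (*generateILStub)(VASigCookieSketch*)) {
        if (PCODE stub = cookie->pNDirectILStub)
            return stub;                   // cached IL stub: tail-branch to it
        return generateILStub(cookie);     // slow path fills the cookie and re-enters
    }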
+//
+// in:
+// x0 = VASigCookie*
+// x12 = MethodDesc *
+//
+PINVOKE_STUB VarargPInvoke, x0, x12, 1
+
+
+// ------------------------------------------------------------------
+// GenericPInvokeCalliHelper & GenericPInvokeCalliGenILStub
+// Helper for generic pinvoke calli instruction
+//
+// in:
+// x15 = VASigCookie*
+// x14 = Unmanaged target
+//
+PINVOKE_STUB GenericPInvokeCalli, x15, x14, 1
+
+// ------------------------------------------------------------------
+// VarargPInvokeStub_RetBuffArg & VarargPInvokeGenILStub_RetBuffArg
+// Vararg PInvoke Stub when the method has a hidden return buffer arg
+//
+// in:
+// x1 = VASigCookie*
+// x12 = MethodDesc*
+//
+PINVOKE_STUB VarargPInvoke, x1, x12, 0
diff --git a/src/vm/arm64/stubs.cpp b/src/vm/arm64/stubs.cpp
index cca6b7143a..72039c228c 100644
--- a/src/vm/arm64/stubs.cpp
+++ b/src/vm/arm64/stubs.cpp
@@ -315,7 +315,12 @@ void LazyMachState::unwindLazyState(LazyMachState* baseState,
     do
     {
+#ifndef FEATURE_PAL
         pvControlPc = Thread::VirtualUnwindCallFrame(&context, &nonVolContextPtrs);
+#else
+        PAL_VirtualUnwind(&context, &nonVolContextPtrs);
+        pvControlPc = GetIP(&context);
+#endif
         if (funCallDepth > 0)
         {
@@ -647,6 +652,8 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis
     pContextPointers->X28 = (PDWORD64)&pCalleeSaved->x28;
 }
+#ifndef CROSSGEN_COMPILE
+
 void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 {
@@ -656,7 +663,11 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
     // copy the argument registers
     ArgumentRegisters *pArgRegs = GetArgumentRegisters();
     for (int i = 0; i < ARGUMENTREGISTERS_SIZE; i++)
+#ifdef __clang__
+        *(&pRD->pCurrentContext->X0 + i) = pArgRegs->x[i];
+#else
         pRD->pCurrentContext->X[i] = pArgRegs->x[i];
+#endif
     // copy the callee saved regs
     CalleeSavedRegisters *pCalleeSaved = GetCalleeSavedRegisters();
@@ -676,6 +687,8 @@ void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 }
+#endif
+
 void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
 {
     _ASSERTE(!"ARM64:NYI");
@@ -1197,10 +1210,6 @@ VOID ResetCurrentContext()
 }
 #endif
-extern "C" void ResolveWorkerChainLookupAsmStub()
-{
-    _ASSERTE(!"ARM64:NYI");
-}
 extern "C" void StubDispatchFixupPatchLabel()
 {
     _ASSERTE(!"ARM64:NYI");
@@ -1693,6 +1702,8 @@ void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall)
     }
 }
+#ifndef CROSSGEN_COMPILE
+
 EXTERN_C UINT32 _tls_index;
 void StubLinkerCPU::EmitGetThreadInlined(IntReg Xt)
 {
@@ -1705,7 +1716,8 @@ void StubLinkerCPU::EmitGetThreadInlined(IntReg Xt)
     EmitLabelRef(NewExternalCodeLabel((LPVOID)&_tls_index), reinterpret_cast<LoadFromLabelInstructionFormat&>(gLoadFromLabelIF), X8);
     // Load Teb->ThreadLocalStoragePointer into x8
-    EmitLoadStoreRegImm(eLOAD, Xt, IntReg(18), offsetof(_TEB, ThreadLocalStoragePointer));
+// FIXME: I know what TEB is, but how is it in scope here?
+//    EmitLoadStoreRegImm(eLOAD, Xt, IntReg(18), offsetof(_TEB, ThreadLocalStoragePointer));
     // index it with _tls_index, i.e. Teb->ThreadLocalStoragePointer[_tls_index].
// This will give us the TLS section for the module on this thread's context @@ -1720,8 +1732,6 @@ void StubLinkerCPU::EmitGetThreadInlined(IntReg Xt) } -#ifndef CROSSGEN_COMPILE - void StubLinkerCPU::EmitUnboxMethodStub(MethodDesc *pMD) { _ASSERTE(!pMD->RequiresInstMethodDescArg()); @@ -1737,3 +1747,50 @@ void StubLinkerCPU::EmitUnboxMethodStub(MethodDesc *pMD) #endif // CROSSGEN_COMPILE #endif // #ifndef DACCESS_COMPILE + +#ifdef FEATURE_READYTORUN +PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + UNREACHABLE(); +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + UNREACHABLE(); +} +#endif diff --git a/src/vm/arm64/unixstubs.cpp b/src/vm/arm64/unixstubs.cpp new file mode 100644 index 0000000000..c14ff59e53 --- /dev/null +++ b/src/vm/arm64/unixstubs.cpp @@ -0,0 +1,29 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
diff --git a/src/vm/arm64/unixstubs.cpp b/src/vm/arm64/unixstubs.cpp
new file mode 100644
index 0000000000..c14ff59e53
--- /dev/null
+++ b/src/vm/arm64/unixstubs.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#include "common.h"
+
+extern "C"
+{
+    void RedirectForThrowControl()
+    {
+        PORTABILITY_ASSERT("Implement for PAL");
+    }
+
+    void GenericPInvokeCalliHelper()
+    {
+        PORTABILITY_ASSERT("Implement for PAL");
+    }
+
+    void VarargPInvokeStub()
+    {
+        PORTABILITY_ASSERT("Implement for PAL");
+    }
+
+    void VarargPInvokeStub_RetBuffArg()
+    {
+        PORTABILITY_ASSERT("Implement for PAL");
+    }
+};
diff --git a/src/vm/arm64/virtualcallstubcpu.hpp b/src/vm/arm64/virtualcallstubcpu.hpp
index 9210a0b6fc..c302cee9b7 100644
--- a/src/vm/arm64/virtualcallstubcpu.hpp
+++ b/src/vm/arm64/virtualcallstubcpu.hpp
@@ -216,7 +216,7 @@ struct ResolveHolder
         //ldr w13, [x10 + DATA_OFFSET(_hashedToken)]
         offset = DATA_OFFSET(_hashedToken);
         _ASSERTE(offset >=0 && offset%8 == 0);
-        _stub._resolveEntryPoint[n++] = 0xB940014D | offset<<8;
+        _stub._resolveEntryPoint[n++] = 0xB940014D | offset<<7;
 
         //eor x9,x9,x13
         _stub._resolveEntryPoint[n++] = 0xCA0D0129;
@@ -326,13 +326,13 @@ struct ResolveHolder
         //ldr x12, [x10 , DATA_OFFSET(_token)]
         offset=DATA_OFFSET(_token);
         _ASSERTE(offset >=0 && offset%8 == 0);
-        _stub._slowEntryPoint[n++] = 0xF940014C | (offset<<10);
+        _stub._slowEntryPoint[n++] = 0xF940014C | (offset<<7);
 
         //
         //ldr x9, [x10 , DATA_OFFSET(_resolveWorkerTarget)]
         offset=DATA_OFFSET(_resolveWorkerTarget);
         _ASSERTE(offset >=0 && offset%8 == 0);
-        _stub._slowEntryPoint[n++] = 0xF9400149 | (offset<<10);
+        _stub._slowEntryPoint[n++] = 0xF9400149 | (offset<<7);
 
         // br x9
         _stub._slowEntryPoint[n++] = 0xD61F0120;
diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h
index 2d3e0a7526..b1ed3853c6 100644
--- a/src/vm/callingconvention.h
+++ b/src/vm/callingconvention.h
@@ -368,7 +368,7 @@ public:
         if (m_argType == ELEMENT_TYPE_VALUETYPE)
         {
             _ASSERTE(!m_argTypeHandle.IsNull());
-            return ((m_argSize > ENREGISTERED_PARAMTYPE_MAXSIZE) && (!m_argTypeHandle.IsHFA() || IsVarArg()));
+            return ((m_argSize > ENREGISTERED_PARAMTYPE_MAXSIZE) && (!m_argTypeHandle.IsHFA() || this->IsVarArg()));
         }
         return FALSE;
 #else
diff --git a/src/vm/crossgen/CMakeLists.txt b/src/vm/crossgen/CMakeLists.txt
index e3dd990695..c27cb82831 100644
--- a/src/vm/crossgen/CMakeLists.txt
+++ b/src/vm/crossgen/CMakeLists.txt
@@ -114,8 +114,13 @@ elseif(CLR_CMAKE_PLATFORM_ARCH_ARM)
   list(APPEND VM_CROSSGEN_SOURCES
     ../arm/stubs.cpp
   )
+elseif(CLR_CMAKE_PLATFORM_ARCH_ARM64)
+  include_directories(../arm64)
+  list(APPEND VM_CROSSGEN_SOURCES
+    ../arm64/stubs.cpp
+  )
 else()
-  message(FATAL_ERROR "Only ARM and AMD64 is supported")
+  message(FATAL_ERROR "Only ARM, ARM64 and AMD64 is supported")
 endif()
 
 if (WIN32)
diff --git a/src/vm/frames.cpp b/src/vm/frames.cpp
index e65aade1cb..6e0a424c81 100644
--- a/src/vm/frames.cpp
+++ b/src/vm/frames.cpp
@@ -1068,7 +1068,7 @@ void InterpreterFrame::GcScanRoots(promote_func *fn, ScanContext* sc)
 
 #endif // FEATURE_INTERPRETER
 
-#ifdef _DEBUG
+#if defined(_DEBUG) && !defined (DACCESS_COMPILE)
 
 struct IsProtectedByGCFrameStruct
 {
diff --git a/src/vm/frames.h b/src/vm/frames.h
index 58702aa480..44e38bf155 100644
--- a/src/vm/frames.h
+++ b/src/vm/frames.h
@@ -754,7 +754,7 @@ public:
         return VPTR_HOST_VTABLE_TO_TADDR(*(LPVOID*)this);
     }
 
-#ifdef _DEBUG
+#if defined(_DEBUG) && !defined(DACCESS_COMPILE)
     virtual BOOL Protects(OBJECTREF *ppObjectRef)
     {
         LIMITED_METHOD_CONTRACT;
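The virtualcallstubcpu.hpp hunk above fixes hand-assembled A64 "LDR (immediate, unsigned offset)" words: the 64-bit form stores the byte offset divided by 8 as an imm12 field at bit 10, so the correct composition is (offset / 8) << 10, which collapses to offset << 7 for 8-byte-aligned offsets. A self-contained sketch of that encoding (EncodeLdrX is a hypothetical helper written for this note, not CLR code):

#include <cassert>
#include <cstdint>

// Build "ldr Xt, [Xn, #byteOffset]" the long way: scale the byte offset
// down by 8 and place the resulting imm12 at bit 10 of the base opcode.
uint32_t EncodeLdrX(unsigned rt, unsigned rn, unsigned byteOffset)
{
    assert(byteOffset % 8 == 0 && byteOffset / 8 < 4096);
    return 0xF9400000u               // LDR Xt, [Xn, #pimm] base opcode
         | ((byteOffset / 8) << 10)  // imm12 = byte offset / 8
         | (rn << 5)
         | rt;
}

int main()
{
    // ldr x12, [x10, #16] built both ways yields the same word,
    // matching the 0xF940014C | (offset << 7) form in the stub.
    assert(EncodeLdrX(12, 10, 16) == (0xF940014C | (16u << 7)));
    return 0;
}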
diff --git a/src/vm/gcinfodecoder.cpp b/src/vm/gcinfodecoder.cpp
index b231924e0f..c4dcfc4565 100644
--- a/src/vm/gcinfodecoder.cpp
+++ b/src/vm/gcinfodecoder.cpp
@@ -1793,6 +1793,24 @@ void GcInfoDecoder::ReportRegisterToGC(  // ARM64
     pCallBack(hCallBack, pObjRef, gcFlags DAC_ARG(DacSlotLocation(regNum, 0, false)));
 }
 
+#ifdef FEATURE_PAL
+OBJECTREF* GcInfoDecoder::GetCapturedRegister(
+    int             regNum,
+    PREGDISPLAY     pRD
+    )
+{
+    _ASSERTE(regNum >= 0 && regNum <= 28);
+
+    // The fields of CONTEXT are in the same order as
+    // the processor encoding numbers.
+
+    DWORD64 *pX0;
+    pX0 = &pRD->pCurrentContext->X0;
+
+    return (OBJECTREF*)(pX0 + regNum);
+}
+#endif // FEATURE_PAL
+
 #else // Unknown platform
 
 OBJECTREF* GcInfoDecoder::GetRegisterSlot(
diff --git a/src/vm/stublink.h b/src/vm/stublink.h
index 8456432fdc..5c40c8986a 100644
--- a/src/vm/stublink.h
+++ b/src/vm/stublink.h
@@ -322,6 +322,15 @@ protected:
     BOOL            m_fPushArgRegs;     // If true, r0-r3 are saved before callee saved regs
 #endif // _TARGET_ARM_
 
+#ifdef _TARGET_ARM64_
+protected:
+    BOOL            m_fProlog;          // True if DescribeProlog has been called
+    UINT            m_cIntRegArgs;      // Count of int register arguments (x0 - x7)
+    UINT            m_cVecRegArgs;      // Count of FP register arguments (v0 - v7)
+    UINT            m_cCalleeSavedRegs; // Count of callee saved registers (x19 - x28)
+    UINT            m_cbStackSpace;     // Additional stack space for return buffer and stack alignment
+#endif // _TARGET_ARM64_
+
 #ifdef STUBLINKER_GENERATES_UNWIND_INFO
 
 #ifdef _DEBUG
@@ -368,12 +377,6 @@ private:
 #ifdef _TARGET_ARM64_
 #define MAX_UNWIND_CODE_WORDS  5   /* maximum number of 32-bit words to store unwind codes */
 
-protected:
-    BOOL            m_fProlog;          // True if DescribeProlog has been called
-    UINT            m_cIntRegArgs;      // Count of int register arguments (x0 - x7)
-    UINT            m_cVecRegArgs;      // Count of FP register arguments (v0 - v7)
-    UINT            m_cCalleeSavedRegs; // Count of callee saved registers (x19 - x28)
-    UINT            m_cbStackSpace;     // Additional stack space for return buffer and stack alignment
-
 private:
     // Reserve fixed size block that's big enough to fit any unwind info we can have
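GetCapturedRegister above leans on a layout invariant rather than an explicit table: the X0..X28 fields of CONTEXT sit contiguously in processor encoding order, so &X0 + regNum addresses register regNum directly. A minimal sketch of that invariant with a stand-in struct (ContextSketch is hypothetical, not the real CONTEXT):

#include <cassert>
#include <cstddef>
#include <cstdint>

struct ContextSketch
{
    uint64_t X0, X1, X2, X3;   // X4..X28 omitted in this sketch
};

int main()
{
    // If the fields ever stopped being contiguous and ordered, base + regNum
    // indexing would silently return the wrong slot; the sketch checks it.
    static_assert(offsetof(ContextSketch, X3) ==
                  offsetof(ContextSketch, X0) + 3 * sizeof(uint64_t),
                  "registers must be contiguous for base + regNum indexing");

    ContextSketch ctx = {};
    uint64_t *slot = &ctx.X0 + 3;   // what GetCapturedRegister(3, ...) computes
    *slot = 0xDEADBEEF;
    assert(ctx.X3 == 0xDEADBEEF);
    return 0;
}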