summaryrefslogtreecommitdiff
path: root/src/vm
diff options
context:
space:
mode:
authorSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>2017-08-01 13:11:55 -0400
committerJan Vorlicek <janvorli@microsoft.com>2017-08-01 19:11:55 +0200
commit98d20f3d843356935edc194706b1d8640b048add (patch)
treeae024df7b25f458685ec4766b36fd099c44a6430 /src/vm
parent52564c8235a95791eef17ee56e14fd903fd23565 (diff)
downloadcoreclr-98d20f3d843356935edc194706b1d8640b048add.tar.gz
coreclr-98d20f3d843356935edc194706b1d8640b048add.tar.bz2
coreclr-98d20f3d843356935edc194706b1d8640b048add.zip
[Arm64] Adjust alignment for 128 byte cache line (#12801)
Diffstat (limited to 'src/vm')
-rw-r--r--src/vm/threadpoolrequest.cpp6
-rw-r--r--src/vm/threadpoolrequest.h22
-rw-r--r--src/vm/util.hpp7
-rw-r--r--src/vm/win32threadpool.cpp14
-rw-r--r--src/vm/win32threadpool.h29
5 files changed, 44 insertions, 34 deletions
diff --git a/src/vm/threadpoolrequest.cpp b/src/vm/threadpoolrequest.cpp
index a1ec4b087e..f52de8cf41 100644
--- a/src/vm/threadpoolrequest.cpp
+++ b/src/vm/threadpoolrequest.cpp
@@ -27,13 +27,13 @@
#include "nativeoverlapped.h"
#include "appdomain.inl"
-BYTE PerAppDomainTPCountList::s_padding[64 - sizeof(LONG)];
+BYTE PerAppDomainTPCountList::s_padding[MAX_CACHE_LINE_SIZE - sizeof(LONG)];
// Make this point to unmanaged TP in case, no appdomains have initialized yet.
// Cacheline aligned, hot variable
-DECLSPEC_ALIGN(64) LONG PerAppDomainTPCountList::s_ADHint = -1;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) LONG PerAppDomainTPCountList::s_ADHint = -1;
// Move out of from preceeding variables' cache line
-DECLSPEC_ALIGN(64) UnManagedPerAppDomainTPCount PerAppDomainTPCountList::s_unmanagedTPCount;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) UnManagedPerAppDomainTPCount PerAppDomainTPCountList::s_unmanagedTPCount;
//The list of all per-appdomain work-request counts.
ArrayListStatic PerAppDomainTPCountList::s_appDomainIndexList;
diff --git a/src/vm/threadpoolrequest.h b/src/vm/threadpoolrequest.h
index 8d2c7e486a..3d2dc3da82 100644
--- a/src/vm/threadpoolrequest.h
+++ b/src/vm/threadpoolrequest.h
@@ -20,6 +20,8 @@
#ifndef _THREADPOOL_REQUEST_H
#define _THREADPOOL_REQUEST_H
+#include "util.hpp"
+
#define TP_QUANTUM 2
#define UNUSED_THREADPOOL_INDEX (DWORD)-1
@@ -181,11 +183,11 @@ public:
private:
ADID m_id;
TPIndex m_index;
- DECLSPEC_ALIGN(64) struct {
- BYTE m_padding1[64 - sizeof(LONG)];
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) struct {
+ BYTE m_padding1[MAX_CACHE_LINE_SIZE - sizeof(LONG)];
// Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange
LONG m_numRequestsPending;
- BYTE m_padding2[64];
+ BYTE m_padding2[MAX_CACHE_LINE_SIZE];
};
};
@@ -286,11 +288,11 @@ public:
private:
SpinLock m_lock;
ULONG m_NumRequests;
- DECLSPEC_ALIGN(64) struct {
- BYTE m_padding1[64 - sizeof(LONG)];
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) struct {
+ BYTE m_padding1[MAX_CACHE_LINE_SIZE - sizeof(LONG)];
// Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange
LONG m_outstandingThreadRequestCount;
- BYTE m_padding2[64];
+ BYTE m_padding2[MAX_CACHE_LINE_SIZE];
};
};
@@ -351,12 +353,12 @@ public:
private:
static DWORD FindFirstFreeTpEntry();
- static BYTE s_padding[64 - sizeof(LONG)];
- DECLSPEC_ALIGN(64) static LONG s_ADHint;
- DECLSPEC_ALIGN(64) static UnManagedPerAppDomainTPCount s_unmanagedTPCount;
+ static BYTE s_padding[MAX_CACHE_LINE_SIZE - sizeof(LONG)];
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG s_ADHint;
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static UnManagedPerAppDomainTPCount s_unmanagedTPCount;
//The list of all per-appdomain work-request counts.
static ArrayListStatic s_appDomainIndexList;
};
-#endif //_THREADPOOL_REQUEST_H \ No newline at end of file
+#endif //_THREADPOOL_REQUEST_H
diff --git a/src/vm/util.hpp b/src/vm/util.hpp
index 80cf97055b..edfd9161e4 100644
--- a/src/vm/util.hpp
+++ b/src/vm/util.hpp
@@ -44,6 +44,13 @@
#define UtilMessageBoxNonLocalizedVA __error("Use one of the EEMessageBox APIs (defined in eemessagebox.h) from inside the EE")
#define WszMessageBox __error("Use one of the EEMessageBox APIs (defined in eemessagebox.h) from inside the EE")
+// Hot cache lines need to be aligned to cache line size to improve performance
+#if defined(ARM64)
+#define MAX_CACHE_LINE_SIZE 128
+#else
+#define MAX_CACHE_LINE_SIZE 64
+#endif
+
//========================================================================
// More convenient names for integer types of a guaranteed size.
//========================================================================
diff --git a/src/vm/win32threadpool.cpp b/src/vm/win32threadpool.cpp
index 18df0dc76e..e2887c53db 100644
--- a/src/vm/win32threadpool.cpp
+++ b/src/vm/win32threadpool.cpp
@@ -86,7 +86,7 @@ SVAL_IMPL(LONG,ThreadpoolMgr,MaxFreeCPThreads); // = MaxFreeCP
Volatile<LONG> ThreadpoolMgr::NumCPInfrastructureThreads = 0; // number of threads currently busy handling draining cycle
// Cacheline aligned, hot variable
-DECLSPEC_ALIGN(64) SVAL_IMPL(ThreadpoolMgr::ThreadCounter, ThreadpoolMgr, WorkerCounter);
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) SVAL_IMPL(ThreadpoolMgr::ThreadCounter, ThreadpoolMgr, WorkerCounter);
SVAL_IMPL(LONG,ThreadpoolMgr,MinLimitTotalWorkerThreads); // = MaxLimitCPThreadsPerCPU * number of CPUS
SVAL_IMPL(LONG,ThreadpoolMgr,MaxLimitTotalWorkerThreads); // = MaxLimitCPThreadsPerCPU * number of CPUS
@@ -97,7 +97,7 @@ LONG ThreadpoolMgr::cpuUtilizationAverage = 0;
HillClimbing ThreadpoolMgr::HillClimbingInstance;
// Cacheline aligned, 3 hot variables updated in a group
-DECLSPEC_ALIGN(64) LONG ThreadpoolMgr::PriorCompletedWorkRequests = 0;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) LONG ThreadpoolMgr::PriorCompletedWorkRequests = 0;
DWORD ThreadpoolMgr::PriorCompletedWorkRequestsTime;
DWORD ThreadpoolMgr::NextCompletedWorkRequestsTime;
@@ -116,10 +116,10 @@ int ThreadpoolMgr::ThreadAdjustmentInterval;
LONG ThreadpoolMgr::Initialization=0; // indicator of whether the threadpool is initialized.
// Cacheline aligned, hot variable
-DECLSPEC_ALIGN(64) unsigned int ThreadpoolMgr::LastDequeueTime; // used to determine if work items are getting thread starved
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) unsigned int ThreadpoolMgr::LastDequeueTime; // used to determine if work items are getting thread starved
// Move out of from preceeding variables' cache line
-DECLSPEC_ALIGN(64) int ThreadpoolMgr::offset_counter = 0;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) int ThreadpoolMgr::offset_counter = 0;
SPTR_IMPL(WorkRequest,ThreadpoolMgr,WorkRequestHead); // Head of work request queue
SPTR_IMPL(WorkRequest,ThreadpoolMgr,WorkRequestTail); // Head of work request queue
@@ -144,17 +144,17 @@ HANDLE ThreadpoolMgr::TimerThread=NULL;
Thread *ThreadpoolMgr::pTimerThread=NULL;
// Cacheline aligned, hot variable
-DECLSPEC_ALIGN(64) DWORD ThreadpoolMgr::LastTickCount;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) DWORD ThreadpoolMgr::LastTickCount;
#ifdef _DEBUG
DWORD ThreadpoolMgr::TickCountAdjustment=0;
#endif
// Cacheline aligned, hot variable
-DECLSPEC_ALIGN(64) LONG ThreadpoolMgr::GateThreadStatus=GATE_THREAD_STATUS_NOT_RUNNING;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) LONG ThreadpoolMgr::GateThreadStatus=GATE_THREAD_STATUS_NOT_RUNNING;
// Move out of from preceeding variables' cache line
-DECLSPEC_ALIGN(64) ThreadpoolMgr::RecycledListsWrapper ThreadpoolMgr::RecycledLists;
+DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) ThreadpoolMgr::RecycledListsWrapper ThreadpoolMgr::RecycledLists;
ThreadpoolMgr::TimerInfo *ThreadpoolMgr::TimerInfosToBeRecycled = NULL;
diff --git a/src/vm/win32threadpool.h b/src/vm/win32threadpool.h
index 65be889a50..dec336be63 100644
--- a/src/vm/win32threadpool.h
+++ b/src/vm/win32threadpool.h
@@ -123,9 +123,8 @@ class ThreadpoolMgr
class UnfairSemaphore
{
private:
-
// padding to ensure we get our own cache line
- BYTE padding1[64];
+ BYTE padding1[MAX_CACHE_LINE_SIZE];
//
// We track everything we care about in a single 64-bit struct to allow us to
@@ -146,12 +145,12 @@ class ThreadpoolMgr
} m_counts;
private:
+ // padding to ensure we get our own cache line
+ BYTE padding2[MAX_CACHE_LINE_SIZE];
+
const int m_spinLimitPerProcessor; //used when calculating max spin duration
CLRSemaphore m_sem; //waiters wait on this
- // padding to ensure we get our own cache line
- BYTE padding2[64];
-
INDEBUG(int m_maxCount;)
bool UpdateCounts(Counts newCounts, Counts currentCounts)
@@ -350,6 +349,9 @@ public:
{
static const int MaxPossibleCount = 0x7fff;
+ // padding to ensure we get our own cache line
+ BYTE padding1[MAX_CACHE_LINE_SIZE];
+
union Counts
{
struct
@@ -370,11 +372,10 @@ public:
LONGLONG AsLongLong;
bool operator==(Counts other) {LIMITED_METHOD_CONTRACT; return AsLongLong == other.AsLongLong;}
-
} counts;
// padding to ensure we get our own cache line
- BYTE padding[64];
+ BYTE padding2[MAX_CACHE_LINE_SIZE];
Counts GetCleanCounts()
{
@@ -1247,11 +1248,11 @@ private:
SVAL_DECL(LONG,MinLimitTotalWorkerThreads); // same as MinLimitTotalCPThreads
SVAL_DECL(LONG,MaxLimitTotalWorkerThreads); // same as MaxLimitTotalCPThreads
- DECLSPEC_ALIGN(64) static unsigned int LastDequeueTime; // used to determine if work items are getting thread starved
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static unsigned int LastDequeueTime; // used to determine if work items are getting thread starved
static HillClimbing HillClimbingInstance;
- DECLSPEC_ALIGN(64) static LONG PriorCompletedWorkRequests;
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG PriorCompletedWorkRequests;
static DWORD PriorCompletedWorkRequestsTime;
static DWORD NextCompletedWorkRequestsTime;
@@ -1277,7 +1278,7 @@ private:
static const DWORD WorkerTimeout = 20 * 1000;
static const DWORD WorkerTimeoutAppX = 5 * 1000; // shorter timeout to allow threads to exit prior to app suspension
- DECLSPEC_ALIGN(64) SVAL_DECL(ThreadCounter,WorkerCounter);
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) SVAL_DECL(ThreadCounter,WorkerCounter);
//
// WorkerSemaphore is an UnfairSemaphore because:
@@ -1306,7 +1307,7 @@ private:
SVAL_DECL(LIST_ENTRY,TimerQueue); // queue of timers
static HANDLE TimerThread; // Currently we only have one timer thread
static Thread* pTimerThread;
- DECLSPEC_ALIGN(64) static DWORD LastTickCount; // the count just before timer thread goes to sleep
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static DWORD LastTickCount; // the count just before timer thread goes to sleep
static BOOL InitCompletionPortThreadpool; // flag indicating whether completion port threadpool has been initialized
static HANDLE GlobalCompletionPort; // used for binding io completions on file handles
@@ -1319,20 +1320,20 @@ private:
SVAL_DECL(LONG,MinLimitTotalCPThreads);
SVAL_DECL(LONG,MaxFreeCPThreads); // = MaxFreeCPThreadsPerCPU * Number of CPUS
- DECLSPEC_ALIGN(64) static LONG GateThreadStatus; // See GateThreadStatus enumeration
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static LONG GateThreadStatus; // See GateThreadStatus enumeration
static Volatile<LONG> NumCPInfrastructureThreads; // number of threads currently busy handling draining cycle
SVAL_DECL(LONG,cpuUtilization);
static LONG cpuUtilizationAverage;
- DECLSPEC_ALIGN(64) static RecycledListsWrapper RecycledLists;
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static RecycledListsWrapper RecycledLists;
#ifdef _DEBUG
static DWORD TickCountAdjustment; // add this value to value returned by GetTickCount
#endif
- DECLSPEC_ALIGN(64) static int offset_counter;
+ DECLSPEC_ALIGN(MAX_CACHE_LINE_SIZE) static int offset_counter;
static const int offset_multiplier = 128;
};