summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSergiy Kuryata <sergeyk@microsoft.com>2015-12-02 10:15:18 -0800
committerSergiy Kuryata <sergeyk@microsoft.com>2015-12-02 10:21:28 -0800
commit4cb6f34a094a90527106575d3578ba3f9c390b03 (patch)
tree0edaeb0946f9f48323b7e4b45fde4de5fb73fec8 /src
parent487d85ca531524341fb4c5d189794677572411b1 (diff)
downloadcoreclr-4cb6f34a094a90527106575d3578ba3f9c390b03.tar.gz
coreclr-4cb6f34a094a90527106575d3578ba3f9c390b03.tar.bz2
coreclr-4cb6f34a094a90527106575d3578ba3f9c390b03.zip
Implement an allocator for executable (JIT) memory in PAL
This change improves performance of CoreCLR with Server GC enabled by about 30% according to ASP.NET benchmarks on Linux. The table below shows the number of requests per second that an ASP.NET benchmark could handle on my machine before and after the change.

| Pipeline | Before       | After        | Improvement |
|----------|--------------|--------------|-------------|
| 16       | 230K Req/sec | 305K Req/sec | 33%         |
| 256      | 240K Req/sec | 340K Req/sec | 42%         |

The problem was that with Server GC enabled, the GC initialization code was reserving a large chunk (about 18GB on my machine) of virtual address space during runtime initialization. Unfortunately, due to implementation details of memory management (MM) on Linux, the GC memory was located next to the location of libcoreclr. As a result, the runtime could not allocate memory for JIT'ed code close to the coreclr library. Because of that, the JIT'ed code had to use jump stubs to call functions from the runtime (which can become very expensive, for example, for write barriers). This change fixes this issue by implementing a simple allocator that tries to reserve (during process startup) a chunk of virtual memory that is located near the coreclr library (within 2GB range) and that can later be used for JIT'ed code.
Diffstat (limited to 'src')
-rw-r--r--src/pal/inc/pal.h5
-rw-r--r--src/pal/src/include/pal/virtual.h90
-rw-r--r--src/pal/src/init/pal.cpp7
-rw-r--r--src/pal/src/map/virtual.cpp243
-rw-r--r--src/utilcode/util.cpp7
5 files changed, 321 insertions, 31 deletions
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h
index 77877a8992..aa51b1959b 100644
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -480,6 +480,7 @@ typedef long time_t;
#define PAL_INITIALIZE_NONE 0x00
#define PAL_INITIALIZE_SYNC_THREAD 0x01
+#define PAL_INITIALIZE_EXEC_ALLOCATOR 0x02
// PAL_Initialize() flags
#define PAL_INITIALIZE PAL_INITIALIZE_SYNC_THREAD
@@ -487,6 +488,9 @@ typedef long time_t;
// PAL_InitializeDLL() flags - don't start any of the helper threads
#define PAL_INITIALIZE_DLL PAL_INITIALIZE_NONE
+// PAL_InitializeCoreCLR() flags
+#define PAL_INITIALIZE_CORECLR (PAL_INITIALIZE | PAL_INITIALIZE_EXEC_ALLOCATOR)
+
typedef DWORD (PALAPI *PTHREAD_START_ROUTINE)(LPVOID lpThreadParameter);
typedef PTHREAD_START_ROUTINE LPTHREAD_START_ROUTINE;
@@ -3538,6 +3542,7 @@ SetErrorMode(
#define MEM_MAPPED 0x40000
#define MEM_TOP_DOWN 0x100000
#define MEM_WRITE_WATCH 0x200000
+#define MEM_RESERVE_EXECUTABLE 0x40000000 // reserve memory using executable memory allocator
PALIMPORT
HANDLE
diff --git a/src/pal/src/include/pal/virtual.h b/src/pal/src/include/pal/virtual.h
index 326cf2a9f6..75968137ca 100644
--- a/src/pal/src/include/pal/virtual.h
+++ b/src/pal/src/include/pal/virtual.h
@@ -82,7 +82,7 @@ Return value:
TRUE if initialization succeeded
FALSE otherwise.
--*/
-BOOL VIRTUALInitialize( void );
+BOOL VIRTUALInitialize(bool initializeExecutableMemoryAllocator);
/*++
Function :
@@ -105,6 +105,94 @@ BOOL VIRTUALOwnedRegion( IN UINT_PTR address );
#ifdef __cplusplus
}
+
+/// <summary>
+/// This class implements a virtual memory allocator for JIT'ed code.
+/// The purpose of this allocator is to opportunistically reserve a chunk of virtual memory
+/// that is located near the coreclr library (within 2GB range) that can be later used by
+/// JIT. Having executable memory close to the coreclr library allows JIT to generate more
+/// efficient code (by avoiding usage of jump stubs) and thus it can significantly improve
+/// performance of the application.
+///
+/// This allocator is integrated with the VirtualAlloc/Reserve code. If VirtualAlloc has been
+/// called with the MEM_RESERVE_EXECUTABLE flag then it will first try to obtain the requested size
+/// of virtual memory from ExecutableMemoryAllocator. If ExecutableMemoryAllocator runs out of
+/// the reserved memory (or fails to allocate it during initialization) then VirtualAlloc/Reserve code
+/// will simply fall back to reserving memory using OS APIs.
+///
+/// Notes:
+/// - the memory allocated by this class is NOT committed by default. It is the responsibility
+/// of the caller to commit the virtual memory before accessing it.
+/// - in addition, this class does not provide the ability to free the reserved memory. The caller
+/// has full control of the memory it got from this allocator (i.e. the caller becomes
+/// the owner of the allocated memory), so it is the caller's responsibility to free the memory
+/// if it is no longer needed.
+/// - the class has no user-declared constructor and its size constants are static, so an
+/// instance carries only the four bookkeeping members below; Initialize() is what
+/// puts them into a well-defined state.
+/// </summary>
+class ExecutableMemoryAllocator
+{
+public:
+ /// <summary>
+ /// This function initializes the allocator. It should be called early during process startup
+ /// (when process address space is pretty much empty) in order to have a chance to reserve
+ /// a sufficient amount of memory that is close to the coreclr library.
+ /// </summary>
+ void Initialize();
+
+ /// <summary>
+ /// This function attempts to allocate the requested amount of memory from its reserved virtual
+ /// address space. The function will return NULL if the allocation request cannot
+ /// be satisfied by the memory that is currently available in the allocator.
+ ///
+ /// The size is a signed 32-bit value: a caller that holds a wider size (e.g. SIZE_T) must
+ /// make sure it fits before calling, otherwise the argument is implicitly narrowed.
+ /// </summary>
+ LPVOID AllocateMemory(int32_t allocationSize);
+
+private:
+ /// <summary>
+ /// This function is called during initialization. It opportunistically tries to reserve
+ /// a large chunk of virtual memory that can be later used to store JIT'ed code.
+ /// </summary>
+ void TryReserveInitialMemory();
+
+ /// <summary>
+ /// This function returns a random offset (in multiples of the virtual page size)
+ /// at which the allocator should start allocating memory from its reserved memory range.
+ /// </summary>
+ int32_t GenerateRandomStartOffset();
+
+private:
+ /// <summary>
+ /// There does not seem to be an easy way to find the size of a library on Unix.
+ /// So this constant represents an approximation of the libcoreclr size (on debug build)
+ /// that can be used to calculate an approximate location of the memory that
+ /// is in 2GB range from the coreclr library. In addition, having the precise size of libcoreclr
+ /// is not necessary for the calculations.
+ /// Declared static so that it is a class-wide compile-time constant rather than
+ /// a per-instance data member.
+ /// </summary>
+ static const int32_t CoreClrLibrarySize = 100 * 1024 * 1024;
+
+ /// <summary>
+ /// This constant represents the max size of the virtual memory that this allocator
+ /// will try to reserve during initialization. We want all JIT-ed code and the
+ /// entire libcoreclr to be located in a 2GB range.
+ /// </summary>
+ static const int32_t MaxExecutableMemorySize = 0x7FFF0000 - CoreClrLibrarySize;
+
+ /// <summary>Start address of the reserved virtual address space</summary>
+ LPVOID m_startAddress;
+
+ /// <summary>Next available address in the reserved address space</summary>
+ LPVOID m_nextFreeAddress;
+
+ /// <summary>
+ /// Total size of the virtual memory that the allocator has been able to
+ /// reserve during its initialization.
+ /// </summary>
+ int32_t m_totalSizeOfReservedMemory;
+
+ /// <summary>
+ /// Remaining size of the reserved virtual memory that can be used to satisfy allocation requests.
+ /// </summary>
+ int32_t m_remainingReservedMemory;
+};
+
#endif // __cplusplus
#endif /* _PAL_VIRTUAL_H_ */
diff --git a/src/pal/src/init/pal.cpp b/src/pal/src/init/pal.cpp
index 15ecae6028..ff7cddd928 100644
--- a/src/pal/src/init/pal.cpp
+++ b/src/pal/src/init/pal.cpp
@@ -499,7 +499,8 @@ Initialize(
}
/* Initialize the Virtual* functions. */
- if (FALSE == VIRTUALInitialize())
+ bool initializeExecutableMemoryAllocator = (flags & PAL_INITIALIZE_EXEC_ALLOCATOR) != 0;
+ if (FALSE == VIRTUALInitialize(initializeExecutableMemoryAllocator))
{
ERROR("Unable to initialize virtual memory support\n");
goto CLEANUP10;
@@ -623,8 +624,8 @@ PAL_ERROR
PALAPI
PAL_InitializeCoreCLR(const char *szExePath)
{
- // Fake up a command line to call PAL_Initialize with.
- int result = PAL_Initialize(1, &szExePath);
+ // Fake up a command line to call PAL initialization with.
+ int result = Initialize(1, &szExePath, PAL_INITIALIZE_CORECLR);
if (result != 0)
{
return GetLastError();
diff --git a/src/pal/src/map/virtual.cpp b/src/pal/src/map/virtual.cpp
index 0a9e773d0d..d1a389ede6 100644
--- a/src/pal/src/map/virtual.cpp
+++ b/src/pal/src/map/virtual.cpp
@@ -92,6 +92,23 @@ static int gBackingFile = -1;
#define MAP_ANON MAP_ANONYMOUS
#endif
+/******
+ *
+ * ReserveVirtualMemory() - Helper function that is used by Virtual* APIs
+ * and ExecutableMemoryAllocator to reserve virtual memory from the OS.
+ * Forward declaration only; the definition appears later in this file.
+ */
+static LPVOID ReserveVirtualMemory(
+ IN CPalThread *pthrCurrent, /* Currently executing thread */
+ IN LPVOID lpAddress, /* Requested start address of the region to reserve */
+ IN SIZE_T dwSize); /* Size of the region to reserve */
+
+/// <summary>
+/// The single, file-local allocator instance that hands out memory from a pre-reserved
+/// region of virtual memory that is located near the coreclr library.
+/// </summary>
+static ExecutableMemoryAllocator g_executableMemoryAllocator;
+
/*++
Function:
VIRTUALInitialize()
@@ -105,13 +122,19 @@ Return value:
--*/
extern "C"
BOOL
-VIRTUALInitialize()
+VIRTUALInitialize(bool initializeExecutableMemoryAllocator)
{
TRACE( "Initializing the Virtual Critical Sections. \n" );
InternalInitializeCriticalSection(&virtual_critsec);
pVirtualMemory = NULL;
+
+ if (initializeExecutableMemoryAllocator)
+ {
+ g_executableMemoryAllocator.Initialize();
+ }
+
return TRUE;
}
@@ -894,9 +917,6 @@ static LPVOID VIRTUALReserveMemory(
LPVOID pRetVal = NULL;
UINT_PTR StartBoundary;
SIZE_T MemSize;
-#if HAVE_VM_ALLOCATE
- int result;
-#endif // HAVE_VM_ALLOCATE
TRACE( "Reserving the memory now..\n");
@@ -910,6 +930,66 @@ static LPVOID VIRTUALReserveMemory(
InternalEnterCriticalSection(pthrCurrent, &virtual_critsec);
+ // If this is a request for special executable (JIT'ed) memory then, first of all,
+ // try to get memory from the executable memory allocator to satisfy the request.
+ if (((flAllocationType & MEM_RESERVE_EXECUTABLE) != 0) && (lpAddress == NULL))
+ {
+ pRetVal = g_executableMemoryAllocator.AllocateMemory(MemSize);
+ }
+
+ if (pRetVal == NULL)
+ {
+ // Try to reserve memory from the OS
+ pRetVal = ReserveVirtualMemory(pthrCurrent, (LPVOID)StartBoundary, MemSize);
+ }
+
+ if (pRetVal != NULL)
+ {
+#if !MMAP_IGNORES_HINT
+ if ( !lpAddress )
+ {
+#endif // MMAP_IGNORES_HINT
+ /* Compute the real values instead of the null values. */
+ StartBoundary = (UINT_PTR)pRetVal & ~VIRTUAL_PAGE_MASK;
+ MemSize = ( ((UINT_PTR)pRetVal + dwSize + VIRTUAL_PAGE_MASK) & ~VIRTUAL_PAGE_MASK ) -
+ StartBoundary;
+#if !MMAP_IGNORES_HINT
+ }
+#endif // MMAP_IGNORES_HINT
+ if ( !VIRTUALStoreAllocationInfo( StartBoundary, MemSize,
+ flAllocationType, flProtect ) )
+ {
+ ASSERT( "Unable to store the structure in the list.\n");
+ pthrCurrent->SetLastError( ERROR_INTERNAL_ERROR );
+ munmap( pRetVal, MemSize );
+ pRetVal = NULL;
+ }
+ }
+
+ InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec);
+ return pRetVal;
+}
+
+/******
+ *
+ * ReserveVirtualMemory() - Helper function that is used by Virtual* APIs
+ * and ExecutableMemoryAllocator to reserve virtual memory from the OS.
+ *
+ */
+static LPVOID ReserveVirtualMemory(
+ IN CPalThread *pthrCurrent, /* Currently executing thread */
+ IN LPVOID lpAddress, /* Region to reserve or commit */
+ IN SIZE_T dwSize) /* Size of Region */
+{
+ LPVOID pRetVal = NULL;
+ UINT_PTR StartBoundary = (UINT_PTR)lpAddress;
+ SIZE_T MemSize = dwSize;
+#if HAVE_VM_ALLOCATE
+ int result;
+#endif // HAVE_VM_ALLOCATE
+
+ TRACE( "Reserving the memory now..\n");
+
#if MMAP_IGNORES_HINT
pRetVal = VIRTUALReserveFromBackingFile(StartBoundary, MemSize);
#else // MMAP_IGNORES_HINT
@@ -967,29 +1047,10 @@ static LPVOID VIRTUALReserveMemory(
goto done;
}
#endif // MMAP_ANON_IGNORES_PROTECTION
-#if !MMAP_IGNORES_HINT
- if ( !lpAddress )
- {
-#endif // MMAP_IGNORES_HINT
- /* Compute the real values instead of the null values. */
- StartBoundary = (UINT_PTR)pRetVal & ~VIRTUAL_PAGE_MASK;
- MemSize = ( ((UINT_PTR)pRetVal + dwSize + VIRTUAL_PAGE_MASK) & ~VIRTUAL_PAGE_MASK ) -
- StartBoundary;
-#if !MMAP_IGNORES_HINT
- }
-#endif // MMAP_IGNORES_HINT
- if ( !VIRTUALStoreAllocationInfo( StartBoundary, MemSize,
- flAllocationType, flProtect ) )
- {
- ASSERT( "Unable to store the structure in the list.\n");
- pthrCurrent->SetLastError( ERROR_INTERNAL_ERROR );
- munmap( pRetVal, MemSize );
- pRetVal = NULL;
- }
}
else
{
- ERROR( "Failed due to insufficent memory.\n" );
+ ERROR( "Failed due to insufficient memory.\n" );
#if HAVE_VM_ALLOCATE
vm_deallocate(mach_task_self(), StartBoundary, MemSize);
#endif // HAVE_VM_ALLOCATE
@@ -999,7 +1060,6 @@ static LPVOID VIRTUALReserveMemory(
}
done:
- InternalLeaveCriticalSection(pthrCurrent, &virtual_critsec);
return pRetVal;
}
@@ -1583,10 +1643,10 @@ VirtualAlloc(
}
/* Test for un-supported flags. */
- if ( ( flAllocationType & ~( MEM_COMMIT | MEM_RESERVE | MEM_TOP_DOWN ) ) != 0 )
+ if ( ( flAllocationType & ~( MEM_COMMIT | MEM_RESERVE | MEM_TOP_DOWN | MEM_RESERVE_EXECUTABLE ) ) != 0 )
{
ASSERT( "flAllocationType can be one, or any combination of MEM_COMMIT, \
- MEM_RESERVE, or MEM_TOP_DOWN.\n" );
+ MEM_RESERVE, MEM_TOP_DOWN, or MEM_RESERVE_EXECUTABLE.\n" );
pthrCurrent->SetLastError( ERROR_INVALID_PARAMETER );
goto done;
}
@@ -2291,3 +2351,132 @@ ResetWriteWatch(
// Until it is implemented, return non-zero value as an indicator of failure
return 1;
}
+
+/// <summary>
+/// Puts the allocator into its "nothing reserved" state and then, on 64-bit builds only,
+/// attempts the initial reservation of memory close to the coreclr library. It should be
+/// called early during process startup, while the process address space is still mostly empty.
+/// </summary>
+void ExecutableMemoryAllocator::Initialize()
+{
+ // Begin with empty bookkeeping: no reserved range and nothing left to hand out.
+ m_totalSizeOfReservedMemory = m_remainingReservedMemory = 0;
+ m_startAddress = m_nextFreeAddress = NULL;
+
+#ifdef BIT64
+ // The allocator is enabled on 64-bit platforms only, because 32-bit platforms
+ // have a limited amount of virtual address space.
+ TryReserveInitialMemory();
+#endif // BIT64
+}
+
+/// <summary>
+/// This function is called during PAL initialization. It opportunistically tries to reserve a large chunk
+/// of virtual memory for JIT'ed code, probing with progressively smaller sizes until one reservation succeeds.
+/// </summary>
+void ExecutableMemoryAllocator::TryReserveInitialMemory()
+{
+ CPalThread* pthrCurrent = InternalGetCurrentThread();
+ int32_t sizeOfAllocation = MaxExecutableMemorySize;
+ int32_t startAddressIncrement;
+ UINT_PTR startAddress;
+ UINT_PTR coreclrLoadAddress;
+ const int32_t MemoryProbingIncrement = 128 * 1024 * 1024;
+
+ // Try to find and reserve an available region of virtual memory that is located
+ // within 2GB range (defined by the MaxExecutableMemorySize constant) from the
+ // location of the coreclr library.
+ // Potentially, as a possible future improvement, we can get precise information
+ // about available memory ranges by parsing data from '/proc/self/maps'.
+ // But since this code is called early during process startup, the user address space
+ // is pretty much empty so the simple algorithm that is implemented below is sufficient
+ // for this purpose.
+
+ // First of all, we need to determine the current address of libcoreclr. Please note that depending on
+ // the OS implementation, the library is usually loaded either at the end or at the start of the user
+ // address space. If the library is loaded at low addresses then try to reserve memory above libcoreclr
+ // (thus avoiding reserving memory below 4GB; besides some operating systems do not allow that).
+ // If libcoreclr is loaded at high addresses then try to reserve memory below its location.
+ coreclrLoadAddress = (UINT_PTR)PAL_GetSymbolModuleBase((void*)VirtualAlloc);
+ if ((coreclrLoadAddress < 0xFFFFFFFF) || ((coreclrLoadAddress - MaxExecutableMemorySize) < 0xFFFFFFFF))
+ {
+ // Allocate above libcoreclr: start past its approximate end and slide the start up on every retry
+ startAddress = coreclrLoadAddress + CoreClrLibrarySize;
+ startAddressIncrement = MemoryProbingIncrement;
+ }
+ else
+ {
+ // Allocate below libcoreclr: the start address stays fixed across retries (increment is 0)
+ startAddress = coreclrLoadAddress - MaxExecutableMemorySize;
+ startAddressIncrement = 0;
+ }
+
+ // Do actual memory reservation; the loop stops at the first successful reservation.
+ do
+ {
+ m_startAddress = ReserveVirtualMemory(pthrCurrent, (LPVOID)startAddress, sizeOfAllocation);
+ if (m_startAddress != NULL)
+ {
+ // Memory has been reserved. NOTE(review): the returned address is not compared with startAddress here - confirm.
+ m_totalSizeOfReservedMemory = sizeOfAllocation;
+
+ // Randomize the location at which we start allocating from the reserved memory range.
+ int32_t randomOffset = GenerateRandomStartOffset();
+ m_nextFreeAddress = (LPVOID)(((UINT_PTR)m_startAddress) + randomOffset);
+ m_remainingReservedMemory = sizeOfAllocation - randomOffset;
+ break;
+ }
+
+ // Reservation failed: retry with a region that is MemoryProbingIncrement bytes smaller
+ sizeOfAllocation -= MemoryProbingIncrement;
+ startAddress += startAddressIncrement;
+
+ } while (sizeOfAllocation >= MemoryProbingIncrement);
+}
+
+/// <summary>
+/// Carves the requested amount of memory off the front of the allocator's remaining reserved
+/// address space. Returns NULL when the request is not positive or is larger than what
+/// is still available, in which case the allocator state is left unchanged.
+///
+/// Note: This function MUST be called with the virtual_critsec lock held.
+///
+/// NOTE(review): the size parameter is int32_t; a caller passing a wider value (for example
+/// a SIZE_T) gets an implicit narrowing conversion - confirm callers keep sizes in range.
+/// </summary>
+LPVOID ExecutableMemoryAllocator::AllocateMemory(int32_t allocationSize)
+{
+ // Allocation size must be in multiples of the virtual page size.
+ _ASSERTE((allocationSize & VIRTUAL_PAGE_MASK) == 0);
+
+ // The caller is assumed to own the virtual_critsec lock, so the bookkeeping
+ // below is deliberately not done in a thread-safe manner.
+ if ((allocationSize <= 0) || (allocationSize > m_remainingReservedMemory))
+ {
+ // Nothing sensible to hand out; the caller falls back to other means.
+ return NULL;
+ }
+
+ // Hand out the current front of the free range and advance past it.
+ LPVOID carvedRegion = m_nextFreeAddress;
+ m_remainingReservedMemory -= allocationSize;
+ m_nextFreeAddress = (LPVOID)(((UINT_PTR)carvedRegion) + allocationSize);
+
+ return carvedRegion;
+}
+
+/// <summary>
+/// This function returns a random offset (in multiples of the virtual page size)
+/// at which the allocator should start allocating memory from its reserved memory range.
+/// The offset is between 0 and MaxStartPageOffset (64) pages, inclusive.
+///
+/// Side effect: re-seeds the process-wide random() generator from the current time.
+/// </summary>
+int32_t ExecutableMemoryAllocator::GenerateRandomStartOffset()
+{
+ int32_t pageCount;
+ const int32_t MaxStartPageOffset = 64;
+
+ // random() returns values in the range [0, 2^31 - 1] regardless of RAND_MAX
+ // (RAND_MAX only bounds rand() and may be much smaller on some platforms),
+ // so scale by the generator's real maximum to keep the result within 0...MaxStartPageOffset.
+ const int64_t RandomMax = 0x7FFFFFFF;
+
+ // This code is similar to what coreclr runtime does on Windows.
+ // It generates a random number of pages to skip between 0...MaxStartPageOffset.
+ srandom(time(NULL));
+ pageCount = (int32_t)(MaxStartPageOffset * (int64_t)random() / RandomMax);
+
+ return pageCount * VIRTUAL_PAGE_SIZE;
+}
diff --git a/src/utilcode/util.cpp b/src/utilcode/util.cpp
index 062cf61d95..d7d3a9f4cb 100644
--- a/src/utilcode/util.cpp
+++ b/src/utilcode/util.cpp
@@ -526,6 +526,13 @@ BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize,
// Fall through to
#endif // USE_UPPER_ADDRESS
+#ifdef FEATURE_PAL
+ // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory.
+ // This will allow us to place JIT'ed code close to the coreclr library
+ // and thus improve performance by avoiding jump stubs in managed code.
+ flAllocationType |= MEM_RESERVE_EXECUTABLE;
+#endif // FEATURE_PAL
+
return (BYTE *) ClrVirtualAlloc (NULL, dwSize, flAllocationType, flProtect);
}