diff options
author | Jan Kotas <jkotas@microsoft.com> | 2018-05-04 10:40:28 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-04 10:40:28 -0700 |
commit | 37e643696f860094ca3182c87f1375540b9d704e (patch) | |
tree | 5a840c7743873ca6c46e64a89487989a95880887 /src | |
parent | fac17ce1413c2d270eb52e958faf5e869bcd33a2 (diff) | |
download | coreclr-37e643696f860094ca3182c87f1375540b9d704e.tar.gz coreclr-37e643696f860094ca3182c87f1375540b9d704e.tar.bz2 coreclr-37e643696f860094ca3182c87f1375540b9d704e.zip |
Fix System.String over-allocation (#17876)
BaseSize for System.String was not set correctly. It caused unnecessary extra 8 bytes to be allocated at the end of strings that had `Length % 4 < 2` on 64-bit platforms.
This change makes affected strings proportionally cheaper. For example, `new string('a', 1)` in a long-running loop is 7% faster.
Diffstat (limited to 'src')
-rw-r--r-- | src/debug/daccess/request.cpp | 2 | ||||
-rw-r--r-- | src/vm/amd64/JitHelpers_InlineGetThread.asm | 4 | ||||
-rw-r--r-- | src/vm/amd64/JitHelpers_Slow.asm | 4 | ||||
-rw-r--r-- | src/vm/amd64/asmconstants.h | 3 | ||||
-rw-r--r-- | src/vm/appdomain.cpp | 2 | ||||
-rw-r--r-- | src/vm/arm/asmconstants.h | 45 | ||||
-rw-r--r-- | src/vm/i386/jitinterfacex86.cpp | 9 | ||||
-rw-r--r-- | src/vm/methodtablebuilder.cpp | 4 | ||||
-rw-r--r-- | src/vm/object.h | 20 | ||||
-rw-r--r-- | src/vm/object.inl | 10 |
10 files changed, 26 insertions, 77 deletions
diff --git a/src/debug/daccess/request.cpp b/src/debug/daccess/request.cpp index 377e008fd4..12864e7b0d 100644 --- a/src/debug/daccess/request.cpp +++ b/src/debug/daccess/request.cpp @@ -1475,7 +1475,7 @@ ClrDataAccess::GetObjectStringData(CLRDATA_ADDRESS obj, unsigned int count, __ou if (count > needed) count = needed; - TADDR pszStr = TO_TADDR(obj)+offsetof(StringObject, m_Characters); + TADDR pszStr = TO_TADDR(obj)+offsetof(StringObject, m_FirstChar); hr = m_pTarget->ReadVirtual(pszStr, (PBYTE)stringData, count * sizeof(wchar_t), &needed); if (SUCCEEDED(hr)) diff --git a/src/vm/amd64/JitHelpers_InlineGetThread.asm b/src/vm/amd64/JitHelpers_InlineGetThread.asm index d9f58cc30f..8ebffeb38e 100644 --- a/src/vm/amd64/JitHelpers_InlineGetThread.asm +++ b/src/vm/amd64/JitHelpers_InlineGetThread.asm @@ -134,12 +134,10 @@ LEAF_ENTRY AllocateStringFastMP_InlineGetThread, _TEXT cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2 jae OversizedString - mov edx, [r9 + OFFSET__MethodTable__m_BaseSize] - ; Calculate the final size to allocate. ; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7. - lea edx, [edx + ecx*2 + 7] + lea edx, [STRING_BASE_SIZE + ecx*2 + 7] and edx, -8 INLINE_GETTHREAD r11 diff --git a/src/vm/amd64/JitHelpers_Slow.asm b/src/vm/amd64/JitHelpers_Slow.asm index 0e26ae6dfd..cd3aa14a92 100644 --- a/src/vm/amd64/JitHelpers_Slow.asm +++ b/src/vm/amd64/JitHelpers_Slow.asm @@ -276,12 +276,10 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2 jae FramedAllocateString - mov r8d, [r11 + OFFSET__MethodTable__m_BaseSize] - ; Calculate the final size to allocate. ; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7. - lea r8d, [r8d + ecx*2 + 7] + lea r8d, [STRING_BASE_SIZE + ecx*2 + 7] and r8d, -8 inc [g_global_alloc_lock] diff --git a/src/vm/amd64/asmconstants.h b/src/vm/amd64/asmconstants.h index f526e22f2b..cadc10ac56 100644 --- a/src/vm/amd64/asmconstants.h +++ b/src/vm/amd64/asmconstants.h @@ -545,6 +545,9 @@ ASMCONSTANTS_C_ASSERT(ASM_LARGE_OBJECT_SIZE == LARGE_OBJECT_SIZE); ASMCONSTANTS_C_ASSERT(OFFSETOF__ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); +#define STRING_BASE_SIZE 0x16 +ASMCONSTANTS_RUNTIME_ASSERT(STRING_BASE_SIZE == StringObject::GetBaseSize()); + #define OFFSETOF__StringObject__m_StringLength 0x8 ASMCONSTANTS_C_ASSERT(OFFSETOF__StringObject__m_StringLength == offsetof(StringObject, m_StringLength)); diff --git a/src/vm/appdomain.cpp b/src/vm/appdomain.cpp index 67b79ec19d..292ac13d71 100644 --- a/src/vm/appdomain.cpp +++ b/src/vm/appdomain.cpp @@ -2759,8 +2759,6 @@ void SystemDomain::LoadBaseSystemClasses() // Load String g_pStringClass = MscorlibBinder::LoadPrimitiveType(ELEMENT_TYPE_STRING); - _ASSERTE(g_pStringClass->GetBaseSize() == ObjSizeOf(StringObject)+sizeof(WCHAR)); - _ASSERTE(g_pStringClass->GetComponentSize() == 2); // Used by Buffer::BlockCopy g_pByteArrayMT = ClassLoader::LoadArrayTypeThrowing( diff --git a/src/vm/arm/asmconstants.h b/src/vm/arm/asmconstants.h index ab3d16b9a9..5af778ba36 100644 --- a/src/vm/arm/asmconstants.h +++ b/src/vm/arm/asmconstants.h @@ -95,18 +95,6 @@ ASMCONSTANTS_C_ASSERT(MethodTableWriteableData__m_dwFlags == offsetof(MethodTabl #define MethodTableWriteableData__enum_flag_Unrestored 0x04 ASMCONSTANTS_C_ASSERT(MethodTableWriteableData__enum_flag_Unrestored == MethodTableWriteableData::enum_flag_Unrestored); -#define StringObject__m_StringLength 0x04 -ASMCONSTANTS_C_ASSERT(StringObject__m_StringLength == offsetof(StringObject, m_StringLength)); - -#define SIZEOF__BaseStringObject 0xe -ASMCONSTANTS_C_ASSERT(SIZEOF__BaseStringObject == (ObjSizeOf(StringObject) + sizeof(WCHAR))); - -#define SIZEOF__ArrayOfObjectRef 0xc -ASMCONSTANTS_C_ASSERT(SIZEOF__ArrayOfObjectRef == ObjSizeOf(ArrayBase)); - -#define SIZEOF__ArrayOfValueType 0xc -ASMCONSTANTS_C_ASSERT(SIZEOF__ArrayOfValueType == ObjSizeOf(ArrayBase)); - #define ArrayBase__m_NumComponents 0x4 ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); @@ -116,33 +104,8 @@ ASMCONSTANTS_C_ASSERT(ArrayTypeDesc__m_Arg == offsetof(ArrayTypeDesc, m_Arg)); #define PtrArray__m_Array 0x8 ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); -#define SYSTEM_INFO__dwNumberOfProcessors 0x14 -ASMCONSTANTS_C_ASSERT(SYSTEM_INFO__dwNumberOfProcessors == offsetof(SYSTEM_INFO, dwNumberOfProcessors)); - #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast -// Maximum number of characters to be allocated for a string in AllocateStringFast*. Chosen so that we'll -// never have to check for overflow and will never try to allocate a string on regular heap that should have -// gone on the large object heap. Additionally the constant has been chosen such that it can be encoded in a -// single Thumb2 CMP instruction. -#define MAX_FAST_ALLOCATE_STRING_SIZE 42240 -ASMCONSTANTS_C_ASSERT(MAX_FAST_ALLOCATE_STRING_SIZE < ((LARGE_OBJECT_SIZE - SIZEOF__BaseStringObject) / 2)); - - -// Array of objectRef of this Maximum number of elements can be allocated in JIT_NewArr1OBJ_MP*. Chosen so that we'll -// never have to check for overflow and will never try to allocate the array on regular heap that should have -// gone on the large object heap. Additionally the constant has been chosen such that it can be encoded in a -// single Thumb2 CMP instruction. -#define MAX_FAST_ALLOCATE_ARRAY_OBJECTREF_SIZE 21120 -ASMCONSTANTS_C_ASSERT(MAX_FAST_ALLOCATE_ARRAY_OBJECTREF_SIZE < ((LARGE_OBJECT_SIZE - SIZEOF__ArrayOfObjectRef) / sizeof(void*))); - -// Array of valueClass of this Maximum number of characters can be allocated JIT_NewArr1VC_MP*. Chosen so that we'll -// never have to check for overflow and will never try to allocate the array on regular heap that should have -// gone on the large object heap. Additionally the constant has been chosen such that it can be encoded in a -// single Thumb2 CMP instruction. -#define MAX_FAST_ALLOCATE_ARRAY_VC_SIZE 65280 -ASMCONSTANTS_C_ASSERT(MAX_FAST_ALLOCATE_ARRAY_VC_SIZE < ((4294967296 - 1 - SIZEOF__ArrayOfValueType) / 65536)); - #define SIZEOF__GSCookie 0x4 ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); @@ -203,14 +166,6 @@ ASMCONSTANTS_C_ASSERT(UnmanagedToManagedFrame__m_pvDatum == offsetof(UnmanagedTo #endif // FEATURE_COMINTEROP -#ifndef CROSSGEN_COMPILE -#define Thread__m_alloc_context__alloc_limit 0x44 -ASMCONSTANTS_C_ASSERT(Thread__m_alloc_context__alloc_limit == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_limit)); - -#define Thread__m_alloc_context__alloc_ptr 0x40 -ASMCONSTANTS_C_ASSERT(Thread__m_alloc_context__alloc_ptr == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_ptr)); -#endif // CROSSGEN_COMPILE - #define Thread__m_fPreemptiveGCDisabled 0x08 #ifndef CROSSGEN_COMPILE ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPreemptiveGCDisabled)); diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp index 642c417598..4eccd35a76 100644 --- a/src/vm/i386/jitinterfacex86.cpp +++ b/src/vm/i386/jitinterfacex86.cpp @@ -1063,14 +1063,13 @@ void *JIT_TrialAlloc::GenAllocString(Flags flags) // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet sl.X86EmitCondJump(noLock, X86CondCode::kJAE); - // mov edx, [ecx]MethodTable.m_BaseSize - sl.X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable,m_BaseSize)); - // Calculate the final size to allocate. // We need to calculate baseSize + cnt*2, then round that up by adding 3 and anding ~3. - // lea eax, [edx+eax*2+5] - sl.X86EmitOp(0x8d, kEAX, kEDX, (DATA_ALIGNMENT-1), kEAX, 2); + // lea eax, [basesize+(alignment-1)+eax*2] + sl.Emit16(0x048d); + sl.Emit8(0x45); + sl.Emit32(StringObject::GetBaseSize() + (DATA_ALIGNMENT-1)); // and eax, ~3 sl.Emit16(0xe083); diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp index 0dc226e333..5daadddf35 100644 --- a/src/vm/methodtablebuilder.cpp +++ b/src/vm/methodtablebuilder.cpp @@ -9737,8 +9737,8 @@ void MethodTableBuilder::CheckForSystemTypes() { // Strings are not "normal" objects, so we need to mess with their method table a bit // so that the GC can figure out how big each string is... - DWORD baseSize = ObjSizeOf(StringObject) + sizeof(WCHAR); - pMT->SetBaseSize(baseSize); // NULL character included + DWORD baseSize = StringObject::GetBaseSize(); + pMT->SetBaseSize(baseSize); GetHalfBakedClass()->SetBaseSizePadding(baseSize - bmtFP->NumInstanceFieldBytes); diff --git a/src/vm/object.h b/src/vm/object.h index e3e6c12399..3dd1e26d63 100644 --- a/src/vm/object.h +++ b/src/vm/object.h @@ -914,7 +914,7 @@ typedef PTR_StringObject STRINGREF; * Special String implementation for performance. * * m_StringLength - Length of string in number of WCHARs - * m_Characters - The string buffer + * m_FirstChar - The string buffer * */ @@ -932,9 +932,6 @@ typedef PTR_StringObject STRINGREF; #define STRING_STATE_FAST_OPS 0x80000000 #define STRING_STATE_SPECIAL_SORT 0xC0000000 -#ifdef _MSC_VER -#pragma warning(disable : 4200) // disable zero-sized array warning -#endif class StringObject : public Object { #ifdef DACCESS_COMPILE @@ -947,11 +944,7 @@ class StringObject : public Object private: DWORD m_StringLength; - WCHAR m_Characters[0]; - // GC will see a StringObject like this: - // DWORD m_StringLength - // WCHAR m_Characters[0] - // DWORD m_OptionalPadding (this is an optional field and will appear based on need) + WCHAR m_FirstChar; public: VOID SetStringLength(DWORD len) { LIMITED_METHOD_CONTRACT; _ASSERTE(len >= 0); m_StringLength = len; } @@ -961,12 +954,11 @@ class StringObject : public Object ~StringObject() {LIMITED_METHOD_CONTRACT; } public: + static DWORD GetBaseSize(); static SIZE_T GetSize(DWORD stringLength); DWORD GetStringLength() { LIMITED_METHOD_DAC_CONTRACT; return( m_StringLength );} - WCHAR* GetBuffer() { LIMITED_METHOD_CONTRACT; _ASSERTE(this != nullptr); return (WCHAR*)( dac_cast<TADDR>(this) + offsetof(StringObject, m_Characters) ); } - WCHAR* GetBuffer(DWORD *pdwSize) { LIMITED_METHOD_CONTRACT; _ASSERTE((this != nullptr) && pdwSize); *pdwSize = GetStringLength(); return GetBuffer(); } - WCHAR* GetBufferNullable() { LIMITED_METHOD_CONTRACT; return( (this == nullptr) ? nullptr : (WCHAR*)( dac_cast<TADDR>(this) + offsetof(StringObject, m_Characters) ) ); } + WCHAR* GetBuffer() { LIMITED_METHOD_CONTRACT; _ASSERTE(this != nullptr); return (WCHAR*)( dac_cast<TADDR>(this) + offsetof(StringObject, m_FirstChar) ); } DWORD GetHighCharState() { WRAPPER_NO_CONTRACT; @@ -992,7 +984,7 @@ class StringObject : public Object static UINT GetBufferOffset() { LIMITED_METHOD_DAC_CONTRACT; - return (UINT)(offsetof(StringObject, m_Characters)); + return (UINT)(offsetof(StringObject, m_FirstChar)); } static UINT GetStringLengthOffset() { @@ -3329,7 +3321,7 @@ public: static inline void *Value(void *src, MethodTable *nullableMT) { Nullable *nullable = (Nullable *)src; - return nullable->ValueAddr(nullableMT); + return nullable->ValueAddr(nullableMT); } private: diff --git a/src/vm/object.inl b/src/vm/object.inl index 82e55f7844..53ffb89cfc 100644 --- a/src/vm/object.inl +++ b/src/vm/object.inl @@ -58,12 +58,18 @@ inline SIZE_T Object::GetSize() return s; } +__forceinline /*static*/ DWORD StringObject::GetBaseSize() +{ + LIMITED_METHOD_DAC_CONTRACT; + + return ObjSizeOf(Object) + sizeof(DWORD) /* length */ + sizeof(WCHAR) /* null terminator */; +} + __forceinline /*static*/ SIZE_T StringObject::GetSize(DWORD strLen) { LIMITED_METHOD_DAC_CONTRACT; - // Extra WCHAR for null terminator - return ObjSizeOf(StringObject) + sizeof(WCHAR) + strLen * sizeof(WCHAR); + return GetBaseSize() + strLen * sizeof(WCHAR); } #ifdef DACCESS_COMPILE |