diff options
-rw-r--r-- | src/classlibnative/bcltype/stringnative.cpp | 232 | ||||
-rw-r--r-- | src/classlibnative/bcltype/stringnative.h | 4 | ||||
-rw-r--r-- | src/mscorlib/System.Private.CoreLib.csproj | 1 | ||||
-rw-r--r-- | src/mscorlib/shared/System.Private.CoreLib.Shared.projitems | 1 | ||||
-rw-r--r-- | src/mscorlib/shared/System/String.Searching.cs (renamed from src/mscorlib/src/System/String.Searching.cs) | 208 | ||||
-rw-r--r-- | src/vm/ecalllist.h | 2 |
6 files changed, 170 insertions, 278 deletions
diff --git a/src/classlibnative/bcltype/stringnative.cpp b/src/classlibnative/bcltype/stringnative.cpp index 34fbf1eb34..dc9be01680 100644 --- a/src/classlibnative/bcltype/stringnative.cpp +++ b/src/classlibnative/bcltype/stringnative.cpp @@ -31,22 +31,6 @@ #pragma optimize("tgy", on) #endif - -#define PROBABILISTICMAP_BLOCK_INDEX_MASK 0X7 -#define PROBABILISTICMAP_BLOCK_INDEX_SHIFT 0x3 -#define PROBABILISTICMAP_SIZE 0X8 - -// -// -// FORWARD DECLARATIONS -// -// -int ArrayContains(WCHAR searchChar, __in_ecount(length) const WCHAR *begin, int length); -void InitializeProbabilisticMap(int* charMap, __in_ecount(length) const WCHAR* charArray, int length); -bool ProbablyContains(const int* charMap, WCHAR searchChar); -bool IsBitSet(int value, int bitPos); -void SetBit(int* value, int bitPos); - // // // CONSTRUCTORS @@ -295,116 +279,6 @@ FCIMPL6(INT32, COMString::CompareOrdinalEx, StringObject* strA, INT32 indexA, IN } FCIMPLEND -/*===============================IndexOfCharArray=============================== -**Action: -**Returns: -**Arguments: -**Exceptions: -==============================================================================*/ -FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* valueRef, INT32 startIndex, INT32 count ) -{ - FCALL_CONTRACT; - - VALIDATEOBJECT(thisRef); - VALIDATEOBJECT(valueRef); - - if (thisRef == NULL) - FCThrow(kNullReferenceException); - - WCHAR *thisChars; - WCHAR *valueChars; - int valueLength; - int thisLength; - - thisRef->RefInterpretGetStringValuesDangerousForGC(&thisChars, &thisLength); - - int endIndex = startIndex + count; - - valueLength = valueRef->GetNumComponents(); - valueChars = (WCHAR *)valueRef->GetDataPtr(); - - // use probabilistic map, see (code:InitializeProbabilisticMap) - int charMap[PROBABILISTICMAP_SIZE] = {0}; - - InitializeProbabilisticMap(charMap, valueChars, valueLength); - - for(int i = startIndex; i < endIndex; i++) { - WCHAR thisChar = thisChars[i]; - if (ProbablyContains(charMap, thisChar)) - if (ArrayContains(thisChars[i], valueChars, valueLength) >= 0) { - FC_GC_POLL_RET(); - return i; - } - } - - FC_GC_POLL_RET(); - return -1; -} -FCIMPLEND - -/*=============================LastIndexOfCharArray============================= -**Action: -**Returns: -**Arguments: -**Exceptions: -==============================================================================*/ - -FCIMPL4(INT32, COMString::LastIndexOfCharArray, StringObject* thisRef, CHARArray* valueRef, INT32 startIndex, INT32 count ) -{ - FCALL_CONTRACT; - - VALIDATEOBJECT(thisRef); - VALIDATEOBJECT(valueRef); - WCHAR *thisChars, *valueChars; - int thisLength, valueLength; - - if (thisRef==NULL) { - FCThrow(kNullReferenceException); - } - - if (valueRef == NULL) - FCThrowArgumentNull(W("anyOf")); - - thisRef->RefInterpretGetStringValuesDangerousForGC(&thisChars, &thisLength); - - if (thisLength == 0) { - return -1; - } - - if (startIndex < 0 || startIndex >= thisLength) { - FCThrowArgumentOutOfRange(W("startIndex"), W("ArgumentOutOfRange_Index")); - } - - if (count<0 || count - 1 > startIndex) { - FCThrowArgumentOutOfRange(W("count"), W("ArgumentOutOfRange_Count")); - } - - - valueLength = valueRef->GetNumComponents(); - valueChars = (WCHAR *)valueRef->GetDataPtr(); - - int endIndex = startIndex - count + 1; - - // use probabilistic map, see (code:InitializeProbabilisticMap) - int charMap[PROBABILISTICMAP_SIZE] = {0}; - - InitializeProbabilisticMap(charMap, valueChars, valueLength); - - //We search [startIndex..EndIndex] - for (int i=startIndex; i>=endIndex; i--) { - WCHAR thisChar = thisChars[i]; - if (ProbablyContains(charMap, thisChar)) - if (ArrayContains(thisChars[i],valueChars, valueLength) >= 0) { - FC_GC_POLL_RET(); - return i; - } - } - - FC_GC_POLL_RET(); - return -1; - -} -FCIMPLEND /*==================================GETCHARAT=================================== **Returns the character at position index. Thows IndexOutOfRangeException as @@ -447,112 +321,6 @@ FCIMPL1(INT32, COMString::Length, StringObject* str) { FCIMPLEND -// HELPER METHODS -// -// -// A probabilistic map is an optimization that is used in IndexOfAny/ -// LastIndexOfAny methods. The idea is to create a bit map of the characters we -// are searching for and use this map as a "cheap" check to decide if the -// current character in the string exists in the array of input characters. -// There are 256 bits in the map, with each character mapped to 2 bits. Every -// character is divided into 2 bytes, and then every byte is mapped to 1 bit. -// The character map is an array of 8 integers acting as map blocks. The 3 lsb -// in each byte in the character is used to index into this map to get the -// right block, the value of the remaining 5 msb are used as the bit position -// inside this block. -void InitializeProbabilisticMap(int* charMap, __in_ecount(length) const WCHAR* charArray, int length) { - LIMITED_METHOD_CONTRACT; - - _ASSERTE(charMap != NULL); - _ASSERTE(charArray != NULL); - _ASSERTE(length >= 0); - - bool hasAscii = false; - - for(int i = 0; i < length; ++i) { - int hi,lo; - - int c = charArray[i]; - - lo = c & 0xFF; - hi = (c >> 8) & 0xFF; - - int* value = &charMap[lo & PROBABILISTICMAP_BLOCK_INDEX_MASK]; - SetBit(value, lo >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); - - if (hi > 0) { - value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK]; - SetBit(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); - } - else { - hasAscii = true; - } - } - - if (hasAscii) { - // Common to search for ASCII symbols. Just the high value once. - charMap[0] |= 1; - } -} - -// Use the probabilistic map to decide if the character value exists in the -// map. When this method return false, we are certain the character doesn't -// exist, however a true return means it *may* exist. -inline bool ProbablyContains(const int* charMap, WCHAR searchValue) { - LIMITED_METHOD_CONTRACT; - - int lo, hi; - - lo = searchValue & 0xFF; - int value = charMap[lo & PROBABILISTICMAP_BLOCK_INDEX_MASK]; - - if (IsBitSet(value, lo >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT)) { - hi = (searchValue >> 8) & 0xFF; - value = charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK]; - - return IsBitSet(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); - } - - return false; -} - -inline void SetBit(int* value, int bitPos) { - LIMITED_METHOD_CONTRACT; - - _ASSERTE(bitPos <= 31); - - *value |= (1 << bitPos); -} - -inline bool IsBitSet(int value, int bitPos) { - LIMITED_METHOD_CONTRACT; - - _ASSERTE(bitPos <= 31); - - return (value & (1 << bitPos)) != 0; -} - - -/*================================ArrayContains================================= -**Action: -**Returns: -**Arguments: -**Exceptions: -==============================================================================*/ -int ArrayContains(WCHAR searchChar, __in_ecount(length) const WCHAR *begin, int length) { - LIMITED_METHOD_CONTRACT; - _ASSERTE(begin != NULL); - _ASSERTE(length >= 0); - - for(int i = 0; i < length; i++) { - if(begin[i] == searchChar) { - return i; - } - } - return -1; -} - - /*================================ReplaceString================================= **Action: **Returns: diff --git a/src/classlibnative/bcltype/stringnative.h b/src/classlibnative/bcltype/stringnative.h index a8409826c6..24326ed818 100644 --- a/src/classlibnative/bcltype/stringnative.h +++ b/src/classlibnative/bcltype/stringnative.h @@ -61,10 +61,6 @@ public: static FCDECL6(INT32, CompareOrdinalEx, StringObject* strA, INT32 indexA, INT32 countA, StringObject* strB, INT32 indexB, INT32 countB); - static FCDECL4(INT32, LastIndexOfCharArray, StringObject* thisRef, CHARArray* valueRef, INT32 startIndex, INT32 count ); - - static FCDECL4(INT32, IndexOfCharArray, StringObject* vThisRef, CHARArray* value, INT32 startIndex, INT32 count ); - static FCDECL2(FC_CHAR_RET, GetCharAt, StringObject* pThisRef, INT32 index); static FCDECL1(INT32, Length, StringObject* pThisRef); diff --git a/src/mscorlib/System.Private.CoreLib.csproj b/src/mscorlib/System.Private.CoreLib.csproj index 6eda96a87b..03838dba84 100644 --- a/src/mscorlib/System.Private.CoreLib.csproj +++ b/src/mscorlib/System.Private.CoreLib.csproj @@ -319,7 +319,6 @@ <Compile Include="$(BclSourcesRoot)\System\String.cs" /> <Compile Include="$(BclSourcesRoot)\System\String.Comparison.cs" /> <Compile Include="$(BclSourcesRoot)\System\String.Manipulation.cs" /> - <Compile Include="$(BclSourcesRoot)\System\String.Searching.cs" /> <Compile Include="$(BclSourcesRoot)\System\Text\StringBuilder.CoreCLR.cs" /> <Compile Include="$(BclSourcesRoot)\System\Text\StringBuilderCache.cs" /> <Compile Include="$(BclSourcesRoot)\System\Exception.cs" /> diff --git a/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems b/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems index 0831e58af5..1dbbe7b34e 100644 --- a/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems +++ b/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems @@ -436,6 +436,7 @@ <Compile Include="$(MSBuildThisFileDirectory)System\Single.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Span.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Span.NonGeneric.cs" /> + <Compile Include="$(MSBuildThisFileDirectory)System\String.Searching.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\StringSpanHelpers.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\StackOverflowException.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\StringComparer.cs" /> diff --git a/src/mscorlib/src/System/String.Searching.cs b/src/mscorlib/shared/System/String.Searching.cs index 0be214d0d9..527a73b136 100644 --- a/src/mscorlib/src/System/String.Searching.cs +++ b/src/mscorlib/shared/System/String.Searching.cs @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Globalization; -using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace System { @@ -75,7 +75,7 @@ namespace System } // Returns the index of the first occurrence of any specified character in the current instance. - // The search starts at startIndex and runs to startIndex + count -1. + // The search starts at startIndex and runs to startIndex + count - 1. // public int IndexOfAny(char[] anyOf) { @@ -90,19 +90,13 @@ namespace System public int IndexOfAny(char[] anyOf, int startIndex, int count) { if (anyOf == null) - { throw new ArgumentNullException(nameof(anyOf)); - } if ((uint)startIndex > (uint)Length) - { throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); - } if ((uint)count > (uint)(Length - startIndex)) - { throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - } if (anyOf.Length == 2) { @@ -176,33 +170,114 @@ namespace System } } - [MethodImplAttribute(MethodImplOptions.InternalCall)] - private extern int IndexOfCharArray(char[] anyOf, int startIndex, int count); + private unsafe int IndexOfCharArray(char[] anyOf, int startIndex, int count) + { + // use probabilistic map, see InitializeProbabilisticMap + ProbabilisticMap map = default(ProbabilisticMap); + uint* charMap = (uint*)↦ + InitializeProbabilisticMap(charMap, anyOf); + + fixed (char* pChars = &_firstChar) + { + char* pCh = pChars + startIndex; + + while (count > 0) + { + int thisChar = *pCh; + + if (IsCharBitSet(charMap, (byte)thisChar) && + IsCharBitSet(charMap, (byte)(thisChar >> 8)) && + ArrayContains((char)thisChar, anyOf)) + { + return (int)(pCh - pChars); + } + + count--; + pCh++; + } + + return -1; + } + } + + private const int PROBABILISTICMAP_BLOCK_INDEX_MASK = 0x7; + private const int PROBABILISTICMAP_BLOCK_INDEX_SHIFT = 0x3; + private const int PROBABILISTICMAP_SIZE = 0x8; + + // A probabilistic map is an optimization that is used in IndexOfAny/ + // LastIndexOfAny methods. The idea is to create a bit map of the characters we + // are searching for and use this map as a "cheap" check to decide if the + // current character in the string exists in the array of input characters. + // There are 256 bits in the map, with each character mapped to 2 bits. Every + // character is divided into 2 bytes, and then every byte is mapped to 1 bit. + // The character map is an array of 8 integers acting as map blocks. The 3 lsb + // in each byte in the character is used to index into this map to get the + // right block, the value of the remaining 5 msb are used as the bit position + // inside this block. + private static unsafe void InitializeProbabilisticMap(uint* charMap, char[] anyOf) + { + bool hasAscii = false; + uint* charMapLocal = charMap; // https://github.com/dotnet/coreclr/issues/14264 + + for (int i = 0; i < anyOf.Length; ++i) + { + int c = anyOf[i]; + + // Map low bit + SetCharBit(charMapLocal, (byte)c); + + // Map high bit + c >>= 8; + + if (c == 0) + { + hasAscii = true; + } + else + { + SetCharBit(charMapLocal, (byte)c); + } + } + + if (hasAscii) + { + // Common to search for ASCII symbols. Just set the high value once. + charMapLocal[0] |= 1u; + } + } + + private static bool ArrayContains(char searchChar, char[] anyOf) + { + for (int i = 0; i < anyOf.Length; i++) + { + if (anyOf[i] == searchChar) + return true; + } + + return false; + } + + private unsafe static bool IsCharBitSet(uint* charMap, byte value) + { + return (charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] & (1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT))) != 0; + } + + private unsafe static void SetCharBit(uint* charMap, byte value) + { + charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] |= 1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); + } - // Determines the position within this string of the first occurrence of the specified - // string, according to the specified search criteria. The search begins at - // the first character of this string, it is case-sensitive and the current culture - // comparison is used. - // public int IndexOf(String value) { return IndexOf(value, StringComparison.CurrentCulture); } - // Determines the position within this string of the first occurrence of the specified - // string, according to the specified search criteria. The search begins at - // startIndex, it is case-sensitive and the current culture comparison is used. - // public int IndexOf(String value, int startIndex) { return IndexOf(value, startIndex, StringComparison.CurrentCulture); } - // Determines the position within this string of the first occurrence of the specified - // string, according to the specified search criteria. The search begins at - // startIndex, ends at endIndex and the current culture comparison is used. - // public int IndexOf(String value, int startIndex, int count) { if (startIndex < 0 || startIndex > this.Length) @@ -258,10 +333,7 @@ namespace System return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.Ordinal); case StringComparison.OrdinalIgnoreCase: - if (value.IsAscii() && this.IsAscii()) - return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase); - else - return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); + return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); @@ -334,8 +406,6 @@ namespace System // The character at position startIndex is included in the search. startIndex is the larger // index within the string. // - - //ForceInline ... Jit can't recognize String.get_Length to determine that this is "fluff" public int LastIndexOfAny(char[] anyOf) { return LastIndexOfAny(anyOf, this.Length - 1, this.Length); @@ -346,9 +416,68 @@ namespace System return LastIndexOfAny(anyOf, startIndex, startIndex + 1); } - [MethodImplAttribute(MethodImplOptions.InternalCall)] - public extern int LastIndexOfAny(char[] anyOf, int startIndex, int count); + public unsafe int LastIndexOfAny(char[] anyOf, int startIndex, int count) + { + if (anyOf == null) + throw new ArgumentNullException(nameof(anyOf)); + + if (Length == 0) + return -1; + + if ((uint)startIndex >= (uint)Length) + { + throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); + } + + if ((count < 0) || ((count - 1) > startIndex)) + { + throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + } + + if (anyOf.Length > 1) + { + return LastIndexOfCharArray(anyOf, startIndex, count); + } + else if (anyOf.Length == 1) + { + return LastIndexOf(anyOf[0], startIndex, count); + } + else // anyOf.Length == 0 + { + return -1; + } + } + + private unsafe int LastIndexOfCharArray(char[] anyOf, int startIndex, int count) + { + // use probabilistic map, see InitializeProbabilisticMap + ProbabilisticMap map = default(ProbabilisticMap); + uint* charMap = (uint*)↦ + + InitializeProbabilisticMap(charMap, anyOf); + + fixed (char* pChars = &_firstChar) + { + char* pCh = pChars + startIndex; + + while (count > 0) + { + int thisChar = *pCh; + + if (IsCharBitSet(charMap, (byte)thisChar) && + IsCharBitSet(charMap, (byte)(thisChar >> 8)) && + ArrayContains((char)thisChar, anyOf)) + { + return (int)(pCh - pChars); + } + count--; + pCh--; + } + + return -1; + } + } // Returns the index of the last occurrence of any character in value in the current instance. // The search starts at startIndex and runs backwards to startIndex - count + 1. @@ -404,16 +533,15 @@ namespace System startIndex--; if (count > 0) count--; - - // If we are looking for nothing, just return 0 - if (value.Length == 0 && count >= 0 && startIndex - count + 1 >= 0) - return startIndex; } // 2nd half of this also catches when startIndex == MAXINT, so MAXINT - 0 + 1 == -1, which is < 0. if (count < 0 || startIndex - count + 1 < 0) throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + // If we are looking for nothing, just return startIndex + if (value.Length == 0) + return startIndex; switch (comparisonType) { @@ -428,17 +556,19 @@ namespace System case StringComparison.InvariantCultureIgnoreCase: return CultureInfo.InvariantCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase); + case StringComparison.Ordinal: return CultureInfo.InvariantCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, CompareOptions.Ordinal); case StringComparison.OrdinalIgnoreCase: - if (value.IsAscii() && this.IsAscii()) - return CultureInfo.InvariantCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase); - else - return TextInfo.LastIndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); + return TextInfo.LastIndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); + default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); } } + + [StructLayout(LayoutKind.Explicit, Size = PROBABILISTICMAP_SIZE * sizeof(uint))] + private struct ProbabilisticMap { } } } diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h index 4720b5a11f..83b6539f42 100644 --- a/src/vm/ecalllist.h +++ b/src/vm/ecalllist.h @@ -112,8 +112,6 @@ FCFuncStart(gStringFuncs) FCIntrinsic("get_Chars", COMString::GetCharAt, CORINFO_INTRINSIC_StringGetChar) FCFuncElement("IsAscii", COMString::IsAscii) FCFuncElement("CompareOrdinalHelper", COMString::CompareOrdinalEx) - FCFuncElement("IndexOfCharArray", COMString::IndexOfCharArray) - FCFuncElement("LastIndexOfAny", COMString::LastIndexOfCharArray) FCFuncElementSig("ReplaceInternal", &gsig_IM_Str_Str_RetStr, COMString::ReplaceString) #ifdef FEATURE_COMINTEROP FCFuncElement("SetTrailByte", COMString::FCSetTrailByte) |