diff options
author | Jan Kotas <jkotas@microsoft.com> | 2017-10-04 08:39:18 -0700 |
---|---|---|
committer | Jan Kotas <jkotas@microsoft.com> | 2017-10-04 13:19:03 -0700 |
commit | 238907f69662dda53dee0afd9848756a5bb790e9 (patch) | |
tree | 10cb7b880e777175d74e91027eaa177921a83e9e /src/mscorlib | |
parent | 2de0bce49beeaf1bc6fcedd006114f4c8df7566a (diff) | |
download | coreclr-238907f69662dda53dee0afd9848756a5bb790e9.tar.gz coreclr-238907f69662dda53dee0afd9848756a5bb790e9.tar.bz2 coreclr-238907f69662dda53dee0afd9848756a5bb790e9.zip |
Move String.Searching.cs to shared CoreLib partition (dotnet/corert#4673)
Signed-off-by: dotnet-bot <dotnet-bot@microsoft.com>
Diffstat (limited to 'src/mscorlib')
-rw-r--r-- | src/mscorlib/System.Private.CoreLib.csproj | 1 | ||||
-rw-r--r-- | src/mscorlib/shared/System.Private.CoreLib.Shared.projitems | 1 | ||||
-rw-r--r-- | src/mscorlib/shared/System/String.Searching.cs (renamed from src/mscorlib/src/System/String.Searching.cs) | 208 |
3 files changed, 170 insertions, 40 deletions
diff --git a/src/mscorlib/System.Private.CoreLib.csproj b/src/mscorlib/System.Private.CoreLib.csproj index 6eda96a87b..03838dba84 100644 --- a/src/mscorlib/System.Private.CoreLib.csproj +++ b/src/mscorlib/System.Private.CoreLib.csproj @@ -319,7 +319,6 @@ <Compile Include="$(BclSourcesRoot)\System\String.cs" /> <Compile Include="$(BclSourcesRoot)\System\String.Comparison.cs" /> <Compile Include="$(BclSourcesRoot)\System\String.Manipulation.cs" /> - <Compile Include="$(BclSourcesRoot)\System\String.Searching.cs" /> <Compile Include="$(BclSourcesRoot)\System\Text\StringBuilder.CoreCLR.cs" /> <Compile Include="$(BclSourcesRoot)\System\Text\StringBuilderCache.cs" /> <Compile Include="$(BclSourcesRoot)\System\Exception.cs" /> diff --git a/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems b/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems index 0831e58af5..1dbbe7b34e 100644 --- a/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems +++ b/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems @@ -436,6 +436,7 @@ <Compile Include="$(MSBuildThisFileDirectory)System\Single.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Span.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\Span.NonGeneric.cs" /> + <Compile Include="$(MSBuildThisFileDirectory)System\String.Searching.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\StringSpanHelpers.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\StackOverflowException.cs" /> <Compile Include="$(MSBuildThisFileDirectory)System\StringComparer.cs" /> diff --git a/src/mscorlib/src/System/String.Searching.cs b/src/mscorlib/shared/System/String.Searching.cs index 0be214d0d9..527a73b136 100644 --- a/src/mscorlib/src/System/String.Searching.cs +++ b/src/mscorlib/shared/System/String.Searching.cs @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Globalization; -using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace System { @@ -75,7 +75,7 @@ namespace System } // Returns the index of the first occurrence of any specified character in the current instance. - // The search starts at startIndex and runs to startIndex + count -1. + // The search starts at startIndex and runs to startIndex + count - 1. // public int IndexOfAny(char[] anyOf) { @@ -90,19 +90,13 @@ namespace System public int IndexOfAny(char[] anyOf, int startIndex, int count) { if (anyOf == null) - { throw new ArgumentNullException(nameof(anyOf)); - } if ((uint)startIndex > (uint)Length) - { throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); - } if ((uint)count > (uint)(Length - startIndex)) - { throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); - } if (anyOf.Length == 2) { @@ -176,33 +170,114 @@ namespace System } } - [MethodImplAttribute(MethodImplOptions.InternalCall)] - private extern int IndexOfCharArray(char[] anyOf, int startIndex, int count); + private unsafe int IndexOfCharArray(char[] anyOf, int startIndex, int count) + { + // use probabilistic map, see InitializeProbabilisticMap + ProbabilisticMap map = default(ProbabilisticMap); + uint* charMap = (uint*)↦ + InitializeProbabilisticMap(charMap, anyOf); + + fixed (char* pChars = &_firstChar) + { + char* pCh = pChars + startIndex; + + while (count > 0) + { + int thisChar = *pCh; + + if (IsCharBitSet(charMap, (byte)thisChar) && + IsCharBitSet(charMap, (byte)(thisChar >> 8)) && + ArrayContains((char)thisChar, anyOf)) + { + return (int)(pCh - pChars); + } + + count--; + pCh++; + } + + return -1; + } + } + + private const int PROBABILISTICMAP_BLOCK_INDEX_MASK = 0x7; + private const int PROBABILISTICMAP_BLOCK_INDEX_SHIFT = 0x3; + private const int PROBABILISTICMAP_SIZE = 0x8; + + // A probabilistic map is an optimization that is used in IndexOfAny/ + // LastIndexOfAny methods. The idea is to create a bit map of the characters we + // are searching for and use this map as a "cheap" check to decide if the + // current character in the string exists in the array of input characters. + // There are 256 bits in the map, with each character mapped to 2 bits. Every + // character is divided into 2 bytes, and then every byte is mapped to 1 bit. + // The character map is an array of 8 integers acting as map blocks. The 3 lsb + // in each byte in the character is used to index into this map to get the + // right block, the value of the remaining 5 msb are used as the bit position + // inside this block. + private static unsafe void InitializeProbabilisticMap(uint* charMap, char[] anyOf) + { + bool hasAscii = false; + uint* charMapLocal = charMap; // https://github.com/dotnet/coreclr/issues/14264 + + for (int i = 0; i < anyOf.Length; ++i) + { + int c = anyOf[i]; + + // Map low bit + SetCharBit(charMapLocal, (byte)c); + + // Map high bit + c >>= 8; + + if (c == 0) + { + hasAscii = true; + } + else + { + SetCharBit(charMapLocal, (byte)c); + } + } + + if (hasAscii) + { + // Common to search for ASCII symbols. Just set the high value once. + charMapLocal[0] |= 1u; + } + } + + private static bool ArrayContains(char searchChar, char[] anyOf) + { + for (int i = 0; i < anyOf.Length; i++) + { + if (anyOf[i] == searchChar) + return true; + } + + return false; + } + + private unsafe static bool IsCharBitSet(uint* charMap, byte value) + { + return (charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] & (1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT))) != 0; + } + + private unsafe static void SetCharBit(uint* charMap, byte value) + { + charMap[value & PROBABILISTICMAP_BLOCK_INDEX_MASK] |= 1u << (value >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); + } - // Determines the position within this string of the first occurrence of the specified - // string, according to the specified search criteria. The search begins at - // the first character of this string, it is case-sensitive and the current culture - // comparison is used. - // public int IndexOf(String value) { return IndexOf(value, StringComparison.CurrentCulture); } - // Determines the position within this string of the first occurrence of the specified - // string, according to the specified search criteria. The search begins at - // startIndex, it is case-sensitive and the current culture comparison is used. - // public int IndexOf(String value, int startIndex) { return IndexOf(value, startIndex, StringComparison.CurrentCulture); } - // Determines the position within this string of the first occurrence of the specified - // string, according to the specified search criteria. The search begins at - // startIndex, ends at endIndex and the current culture comparison is used. - // public int IndexOf(String value, int startIndex, int count) { if (startIndex < 0 || startIndex > this.Length) @@ -258,10 +333,7 @@ namespace System return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.Ordinal); case StringComparison.OrdinalIgnoreCase: - if (value.IsAscii() && this.IsAscii()) - return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase); - else - return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); + return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); @@ -334,8 +406,6 @@ namespace System // The character at position startIndex is included in the search. startIndex is the larger // index within the string. // - - //ForceInline ... Jit can't recognize String.get_Length to determine that this is "fluff" public int LastIndexOfAny(char[] anyOf) { return LastIndexOfAny(anyOf, this.Length - 1, this.Length); @@ -346,9 +416,68 @@ namespace System return LastIndexOfAny(anyOf, startIndex, startIndex + 1); } - [MethodImplAttribute(MethodImplOptions.InternalCall)] - public extern int LastIndexOfAny(char[] anyOf, int startIndex, int count); + public unsafe int LastIndexOfAny(char[] anyOf, int startIndex, int count) + { + if (anyOf == null) + throw new ArgumentNullException(nameof(anyOf)); + + if (Length == 0) + return -1; + + if ((uint)startIndex >= (uint)Length) + { + throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); + } + + if ((count < 0) || ((count - 1) > startIndex)) + { + throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + } + + if (anyOf.Length > 1) + { + return LastIndexOfCharArray(anyOf, startIndex, count); + } + else if (anyOf.Length == 1) + { + return LastIndexOf(anyOf[0], startIndex, count); + } + else // anyOf.Length == 0 + { + return -1; + } + } + + private unsafe int LastIndexOfCharArray(char[] anyOf, int startIndex, int count) + { + // use probabilistic map, see InitializeProbabilisticMap + ProbabilisticMap map = default(ProbabilisticMap); + uint* charMap = (uint*)↦ + + InitializeProbabilisticMap(charMap, anyOf); + + fixed (char* pChars = &_firstChar) + { + char* pCh = pChars + startIndex; + + while (count > 0) + { + int thisChar = *pCh; + + if (IsCharBitSet(charMap, (byte)thisChar) && + IsCharBitSet(charMap, (byte)(thisChar >> 8)) && + ArrayContains((char)thisChar, anyOf)) + { + return (int)(pCh - pChars); + } + count--; + pCh--; + } + + return -1; + } + } // Returns the index of the last occurrence of any character in value in the current instance. // The search starts at startIndex and runs backwards to startIndex - count + 1. @@ -404,16 +533,15 @@ namespace System startIndex--; if (count > 0) count--; - - // If we are looking for nothing, just return 0 - if (value.Length == 0 && count >= 0 && startIndex - count + 1 >= 0) - return startIndex; } // 2nd half of this also catches when startIndex == MAXINT, so MAXINT - 0 + 1 == -1, which is < 0. if (count < 0 || startIndex - count + 1 < 0) throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + // If we are looking for nothing, just return startIndex + if (value.Length == 0) + return startIndex; switch (comparisonType) { @@ -428,17 +556,19 @@ namespace System case StringComparison.InvariantCultureIgnoreCase: return CultureInfo.InvariantCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase); + case StringComparison.Ordinal: return CultureInfo.InvariantCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, CompareOptions.Ordinal); case StringComparison.OrdinalIgnoreCase: - if (value.IsAscii() && this.IsAscii()) - return CultureInfo.InvariantCulture.CompareInfo.LastIndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase); - else - return TextInfo.LastIndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); + return TextInfo.LastIndexOfStringOrdinalIgnoreCase(this, value, startIndex, count); + default: throw new ArgumentException(SR.NotSupported_StringComparison, nameof(comparisonType)); } } + + [StructLayout(LayoutKind.Explicit, Size = PROBABILISTICMAP_SIZE * sizeof(uint))] + private struct ProbabilisticMap { } } } |