diff options
author | Matt Ellis <matell@microsoft.com> | 2016-01-12 17:01:02 -0800 |
---|---|---|
committer | Matt Ellis <matell@microsoft.com> | 2016-01-13 13:37:19 -0800 |
commit | 8c82dd5428bdab80ce2f1a0803747c2406b9f178 (patch) | |
tree | b8c0887576701da825ba29e3e42f08689981f1f5 /src | |
parent | b8a0a62e41fa236ff1463523c3a287321a65bcff (diff) | |
download | coreclr-8c82dd5428bdab80ce2f1a0803747c2406b9f178.tar.gz coreclr-8c82dd5428bdab80ce2f1a0803747c2406b9f178.tar.bz2 coreclr-8c82dd5428bdab80ce2f1a0803747c2406b9f178.zip |
Fast path IndexOf and variants for ASCII
In ICU, doing any sort of IndexOf operation (which includes prefix and
suffix checking) is relatively expensive. ICU ends up doing a fair
amount of work and allocation in order to construct a searcher object
which could be reused, but our APIs are not amenable to working in
this manner.
However, for some cultures we can often fast-path ASCII searches when we
know that ASCII and Ordinal comparisons are the same, as is the case
for both Invariant and en-US.
This change has CompareInfo hold some additional state about a locale to
decide if we can do this optimization and then wires it up to IndexOf,
LastIndexOf, IsPrefix and IsSuffix.
In the future, we can try to extend the set of allowable cultures that
we perform this optimization on by coming up with better checks on when
it is safe to perform this transformation.
Today, this optimization does not apply when IgnoreSymbols is set,
because we would need to blank some ASCII symbol characters. If this
ends up being a common operation, we could consider having ordinal
implementations that also ignore symbols.
This represents the best that we can do for dotnet/corefx#3672. It gets
us back to where we were before for many common real world cases.
Fixes dotnet/corefx#3672.
Diffstat (limited to 'src')
-rw-r--r-- | src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs | 43 |
1 files changed, 42 insertions, 1 deletions
diff --git a/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs b/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs index 905ac9deb2..2337fed15c 100644 --- a/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs +++ b/src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs @@ -13,12 +13,15 @@ namespace System.Globalization [SecurityCritical] private readonly Interop.GlobalizationInterop.SafeSortHandle m_sortHandle; + private readonly bool m_isAsciiEqualityOrdinal; + [SecuritySafeCritical] internal CompareInfo(CultureInfo culture) { m_name = culture.m_name; m_sortName = culture.SortName; m_sortHandle = Interop.GlobalizationInterop.GetSortHandle(System.Text.Encoding.UTF8.GetBytes(m_sortName)); + m_isAsciiEqualityOrdinal = (m_sortName == "en-US" || m_sortName == ""); } [SecurityCritical] @@ -161,6 +164,11 @@ namespace System.Globalization return IndexOfOrdinal(source, target, startIndex, count, ignoreCase: false); } + if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && target.IsAscii()) + { + return IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options)); + } + fixed (char* pSource = source) { int index = Interop.GlobalizationInterop.IndexOf(m_sortHandle, target, target.Length, pSource + startIndex, count, options); @@ -180,12 +188,17 @@ namespace System.Globalization { return startIndex; } - + if (options == CompareOptions.Ordinal) { return LastIndexOfOrdinal(source, target, startIndex, count, ignoreCase: false); } + if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && target.IsAscii()) + { + return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options)); + } + // startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source // of the start of the string that is count characters away from startIndex. 
int leftStartIndex = (startIndex - count + 1); @@ -205,6 +218,11 @@ namespace System.Globalization Contract.Assert(!string.IsNullOrEmpty(prefix)); Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); + if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && prefix.IsAscii()) + { + return IsPrefix(source, prefix, GetOrdinalCompareOptions(options)); + } + return Interop.GlobalizationInterop.StartsWith(m_sortHandle, prefix, prefix.Length, source, source.Length, options); } @@ -215,6 +233,11 @@ namespace System.Globalization Contract.Assert(!string.IsNullOrEmpty(suffix)); Contract.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); + if (m_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsAscii() && suffix.IsAscii()) + { + return IsSuffix(source, suffix, GetOrdinalCompareOptions(options)); + } + return Interop.GlobalizationInterop.EndsWith(m_sortHandle, suffix, suffix.Length, source, source.Length, options); } @@ -251,5 +274,23 @@ namespace System.Globalization [DllImport(JitHelpers.QCall)] [SuppressUnmanagedCodeSecurity] private static unsafe extern int InternalHashSortKey(byte* sortKey, int sortKeyLength, [MarshalAs(UnmanagedType.Bool)] bool forceRandomizedHashing, long additionalEntropy); + + private static CompareOptions GetOrdinalCompareOptions(CompareOptions options) + { + if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase) + { + return CompareOptions.OrdinalIgnoreCase; + } + else + { + return CompareOptions.Ordinal; + } + } + + private static bool CanUseAsciiOrdinalForOptions(CompareOptions options) + { + // Unlike the other Ignore options, IgnoreSymbols impacts ASCII characters (e.g. '). + return (options & CompareOptions.IgnoreSymbols) == 0; + } } } |