summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Ko <jamesqko@gmail.com>2016-07-19 20:06:01 -0400
committerJan Kotas <jkotas@microsoft.com>2016-07-19 17:06:01 -0700
commit719c34f3fc205093cb137e73c608f3b58e1a144f (patch)
tree489dcd88b4de3100d7cdb402a140dfedf10d62cc
parentb1b8d2b85090e419be92475159f1983e645981ce (diff)
downloadcoreclr-719c34f3fc205093cb137e73c608f3b58e1a144f.tar.gz
coreclr-719c34f3fc205093cb137e73c608f3b58e1a144f.tar.bz2
coreclr-719c34f3fc205093cb137e73c608f3b58e1a144f.zip
Better performance for string.CompareOrdinalHelper (#6119)
- For 64-bit, process a `long` at a time (and 3 longs per iteration of the loop), since it's the largest word size of the processor. This is what other functions, like `EqualsHelper` or `StartsWithOrdinalHelper` do. - Make sure we perform aligned reads on 64-bit platforms - Strings (objects in general?) are allocated on an aligned word boundary (that is, divisible by 4/8). However, the offset to the chars (`OffsetToStringData`) is 12 on 64-bit platforms, meaning that if we read a long from the first char it will *not* be aligned. To fix this, first check for differences in the first/second chars and start the loop from there. - Used `goto` within the loop code, so we don't have to keep making jumps in the common case. I also removed a comment above the `String` class that seems to be outdated (seeing as there is no string constructor that accepts another string).
-rw-r--r--src/mscorlib/src/System/String.cs168
1 files changed, 98 insertions, 70 deletions
diff --git a/src/mscorlib/src/System/String.cs b/src/mscorlib/src/System/String.cs
index d769147359..5a0dcb9ec5 100644
--- a/src/mscorlib/src/System/String.cs
+++ b/src/mscorlib/src/System/String.cs
@@ -34,15 +34,9 @@ namespace System {
// instance and return the result as a new String. All comparison methods are
// implemented as a part of String. As with arrays, character positions
// (indices) are zero-based.
- //
- // When passing a null string into a constructor in VJ and VC, the null should be
- // explicitly type cast to a String.
- // For Example:
- // String s = new String((String)null);
- // Console.WriteLine(s);
- //
+
[ComVisible(true)]
- [Serializable]
+ [Serializable]
public sealed class String : IComparable, ICloneable, IConvertible, IEnumerable
, IComparable<String>, IEnumerable<char>, IEquatable<String>
{
@@ -51,13 +45,12 @@ namespace System {
//NOTE NOTE NOTE NOTE
//These fields map directly onto the fields in an EE StringObject. See object.h for the layout.
//
- [NonSerialized]private int m_stringLength;
+ [NonSerialized] private int m_stringLength;
- [NonSerialized]private char m_firstChar;
+ // For empty strings, this will be '\0' since
+ // strings are both null-terminated and length prefixed
+ [NonSerialized] private char m_firstChar;
- //private static readonly char FmtMsgMarkerChar='%';
- //private static readonly char FmtMsgFmtCodeChar='!';
- //These are defined in Com99/src/vm/COMStringCommon.h and must be kept in sync.
private const int TrimHead = 0;
private const int TrimTail = 1;
private const int TrimBoth = 2;
@@ -468,85 +461,118 @@ namespace System {
Contract.Requires(strA != null);
Contract.Requires(strB != null);
+ // NOTE: This may be subject to change if eliminating the check
+ // in the callers makes them small enough to be inlined by the JIT
+ Contract.Assert(strA.m_firstChar == strB.m_firstChar,
+ "For performance reasons, callers of this method should " +
+ "check/short-circuit beforehand if the first char is the same.");
+
int length = Math.Min(strA.Length, strB.Length);
- int diffOffset = -1;
fixed (char* ap = &strA.m_firstChar) fixed (char* bp = &strB.m_firstChar)
{
char* a = ap;
char* b = bp;
+ // Check if the second chars are different here
+ // The reason we check if m_firstChar is different is because
+ // it's the most common case and allows us to avoid a method call
+ // to here.
+ // The reason we check if the second char is different is because
+ // if the first two chars the same we can increment by 4 bytes,
+ // leaving us word-aligned on both 32-bit (12 bytes into the string)
+ // and 64-bit (16 bytes) platforms.
+
+ // For empty strings, the second char will be null due to padding.
+ // The start of the string (not including sync block pointer)
+ // is the method table pointer + string length, which takes up
+ // 8 bytes on 32-bit, 12 on x64. For empty strings the null
+ // terminator immediately follows, leaving us with an object
+ // 10/14 bytes in size. Since everything needs to be a multiple
+ // of 4/8, this will get padded and zeroed out.
+
+ // For one-char strings the second char will be the null terminator.
+
+ // NOTE: If in the future there is a way to read the second char
+ // without pinning the string (e.g. System.Runtime.CompilerServices.Unsafe
+ // is exposed to mscorlib, or a future version of C# allows inline IL),
+ // then do that and short-circuit before the fixed.
+
+ if (*(a + 1) != *(b + 1)) goto DiffOffset1;
+
+ // Since we know that the first two chars are the same,
+ // we can increment by 2 here and skip 4 bytes.
+ // This leaves us 8-byte aligned, which results
+ // on better perf for 64-bit platforms.
+ length -= 2; a += 2; b += 2;
+
// unroll the loop
- while (length >= 10)
+#if BIT64
+ while (length >= 12)
{
- if (*(int*)a != *(int*)b) {
- diffOffset = 0;
- break;
- }
-
- if (*(int*)(a+2) != *(int*)(b+2)) {
- diffOffset = 2;
- break;
- }
-
- if (*(int*)(a+4) != *(int*)(b+4)) {
- diffOffset = 4;
- break;
- }
-
- if (*(int*)(a+6) != *(int*)(b+6)) {
- diffOffset = 6;
- break;
- }
-
- if (*(int*)(a+8) != *(int*)(b+8)) {
- diffOffset = 8;
- break;
- }
- length -= 10;
- a += 10;
- b += 10;
+ if (*(long*)a != *(long*)b) goto DiffOffset0;
+ if (*(long*)(a + 4) != *(long*)(b + 4)) goto DiffOffset4;
+ if (*(long*)(a + 8) != *(long*)(b + 8)) goto DiffOffset8;
+ length -= 12; a += 12; b += 12;
}
-
- if( diffOffset != -1) {
- // we already see a difference in the unrolled loop above
- a += diffOffset;
- b += diffOffset;
- int order;
- if ( (order = (int)*a - (int)*b) != 0) {
- return order;
- }
- Contract.Assert( *(a+1) != *(b+1), "This byte must be different if we reach here!");
- return ((int)*(a+1) - (int)*(b+1));
+#else // BIT64
+ while (length >= 10)
+ {
+ if (*(int*)a != *(int*)b) goto DiffOffset0;
+ if (*(int*)(a + 2) != *(int*)(b + 2)) goto DiffOffset2;
+ if (*(int*)(a + 4) != *(int*)(b + 4)) goto DiffOffset4;
+ if (*(int*)(a + 6) != *(int*)(b + 6)) goto DiffOffset6;
+ if (*(int*)(a + 8) != *(int*)(b + 8)) goto DiffOffset8;
+ length -= 10; a += 10; b += 10;
}
+#endif // BIT64
- // now go back to slower code path and do comparison on 4 bytes at a time.
+ // Fallback loop:
+ // go back to slower code path and do comparison on 4 bytes at a time.
// This depends on the fact that the String objects are
// always zero terminated and that the terminating zero is not included
// in the length. For odd string sizes, the last compare will include
// the zero terminator.
- while (length > 0) {
- if (*(int*)a != *(int*)b) {
- break;
- }
+ while (length > 0)
+ {
+ if (*(int*)a != *(int*)b) goto DiffNextInt;
length -= 2;
a += 2;
b += 2;
}
- if( length > 0) {
- int c;
- // found a different int on above loop
- if ( (c = (int)*a - (int)*b) != 0) {
- return c;
- }
- Contract.Assert( *(a+1) != *(b+1), "This byte must be different if we reach here!");
- return ((int)*(a+1) - (int)*(b+1));
- }
-
// At this point, we have compared all the characters in at least one string.
// The longer string will be larger.
return strA.Length - strB.Length;
+
+#if BIT64
+ DiffOffset8: a += 4; b += 4;
+ DiffOffset4: a += 4; b += 4;
+#else // BIT64
+ // Use jumps instead of falling through, since
+ // otherwise going to DiffOffset8 will involve
+ // 8 add instructions before getting to DiffNextInt
+ DiffOffset8: a += 8; b += 8; goto DiffOffset0;
+ DiffOffset6: a += 6; b += 6; goto DiffOffset0;
+ DiffOffset4: a += 2; b += 2;
+ DiffOffset2: a += 2; b += 2;
+#endif // BIT64
+
+ DiffOffset0:
+ // If we reached here, we already see a difference in the unrolled loop above
+#if BIT64
+ if (*(int*)a == *(int*)b)
+ {
+ a += 2; b += 2;
+ }
+#endif // BIT64
+
+ DiffNextInt:
+ if (*a != *b) return *a - *b;
+
+ DiffOffset1:
+ Contract.Assert(*(a + 1) != *(b + 1), "This char must be different if we reach here!");
+ return *(a + 1) - *(b + 1);
}
}
@@ -1862,7 +1888,8 @@ namespace System {
case StringComparison.Ordinal:
// Most common case: first character is different.
- if ((strA.m_firstChar - strB.m_firstChar) != 0)
+ // Returns false for empty strings.
+ if (strA.m_firstChar != strB.m_firstChar)
{
return strA.m_firstChar - strB.m_firstChar;
}
@@ -2174,7 +2201,8 @@ namespace System {
}
// Most common case, first character is different.
- if ((strA.m_firstChar - strB.m_firstChar) != 0)
+ // This will return false for empty strings.
+ if (strA.m_firstChar != strB.m_firstChar)
{
return strA.m_firstChar - strB.m_firstChar;
}