summaryrefslogtreecommitdiff
path: root/src/System.Private.CoreLib/shared/System/Char.cs
diff options
context:
space:
mode:
authorLevi Broderick <GrabYourPitchforks@users.noreply.github.com>2019-02-14 11:00:02 -0800
committerGitHub <noreply@github.com>2019-02-14 11:00:02 -0800
commit84eaa7ac079e625f2fbe36ba976f735dbdacdc6b (patch)
treee392054ae46786a81350196aa007fd4f61a8922e /src/System.Private.CoreLib/shared/System/Char.cs
parent94b0faee9f93247bb32c89ab7ed545a07540ba97 (diff)
downloadcoreclr-84eaa7ac079e625f2fbe36ba976f735dbdacdc6b.tar.gz
coreclr-84eaa7ac079e625f2fbe36ba976f735dbdacdc6b.tar.bz2
coreclr-84eaa7ac079e625f2fbe36ba976f735dbdacdc6b.zip
Add Rune creation API from UTF-16 surrogate pair (#22590)
Also brings in some perf improvements to existing char and UnicodeUtility APIs
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Char.cs')
-rw-r--r--src/System.Private.CoreLib/shared/System/Char.cs48
1 files changed, 42 insertions, 6 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Char.cs b/src/System.Private.CoreLib/shared/System/Char.cs
index 1312380296..50dd092671 100644
--- a/src/System.Private.CoreLib/shared/System/Char.cs
+++ b/src/System.Private.CoreLib/shared/System/Char.cs
@@ -904,7 +904,14 @@ namespace System
public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate)
{
- return IsHighSurrogate(highSurrogate) && IsLowSurrogate(lowSurrogate);
+ // Since both the high and low surrogate ranges are exactly 0x400 elements
+ // wide, and since this is a power of two, we can perform a single comparison
+ // by baselining each value to the start of its respective range and taking
+ // the bitwise OR of the two offsets.
+
+ uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
+ uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
+ return (highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE;
}
internal const int UNICODE_PLANE00_END = 0x00ffff;
@@ -937,15 +944,44 @@ namespace System
public static int ConvertToUtf32(char highSurrogate, char lowSurrogate)
{
- if (!IsHighSurrogate(highSurrogate))
+ // First, extend both to 32 bits, then calculate the offset of
+ // each candidate surrogate char from the start of its range.
+
+ uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
+ uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
+
+ // This is a single comparison which allows us to check both for validity at once since
+ // both the high surrogate range and the low surrogate range are the same length.
+ // If the comparison fails, we call a helper method that throws the exception with the correct message.
+
+ if ((highSurrogateOffset | lowSurrogateOffset) > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
+ {
+ ConvertToUtf32_ThrowInvalidArgs(highSurrogateOffset);
+ }
+
+ // The (0x40 << 10) term below (= 0x10000) accounts for uuuuu = wwww + 1 in the surrogate encoding.
+ return ((int)highSurrogateOffset << 10) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40 << 10);
+ }
+
+ [StackTraceHidden]
+ private static void ConvertToUtf32_ThrowInvalidArgs(uint highSurrogateOffset)
+ {
+ // If the high surrogate is not within its expected range, throw an exception
+ // whose message identifies it as invalid. If it's within the expected range,
+ // change the message to read that the low surrogate was the problem.
+
+ if (highSurrogateOffset > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
{
- throw new ArgumentOutOfRangeException(nameof(highSurrogate), SR.ArgumentOutOfRange_InvalidHighSurrogate);
+ throw new ArgumentOutOfRangeException(
+ paramName: "highSurrogate",
+ message: SR.ArgumentOutOfRange_InvalidHighSurrogate);
}
- if (!IsLowSurrogate(lowSurrogate))
+ else
{
- throw new ArgumentOutOfRangeException(nameof(lowSurrogate), SR.ArgumentOutOfRange_InvalidLowSurrogate);
+ throw new ArgumentOutOfRangeException(
+ paramName: "lowSurrogate",
+ message: SR.ArgumentOutOfRange_InvalidLowSurrogate);
}
- return (((highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START) * 0x400) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + UNICODE_PLANE01_START);
}
/*=============================ConvertToUtf32===================================