diff options
author | Levi Broderick <GrabYourPitchforks@users.noreply.github.com> | 2019-02-14 11:00:02 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-14 11:00:02 -0800 |
commit | 84eaa7ac079e625f2fbe36ba976f735dbdacdc6b (patch) | |
tree | e392054ae46786a81350196aa007fd4f61a8922e /src/System.Private.CoreLib/shared/System/Char.cs | |
parent | 94b0faee9f93247bb32c89ab7ed545a07540ba97 (diff) | |
download | coreclr-84eaa7ac079e625f2fbe36ba976f735dbdacdc6b.tar.gz coreclr-84eaa7ac079e625f2fbe36ba976f735dbdacdc6b.tar.bz2 coreclr-84eaa7ac079e625f2fbe36ba976f735dbdacdc6b.zip |
Add Rune creation API from UTF-16 surrogate pair (#22590)
Also brings in some perf improvements to existing char and UnicodeUtility APIs
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Char.cs')
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Char.cs | 48 |
1 files changed, 42 insertions, 6 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Char.cs b/src/System.Private.CoreLib/shared/System/Char.cs index 1312380296..50dd092671 100644 --- a/src/System.Private.CoreLib/shared/System/Char.cs +++ b/src/System.Private.CoreLib/shared/System/Char.cs @@ -904,7 +904,14 @@ namespace System public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate) { - return IsHighSurrogate(highSurrogate) && IsLowSurrogate(lowSurrogate); + // Since both the high and low surrogate ranges are exactly 0x400 elements + // wide, and since this is a power of two, we can perform a single comparison + // by baselining each value to the start of its respective range and taking + // the logical OR of them. + + uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START; + uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START; + return (highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE; } internal const int UNICODE_PLANE00_END = 0x00ffff; @@ -937,15 +944,44 @@ namespace System public static int ConvertToUtf32(char highSurrogate, char lowSurrogate) { - if (!IsHighSurrogate(highSurrogate)) + // First, extend both to 32 bits, then calculate the offset of + // each candidate surrogate char from the start of its range. + + uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START; + uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START; + + // This is a single comparison which allows us to check both for validity at once since + // both the high surrogate range and the low surrogate range are the same length. + // If the comparison fails, we call to a helper method to throw the correct exception message. + + if ((highSurrogateOffset | lowSurrogateOffset) > CharUnicodeInfo.HIGH_SURROGATE_RANGE) + { + ConvertToUtf32_ThrowInvalidArgs(highSurrogateOffset); + } + + // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding. + return ((int)highSurrogateOffset << 10) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40 << 10); + } + + [StackTraceHidden] + private static void ConvertToUtf32_ThrowInvalidArgs(uint highSurrogateOffset) + { + // If the high surrogate is not within its expected range, throw an exception + // whose message fingers it as invalid. If it's within the expected range, + // change the message to read that the low surrogate was the problem. + + if (highSurrogateOffset > CharUnicodeInfo.HIGH_SURROGATE_RANGE) { - throw new ArgumentOutOfRangeException(nameof(highSurrogate), SR.ArgumentOutOfRange_InvalidHighSurrogate); + throw new ArgumentOutOfRangeException( + paramName: "highSurrogate", + message: SR.ArgumentOutOfRange_InvalidHighSurrogate); } - if (!IsLowSurrogate(lowSurrogate)) + else { - throw new ArgumentOutOfRangeException(nameof(lowSurrogate), SR.ArgumentOutOfRange_InvalidLowSurrogate); + throw new ArgumentOutOfRangeException( + paramName: "lowSurrogate", + message: SR.ArgumentOutOfRange_InvalidLowSurrogate); } - return (((highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START) * 0x400) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + UNICODE_PLANE01_START); } /*=============================ConvertToUtf32=================================== |