diff options
author | Levi Broderick <GrabYourPitchforks@users.noreply.github.com> | 2019-04-15 18:28:39 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-15 18:28:39 -0700 |
commit | fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83 (patch) | |
tree | 75ed75e7e7802c2f4ef025d7dbf03491ed17798f | |
parent | 4aeedc147f45b63e6f07155fea1446b084cb9a46 (diff) | |
download | coreclr-fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83.tar.gz coreclr-fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83.tar.bz2 coreclr-fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83.zip |
Fix incorrect bit match pattern in UTF-16 validation (#24015)
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs index 40e818e2b6..bdf7972176 100644 --- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs +++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs @@ -171,11 +171,28 @@ namespace System.Text.Unicode // - 00 if the corresponding UTF-16 char was a high surrogate code unit; // - 01 if the corresponding UTF-16 char was a low surrogate code unit; // - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit. + // Since 'mask' already has 00 in these positions (since the corresponding char + // wasn't a surrogate), "mask AND mask2 == 00" holds for these positions. uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte()); - uint lowSurrogatesMask = mask2 & mask; // 01 only if was a low surrogate char, else 00 - uint highSurrogatesMask = (mask2 ^ mask) & 0x5555u; // 01 only if was a high surrogate char, else 00 + // 'lowSurrogatesMask' has its bits occur in pairs: + // - 01 if the corresponding char was a low surrogate char, + // - 00 if the corresponding char was a high surrogate char or not a surrogate at all. + + uint lowSurrogatesMask = mask2 & mask; + + // 'highSurrogatesMask' has its bits occur in pairs: + // - 01 if the corresponding char was a high surrogate char, + // - 00 if the corresponding char was a low surrogate char or not a surrogate at all. + + uint highSurrogatesMask = (mask2 ^ 0b_0101_0101_0101_0101u /* flip all even-numbered bits 00 <-> 01 */) & mask; + + Debug.Assert((highSurrogatesMask & lowSurrogatesMask) == 0, + "A char cannot simultaneously be both a high and a low surrogate char."); + + Debug.Assert(((highSurrogatesMask | lowSurrogatesMask) & 0b_1010_1010_1010_1010u) == 0, + "Only even bits (no odd bits) of the masks should be set."); // Now check that each high surrogate is followed by a low surrogate and that each // low surrogate follows a high surrogate. We make an exception for the case where |