summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Levi Broderick <GrabYourPitchforks@users.noreply.github.com>, 2019-04-15 18:28:39 -0700
committer: GitHub <noreply@github.com>, 2019-04-15 18:28:39 -0700
commit: fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83 (patch)
tree: 75ed75e7e7802c2f4ef025d7dbf03491ed17798f
parent: 4aeedc147f45b63e6f07155fea1446b084cb9a46 (diff)
download: coreclr-fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83.tar.gz
coreclr-fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83.tar.bz2
coreclr-fcc4beb884b7e38a9886b7a354ec8b6cdb8aad83.zip
Fix incorrect bit match pattern in UTF-16 validation (#24015)
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs 21
1 files changed, 19 insertions, 2 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
index 40e818e2b6..bdf7972176 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
@@ -171,11 +171,28 @@ namespace System.Text.Unicode
// - 00 if the corresponding UTF-16 char was a high surrogate code unit;
// - 01 if the corresponding UTF-16 char was a low surrogate code unit;
// - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit.
+ // Because 'mask' already has 00 in the positions whose corresponding char is not
+ // a surrogate, "mask AND mask2 == 00" holds for those positions.
uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
- uint lowSurrogatesMask = mask2 & mask; // 01 only if was a low surrogate char, else 00
- uint highSurrogatesMask = (mask2 ^ mask) & 0x5555u; // 01 only if was a high surrogate char, else 00
+ // 'lowSurrogatesMask' has its bits occur in pairs:
+ // - 01 if the corresponding char was a low surrogate char,
+ // - 00 if the corresponding char was a high surrogate char or not a surrogate at all.
+
+ uint lowSurrogatesMask = mask2 & mask;
+
+ // 'highSurrogatesMask' has its bits occur in pairs:
+ // - 01 if the corresponding char was a high surrogate char,
+ // - 00 if the corresponding char was a low surrogate char or not a surrogate at all.
+
+ uint highSurrogatesMask = (mask2 ^ 0b_0101_0101_0101_0101u /* flip all even-numbered bits 00 <-> 01 */) & mask;
+
+ Debug.Assert((highSurrogatesMask & lowSurrogatesMask) == 0,
+ "A char cannot simultaneously be both a high and a low surrogate char.");
+
+ Debug.Assert(((highSurrogatesMask | lowSurrogatesMask) & 0b_1010_1010_1010_1010u) == 0,
+ "Only even bits (no odd bits) of the masks should be set.");
// Now check that each high surrogate is followed by a low surrogate and that each
// low surrogate follows a high surrogate. We make an exception for the case where