summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Stephen Toub <stoub@microsoft.com> 2017-08-08 17:45:16 -0400
committer: GitHub <noreply@github.com> 2017-08-08 17:45:16 -0400
commit: eae763affd9624fbddf1c30ead52d23651de98e4 (patch)
tree: fbaa3f1784bbde6d2666509c2cac65950753720e /src
parent: c7a10a52f959514428afe2ab6f4bade3ce1dcdbf (diff)
download: coreclr-eae763affd9624fbddf1c30ead52d23651de98e4.tar.gz
coreclr-eae763affd9624fbddf1c30ead52d23651de98e4.tar.bz2
coreclr-eae763affd9624fbddf1c30ead52d23651de98e4.zip
Add Encoding Span-based APIs (#13269)
* Add Encoding Span-based APIs
* Address PR feedback
Diffstat (limited to 'src')
-rw-r--r-- src/mscorlib/shared/System/Text/Encoding.cs 44
-rw-r--r-- src/mscorlib/shared/System/Text/UTF32Encoding.cs 7
-rw-r--r-- src/mscorlib/shared/System/Text/UTF8Encoding.cs 10
-rw-r--r-- src/mscorlib/shared/System/Text/UnicodeEncoding.cs 7
4 files changed, 67 insertions, 1 deletion
diff --git a/src/mscorlib/shared/System/Text/Encoding.cs b/src/mscorlib/shared/System/Text/Encoding.cs
index 4f23d2a4a7..bf8cb03db2 100644
--- a/src/mscorlib/shared/System/Text/Encoding.cs
+++ b/src/mscorlib/shared/System/Text/Encoding.cs
@@ -382,6 +382,8 @@ namespace System.Text
return Array.Empty<byte>();
}
+ public virtual ReadOnlySpan<byte> Preamble => GetPreamble();
+
private void GetDataItem()
{
if (_dataItem == null)
@@ -733,6 +735,14 @@ namespace System.Text
return GetByteCount(arrChar, 0, count);
}
+ public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
+ {
+ fixed (char* charsPtr = &chars.DangerousGetPinnableReference())
+ {
+ return GetByteCount(charsPtr, chars.Length);
+ }
+ }
+
// For NLS Encodings, workhorse takes an encoder (may be null)
// Always validate parameters before calling internal version, which will only assert.
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
@@ -916,6 +926,15 @@ namespace System.Text
return byteCount;
}
+ public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
+ {
+ fixed (char* charsPtr = &chars.DangerousGetPinnableReference())
+ fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+ {
+ return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
+ }
+ }
+
// Returns the number of characters produced by decoding the given byte
// array.
//
@@ -962,6 +981,14 @@ namespace System.Text
return GetCharCount(arrbyte, 0, count);
}
+ public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
+ {
+ fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+ {
+ return GetCharCount(bytesPtr, bytes.Length);
+ }
+ }
+
// This is our internal workhorse
// Always validate parameters before calling internal version, which will only assert.
internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
@@ -1070,6 +1097,14 @@ namespace System.Text
return charCount;
}
+ public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
+ {
+ fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+ fixed (char* charsPtr = &chars.DangerousGetPinnableReference())
+ {
+ return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length);
+ }
+ }
// This is our internal workhorse
// Always validate parameters before calling internal version, which will only assert.
@@ -1093,6 +1128,15 @@ namespace System.Text
return String.CreateStringFromEncoding(bytes, byteCount, this);
}
+ public unsafe string GetString(ReadOnlySpan<byte> bytes)
+ {
+ fixed (byte* bytesPtr = &bytes.DangerousGetPinnableReference())
+ {
+ return GetString(bytesPtr, bytes.Length);
+ }
+ }
+
+
// Returns the code page identifier of this encoding. The returned value is
// an integer between 0 and 65535 if the encoding has a code page
// identifier, or -1 if the encoding does not represent a code page.
diff --git a/src/mscorlib/shared/System/Text/UTF32Encoding.cs b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
index 10161d193e..260518e21c 100644
--- a/src/mscorlib/shared/System/Text/UTF32Encoding.cs
+++ b/src/mscorlib/shared/System/Text/UTF32Encoding.cs
@@ -39,6 +39,9 @@ namespace System.Text
internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
+ private static readonly byte[] s_bigEndianPreamble = new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
+ private static readonly byte[] s_littleEndianPreamble = new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
+
private bool _emitUTF32ByteOrderMark = false;
private bool _isThrowException = false;
private bool _bigEndian = false;
@@ -1177,6 +1180,10 @@ namespace System.Text
return Array.Empty<byte>();
}
+ public override ReadOnlySpan<byte> Preamble =>
+ GetType() != typeof(UTF32Encoding) ? GetPreamble() : // in case a derived UTF32Encoding overrode GetPreamble
+ _emitUTF32ByteOrderMark ? (_bigEndian ? s_bigEndianPreamble : s_littleEndianPreamble) :
+ Array.Empty<byte>();
public override bool Equals(Object value)
{
diff --git a/src/mscorlib/shared/System/Text/UTF8Encoding.cs b/src/mscorlib/shared/System/Text/UTF8Encoding.cs
index 02b18935e4..974bf75650 100644
--- a/src/mscorlib/shared/System/Text/UTF8Encoding.cs
+++ b/src/mscorlib/shared/System/Text/UTF8Encoding.cs
@@ -54,15 +54,19 @@ namespace System.Text
internal sealed class UTF8EncodingSealed : UTF8Encoding
{
public UTF8EncodingSealed(bool encoderShouldEmitUTF8Identifier) : base(encoderShouldEmitUTF8Identifier) { }
+
+ public override ReadOnlySpan<byte> Preamble => _emitUTF8Identifier ? s_preamble : Array.Empty<byte>();
}
// Used by Encoding.UTF8 for lazy initialization
// The initialization code will not be run until a static member of the class is referenced
internal static readonly UTF8EncodingSealed s_default = new UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: true);
+ internal static readonly byte[] s_preamble = new byte[3] { 0xEF, 0xBB, 0xBF };
+
// Yes, the idea of emitting U+FEFF as a UTF-8 identifier has made it into
// the standard.
- private bool _emitUTF8Identifier = false;
+ internal readonly bool _emitUTF8Identifier = false;
private bool _isThrowException = false;
@@ -2497,6 +2501,10 @@ namespace System.Text
return Array.Empty<byte>();
}
+ public override ReadOnlySpan<byte> Preamble =>
+ GetType() != typeof(UTF8Encoding) ? GetPreamble() : // in case a derived UTF8Encoding overrode GetPreamble
+ _emitUTF8Identifier ? s_preamble :
+ Array.Empty<byte>();
public override bool Equals(Object value)
{
diff --git a/src/mscorlib/shared/System/Text/UnicodeEncoding.cs b/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
index 8e44317ce2..78355299c1 100644
--- a/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
+++ b/src/mscorlib/shared/System/Text/UnicodeEncoding.cs
@@ -20,6 +20,9 @@ namespace System.Text
internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
+ private static readonly byte[] s_bigEndianPreamble = new byte[2] { 0xfe, 0xff };
+ private static readonly byte[] s_littleEndianPreamble = new byte[2] { 0xff, 0xfe };
+
internal bool isThrowException = false;
internal bool bigEndian = false;
@@ -1898,6 +1901,10 @@ namespace System.Text
return Array.Empty<Byte>();
}
+ public override ReadOnlySpan<byte> Preamble =>
+ GetType() != typeof(UnicodeEncoding) ? GetPreamble() : // in case a derived UnicodeEncoding overrode GetPreamble
+ byteOrderMark ? (bigEndian ? s_bigEndianPreamble : s_littleEndianPreamble) :
+ Array.Empty<byte>();
public override int GetMaxByteCount(int charCount)
{