summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Levi Broderick <GrabYourPitchforks@users.noreply.github.com>, 2019-03-18 22:58:32 -0700
committer: GitHub <noreply@github.com>, 2019-03-18 22:58:32 -0700
commit: 1f3f474a13bdde1c5fecdf8cd9ce525dbe5df000 (patch)
tree: 6dfa953c84f1b4d1a7af414c89bb69a025902b1b
parent: 31581af5fa816fb2ea94145823ec3bdd6c0b0327 (diff)
download: coreclr-1f3f474a13bdde1c5fecdf8cd9ce525dbe5df000.tar.gz
coreclr-1f3f474a13bdde1c5fecdf8cd9ce525dbe5df000.tar.bz2
coreclr-1f3f474a13bdde1c5fecdf8cd9ce525dbe5df000.zip
Add Utf8String skeleton (#23209)
Utf8String is an experimental type that is string-like (heap-allocated, immutable, variable-length, null-terminated) but whose inner representation is UTF-8, not UTF-16. This is a skeleton implementation of the basic API shape. The ecosystem of APIs has not yet been built around it. All Utf8String-related code is currently surrounded by ifdefs to allow easy identification and removal from release branches.
-rw-r--r--clr.defines.targets1
-rw-r--r--clrdefinitions.cmake1
-rw-r--r--src/System.Private.CoreLib/System.Private.CoreLib.csproj12
-rw-r--r--src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems1
-rw-r--r--src/System.Private.CoreLib/shared/System/Memory.cs32
-rw-r--r--src/System.Private.CoreLib/shared/System/ReadOnlyMemory.cs32
-rw-r--r--src/System.Private.CoreLib/shared/System/ReadOnlySpan.Fast.cs14
-rw-r--r--src/System.Private.CoreLib/shared/System/Runtime/InteropServices/MemoryMarshal.cs7
-rw-r--r--src/System.Private.CoreLib/shared/System/Span.Fast.cs14
-rw-r--r--src/System.Private.CoreLib/shared/System/String.cs13
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs106
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs2
-rw-r--r--src/System.Private.CoreLib/src/System/Char8.cs69
-rw-r--r--src/System.Private.CoreLib/src/System/Utf8Extensions.cs367
-rw-r--r--src/System.Private.CoreLib/src/System/Utf8String.Construction.cs223
-rw-r--r--src/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs109
-rw-r--r--src/System.Private.CoreLib/src/System/Utf8String.Searching.cs93
-rw-r--r--src/System.Private.CoreLib/src/System/Utf8String.cs252
-rw-r--r--src/classlibnative/bcltype/objectnative.cpp3
-rw-r--r--src/inc/dacvars.h3
-rw-r--r--src/strongname/api/common.h3
-rw-r--r--src/vm/appdomain.cpp5
-rw-r--r--src/vm/classnames.h4
-rw-r--r--src/vm/common.h3
-rw-r--r--src/vm/ecall.cpp70
-rw-r--r--src/vm/ecall.h18
-rw-r--r--src/vm/ecalllist.h16
-rw-r--r--src/vm/gchelpers.cpp77
-rw-r--r--src/vm/gchelpers.h17
-rw-r--r--src/vm/jithelpers.cpp71
-rw-r--r--src/vm/jitinterface.cpp3
-rw-r--r--src/vm/jitinterface.h5
-rw-r--r--src/vm/jitinterfacegen.cpp9
-rw-r--r--src/vm/marshalnative.cpp10
-rw-r--r--src/vm/metasig.h14
-rw-r--r--src/vm/methodtable.h2
-rw-r--r--src/vm/methodtablebuilder.cpp13
-rw-r--r--src/vm/mscorlib.h15
-rw-r--r--src/vm/object.h64
-rw-r--r--src/vm/object.inl16
-rw-r--r--src/vm/reflectioninvocation.cpp18
-rw-r--r--src/vm/vars.cpp3
-rw-r--r--src/vm/vars.hpp14
43 files changed, 1797 insertions, 27 deletions
diff --git a/clr.defines.targets b/clr.defines.targets
index 3fa0417f51..e2f10586f1 100644
--- a/clr.defines.targets
+++ b/clr.defines.targets
@@ -1,6 +1,7 @@
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Features we're currently flighting, but don't intend to ship in officially supported releases -->
<PropertyGroup Condition="'$(IsPrerelease)' == 'true'">
+ <FeatureUtf8String>true</FeatureUtf8String>
<!-- FeatureXXX>true</FeatureXXX -->
</PropertyGroup>
diff --git a/clrdefinitions.cmake b/clrdefinitions.cmake
index 9e22da2033..a25d19d130 100644
--- a/clrdefinitions.cmake
+++ b/clrdefinitions.cmake
@@ -6,6 +6,7 @@ set(PRERELEASE 1)
# Features we're currently flighting, but don't intend to ship in officially supported releases
if (PRERELEASE)
+ add_definitions(-DFEATURE_UTF8STRING=1)
# add_definitions(-DFEATURE_XXX=1)
endif (PRERELEASE)
diff --git a/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/System.Private.CoreLib/System.Private.CoreLib.csproj
index 6e73aeffa5..0cf6733ad9 100644
--- a/src/System.Private.CoreLib/System.Private.CoreLib.csproj
+++ b/src/System.Private.CoreLib/System.Private.CoreLib.csproj
@@ -112,6 +112,10 @@
<!-- CLR Features -->
<Import Project="$(MSBuildThisFileDirectory)..\..\clr.coreclr.props" />
<Import Project="$(MSBuildThisFileDirectory)..\..\clr.defines.targets" />
+ <!-- Experimental features -->
+ <PropertyGroup Condition="'$(FeatureUtf8String)' == 'true'">
+ <DefineConstants>$(DefineConstants);FEATURE_UTF8STRING</DefineConstants>
+ </PropertyGroup>
<!-- Sources -->
<ItemGroup>
<Compile Include="$(BclSourcesRoot)\Internal\Console.cs" />
@@ -274,6 +278,14 @@
<Compile Include="shared\Interop\Windows\Ole32\Interop.CoTaskMemAlloc.cs" />
<Compile Include="shared\Interop\Windows\OleAut32\Interop.SysAllocStringByteLen.cs" />
</ItemGroup>
+ <ItemGroup Condition="'$(FeatureUtf8String)' == 'true'">
+ <Compile Include="$(BclSourcesRoot)\System\Char8.cs" />
+ <Compile Include="$(BclSourcesRoot)\System\Utf8Extensions.cs" />
+ <Compile Include="$(BclSourcesRoot)\System\Utf8String.cs" />
+ <Compile Include="$(BclSourcesRoot)\System\Utf8String.Construction.cs" />
+ <Compile Include="$(BclSourcesRoot)\System\Utf8String.Manipulation.cs" />
+ <Compile Include="$(BclSourcesRoot)\System\Utf8String.Searching.cs" />
+ </ItemGroup>
<ItemGroup>
<Compile Include="$(BclSourcesRoot)\System\Diagnostics\Eventing\XplatEventLogger.cs" Condition="'$(FeatureXplatEventSource)' == 'true'" />
<Compile Include="$(IntermediateOutputPath)..\Eventing\NativeRuntimeEventSource.cs" Condition="'$(FeaturePerfTracing)' == 'true'"/>
diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
index 85ba8b8cd4..b1c9da0bfa 100644
--- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
+++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
@@ -801,6 +801,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF8Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ValueStringBuilder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\TimeSpan.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\ThreadAttributes.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Threading\AbandonedMutexException.cs" />
diff --git a/src/System.Private.CoreLib/shared/System/Memory.cs b/src/System.Private.CoreLib/shared/System/Memory.cs
index ba31a6aeae..2074404630 100644
--- a/src/System.Private.CoreLib/shared/System/Memory.cs
+++ b/src/System.Private.CoreLib/shared/System/Memory.cs
@@ -6,6 +6,7 @@ using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+using System.Text;
using EditorBrowsableAttribute = System.ComponentModel.EditorBrowsableAttribute;
using EditorBrowsableState = System.ComponentModel.EditorBrowsableState;
@@ -164,7 +165,13 @@ namespace System
// No validation performed in release builds; caller must provide any necessary validation.
// 'obj is T[]' below also handles things like int[] <-> uint[] being convertible
- Debug.Assert((obj == null) || (typeof(T) == typeof(char) && obj is string) || (obj is T[]) || (obj is MemoryManager<T>));
+ Debug.Assert((obj == null)
+ || (typeof(T) == typeof(char) && obj is string)
+#if FEATURE_UTF8STRING
+ || ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && obj is Utf8String)
+#endif // FEATURE_UTF8STRING
+ || (obj is T[])
+ || (obj is MemoryManager<T>));
_object = obj;
_index = start;
@@ -212,6 +219,14 @@ namespace System
{
return (_object is string str) ? str.Substring(_index, _length) : Span.ToString();
}
+#if FEATURE_UTF8STRING
+ else if (typeof(T) == typeof(Char8))
+ {
+ // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.
+ Span<T> span = Span;
+ return Encoding.UTF8.GetString(new ReadOnlySpan<byte>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(span)), span.Length));
+ }
+#endif // FEATURE_UTF8STRING
return string.Format("System.Memory<{0}>[{1}]", typeof(T).Name, _length);
}
@@ -317,6 +332,13 @@ namespace System
refToReturn = ref Unsafe.As<char, T>(ref Unsafe.As<string>(tmpObject).GetRawStringData());
lengthOfUnderlyingSpan = Unsafe.As<string>(tmpObject).Length;
}
+#if FEATURE_UTF8STRING
+ else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && tmpObject.GetType() == typeof(Utf8String))
+ {
+ refToReturn = ref Unsafe.As<byte, T>(ref Unsafe.As<Utf8String>(tmpObject).DangerousGetMutableReference());
+ lengthOfUnderlyingSpan = Unsafe.As<Utf8String>(tmpObject).Length;
+ }
+#endif // FEATURE_UTF8STRING
else if (RuntimeHelpers.ObjectHasComponentSize(tmpObject))
{
// We know the object is not null, it's not a string, and it is variable-length. The only
@@ -427,6 +449,14 @@ namespace System
ref char stringData = ref Unsafe.Add(ref s.GetRawStringData(), _index);
return new MemoryHandle(Unsafe.AsPointer(ref stringData), handle);
}
+#if FEATURE_UTF8STRING
+ else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && tmpObject is Utf8String utf8String)
+ {
+ GCHandle handle = GCHandle.Alloc(tmpObject, GCHandleType.Pinned);
+ ref byte stringData = ref utf8String.DangerousGetMutableReference(_index);
+ return new MemoryHandle(Unsafe.AsPointer(ref stringData), handle);
+ }
+#endif // FEATURE_UTF8STRING
else if (RuntimeHelpers.ObjectHasComponentSize(tmpObject))
{
// 'tmpObject is T[]' below also handles things like int[] <-> uint[] being convertible
diff --git a/src/System.Private.CoreLib/shared/System/ReadOnlyMemory.cs b/src/System.Private.CoreLib/shared/System/ReadOnlyMemory.cs
index 6c598430ad..bf90f0449d 100644
--- a/src/System.Private.CoreLib/shared/System/ReadOnlyMemory.cs
+++ b/src/System.Private.CoreLib/shared/System/ReadOnlyMemory.cs
@@ -6,6 +6,7 @@ using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+using System.Text;
using EditorBrowsableAttribute = System.ComponentModel.EditorBrowsableAttribute;
using EditorBrowsableState = System.ComponentModel.EditorBrowsableState;
@@ -99,7 +100,13 @@ namespace System
// No validation performed in release builds; caller must provide any necessary validation.
// 'obj is T[]' below also handles things like int[] <-> uint[] being convertible
- Debug.Assert((obj == null) || (typeof(T) == typeof(char) && obj is string) || (obj is T[]) || (obj is MemoryManager<T>));
+ Debug.Assert((obj == null)
+ || (typeof(T) == typeof(char) && obj is string)
+#if FEATURE_UTF8STRING
+ || ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && obj is Utf8String)
+#endif // FEATURE_UTF8STRING
+ || (obj is T[])
+ || (obj is MemoryManager<T>));
_object = obj;
_index = start;
@@ -141,6 +148,14 @@ namespace System
{
return (_object is string str) ? str.Substring(_index, _length) : Span.ToString();
}
+#if FEATURE_UTF8STRING
+ else if (typeof(T) == typeof(Char8))
+ {
+ // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.
+ ReadOnlySpan<T> span = Span;
+ return Encoding.UTF8.GetString(new ReadOnlySpan<byte>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(span)), span.Length));
+ }
+#endif // FEATURE_UTF8STRING
return string.Format("System.ReadOnlyMemory<{0}>[{1}]", typeof(T).Name, _length);
}
@@ -239,6 +254,13 @@ namespace System
refToReturn = ref Unsafe.As<char, T>(ref Unsafe.As<string>(tmpObject).GetRawStringData());
lengthOfUnderlyingSpan = Unsafe.As<string>(tmpObject).Length;
}
+#if FEATURE_UTF8STRING
+ else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && tmpObject.GetType() == typeof(Utf8String))
+ {
+ refToReturn = ref Unsafe.As<byte, T>(ref Unsafe.As<Utf8String>(tmpObject).DangerousGetMutableReference());
+ lengthOfUnderlyingSpan = Unsafe.As<Utf8String>(tmpObject).Length;
+ }
+#endif // FEATURE_UTF8STRING
else if (RuntimeHelpers.ObjectHasComponentSize(tmpObject))
{
// We know the object is not null, it's not a string, and it is variable-length. The only
@@ -342,6 +364,14 @@ namespace System
ref char stringData = ref Unsafe.Add(ref s.GetRawStringData(), _index);
return new MemoryHandle(Unsafe.AsPointer(ref stringData), handle);
}
+#if FEATURE_UTF8STRING
+ else if ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && tmpObject is Utf8String utf8String)
+ {
+ GCHandle handle = GCHandle.Alloc(tmpObject, GCHandleType.Pinned);
+ ref byte stringData = ref utf8String.DangerousGetMutableReference(_index);
+ return new MemoryHandle(Unsafe.AsPointer(ref stringData), handle);
+ }
+#endif // FEATURE_UTF8STRING
else if (RuntimeHelpers.ObjectHasComponentSize(tmpObject))
{
// 'tmpObject is T[]' below also handles things like int[] <-> uint[] being convertible
diff --git a/src/System.Private.CoreLib/shared/System/ReadOnlySpan.Fast.cs b/src/System.Private.CoreLib/shared/System/ReadOnlySpan.Fast.cs
index eb3fd1464d..00337a5fd7 100644
--- a/src/System.Private.CoreLib/shared/System/ReadOnlySpan.Fast.cs
+++ b/src/System.Private.CoreLib/shared/System/ReadOnlySpan.Fast.cs
@@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Versioning;
+using System.Text;
using EditorBrowsableAttribute = System.ComponentModel.EditorBrowsableAttribute;
using EditorBrowsableState = System.ComponentModel.EditorBrowsableState;
using Internal.Runtime.CompilerServices;
@@ -240,12 +241,15 @@ namespace System
{
if (typeof(T) == typeof(char))
{
- unsafe
- {
- fixed (char* src = &Unsafe.As<T, char>(ref _pointer.Value))
- return new string(src, 0, _length);
- }
+ return new string(new ReadOnlySpan<char>(ref Unsafe.As<T, char>(ref _pointer.Value), _length));
}
+#if FEATURE_UTF8STRING
+ else if (typeof(T) == typeof(Char8))
+ {
+ // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.
+ return Encoding.UTF8.GetString(new ReadOnlySpan<byte>(ref Unsafe.As<T, byte>(ref _pointer.Value), _length));
+ }
+#endif // FEATURE_UTF8STRING
return string.Format("System.ReadOnlySpan<{0}>[{1}]", typeof(T).Name, _length);
}
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/InteropServices/MemoryMarshal.cs b/src/System.Private.CoreLib/shared/System/Runtime/InteropServices/MemoryMarshal.cs
index b1f5507122..225f434382 100644
--- a/src/System.Private.CoreLib/shared/System/Runtime/InteropServices/MemoryMarshal.cs
+++ b/src/System.Private.CoreLib/shared/System/Runtime/InteropServices/MemoryMarshal.cs
@@ -28,7 +28,12 @@ namespace System.Runtime.InteropServices
// As an optimization, we skip the "is string?" check below if typeof(T) is not char,
// as Memory<T> / ROM<T> can't possibly contain a string instance in this case.
- if (obj != null && (typeof(T) != typeof(char) || obj.GetType() != typeof(string)))
+ if (obj != null && !(
+ (typeof(T) == typeof(char) && obj.GetType() == typeof(string))
+#if FEATURE_UTF8STRING
+ || ((typeof(T) == typeof(byte) || typeof(T) == typeof(Char8)) && obj.GetType() == typeof(Utf8String))
+#endif // FEATURE_UTF8STRING
+ ))
{
if (RuntimeHelpers.ObjectHasComponentSize(obj))
{
diff --git a/src/System.Private.CoreLib/shared/System/Span.Fast.cs b/src/System.Private.CoreLib/shared/System/Span.Fast.cs
index 66de4fe3d3..adc1f3903d 100644
--- a/src/System.Private.CoreLib/shared/System/Span.Fast.cs
+++ b/src/System.Private.CoreLib/shared/System/Span.Fast.cs
@@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Versioning;
+using System.Text;
using EditorBrowsableAttribute = System.ComponentModel.EditorBrowsableAttribute;
using EditorBrowsableState = System.ComponentModel.EditorBrowsableState;
using Internal.Runtime.CompilerServices;
@@ -319,12 +320,15 @@ namespace System
{
if (typeof(T) == typeof(char))
{
- unsafe
- {
- fixed (char* src = &Unsafe.As<T, char>(ref _pointer.Value))
- return new string(src, 0, _length);
- }
+ return new string(new ReadOnlySpan<char>(ref Unsafe.As<T, char>(ref _pointer.Value), _length));
+ }
+#if FEATURE_UTF8STRING
+ else if (typeof(T) == typeof(Char8))
+ {
+ // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.
+ return Encoding.UTF8.GetString(new ReadOnlySpan<byte>(ref Unsafe.As<T, byte>(ref _pointer.Value), _length));
}
+#endif // FEATURE_UTF8STRING
return string.Format("System.Span<{0}>[{1}]", typeof(T).Name, _length);
}
diff --git a/src/System.Private.CoreLib/shared/System/String.cs b/src/System.Private.CoreLib/shared/System/String.cs
index 49afbc8c8c..10f75225c0 100644
--- a/src/System.Private.CoreLib/shared/System/String.cs
+++ b/src/System.Private.CoreLib/shared/System/String.cs
@@ -24,9 +24,13 @@ namespace System
[System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
public sealed partial class String : IComparable, IEnumerable, IConvertible, IEnumerable<char>, IComparable<string>, IEquatable<string>, ICloneable
{
- // String constructors
- // These are special. The implementation methods for these have a different signature from the
- // declared constructors.
+ /*
+ * CONSTRUCTORS
+ *
+ * Defining a new constructor for string-like types (like String) requires changes both
+ * to the managed code below and to the native VM code. See the comment at the top of
+ * src/vm/ecall.cpp for instructions on how to add new overloads.
+ */
[MethodImplAttribute(MethodImplOptions.InternalCall)]
public extern String(char[] value);
@@ -335,8 +339,7 @@ namespace System
return Empty;
string result = FastAllocateString(value.Length);
- fixed (char* dest = &result._firstChar, src = &MemoryMarshal.GetReference(value))
- wstrcpy(dest, src, value.Length);
+ Buffer.Memmove(ref result._firstChar, ref MemoryMarshal.GetReference(value), (uint)value.Length);
return result;
}
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs
new file mode 100644
index 0000000000..6ee9ca05a6
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs
@@ -0,0 +1,106 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Diagnostics;
+using System.IO;
+using System.Runtime.CompilerServices;
+
+namespace System.Text.Unicode
+{
+ internal static class Utf8Utility
+ {
+ /// <summary>
+ /// The maximum number of bytes that can result from UTF-8 transcoding
+ /// any Unicode scalar value.
+ /// </summary>
+ internal const int MaxBytesPerScalar = 4;
+
+ /// <summary>
+ /// The UTF-8 representation of <see cref="UnicodeUtility.ReplacementChar"/>.
+ /// </summary>
+ private static ReadOnlySpan<byte> ReplacementCharSequence => new byte[] { 0xEF, 0xBF, 0xBD };
+
+ /// <summary>
+ /// Returns the byte index in <paramref name="utf8Data"/> where the first invalid UTF-8 sequence begins,
+ /// or -1 if the buffer contains no invalid sequences. Also sets <paramref name="isAscii"/> to indicate
+ /// whether all data observed (up to the first invalid sequence or the end of the buffer, whichever
+ /// comes first) is ASCII.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int GetIndexOfFirstInvalidUtf8Sequence(ReadOnlySpan<byte> utf8Data, out bool isAscii)
+ {
+ // TODO_UTF8STRING: Replace this with the faster drop-in replacement when it's available (coreclr #21948).
+
+ bool tempIsAscii = true;
+ int originalDataLength = utf8Data.Length;
+
+ while (!utf8Data.IsEmpty)
+ {
+ if (Rune.DecodeFromUtf8(utf8Data, out Rune result, out int bytesConsumed) != OperationStatus.Done)
+ {
+ break;
+ }
+
+ tempIsAscii &= result.IsAscii;
+ utf8Data = utf8Data.Slice(bytesConsumed);
+ }
+
+ isAscii = tempIsAscii;
+ return (utf8Data.IsEmpty) ? -1 : (originalDataLength - utf8Data.Length);
+ }
+
+#if FEATURE_UTF8STRING
+ /// <summary>
+ /// Returns <paramref name="value"/> if it is null or contains only well-formed UTF-8 data;
+ /// otherwise allocates a new <see cref="Utf8String"/> instance containing the same data as
+ /// <paramref name="value"/> but where all invalid UTF-8 sequences have been replaced
+ /// with U+FFFD.
+ /// </summary>
+ public static Utf8String ValidateAndFixupUtf8String(Utf8String value)
+ {
+ if (Utf8String.IsNullOrEmpty(value))
+ {
+ return value;
+ }
+
+ ReadOnlySpan<byte> valueAsBytes = value.AsBytes();
+
+ int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _);
+ if (idxOfFirstInvalidData < 0)
+ {
+ return value;
+ }
+
+ // TODO_UTF8STRING: Replace this with the faster implementation once it's available.
+ // (The faster implementation is in the dev/utf8string_bak branch currently.)
+
+ MemoryStream memStream = new MemoryStream();
+ memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData));
+
+ valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData);
+ do
+ {
+ if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done)
+ {
+ // Valid scalar value - copy data as-is to MemoryStream
+ memStream.Write(valueAsBytes.Slice(0, bytesConsumed));
+ }
+ else
+ {
+ // Invalid scalar value - copy U+FFFD to MemoryStream
+ memStream.Write(ReplacementCharSequence);
+ }
+
+ valueAsBytes = valueAsBytes.Slice(bytesConsumed);
+ } while (!valueAsBytes.IsEmpty);
+
+ bool success = memStream.TryGetBuffer(out ArraySegment<byte> memStreamBuffer);
+ Debug.Assert(success, "Couldn't get underlying MemoryStream buffer.");
+
+ return Utf8String.DangerousCreateWithoutValidation(memStreamBuffer, assumeWellFormed: true);
+ }
+#endif // FEATURE_UTF8STRING
+ }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs b/src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs
index 3aad29679d..065c938d81 100644
--- a/src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs
@@ -11,7 +11,7 @@ namespace System.Text
/// <summary>
/// The Unicode replacement character U+FFFD.
/// </summary>
- public const uint ReplacementChar = 0xFFFDU;
+ public const uint ReplacementChar = 0xFFFD;
/// <summary>
/// Returns the Unicode plane (0 through 16, inclusive) which contains this code point.
diff --git a/src/System.Private.CoreLib/src/System/Char8.cs b/src/System.Private.CoreLib/src/System/Char8.cs
new file mode 100644
index 0000000000..7a71e2faa0
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Char8.cs
@@ -0,0 +1,69 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System
+{
+ /// <summary>
+ /// Represents a UTF-8 code unit, the elemental type of <see cref="Utf8String"/>.
+ /// </summary>
+ public readonly struct Char8 : IComparable<Char8>, IEquatable<Char8>
+ {
+ private readonly byte _value;
+
+ private Char8(byte value)
+ {
+ _value = value;
+ }
+
+ public static bool operator ==(Char8 left, Char8 right) => left._value == right._value;
+ public static bool operator !=(Char8 left, Char8 right) => left._value != right._value;
+ public static bool operator <(Char8 left, Char8 right) => left._value < right._value;
+ public static bool operator <=(Char8 left, Char8 right) => left._value <= right._value;
+ public static bool operator >(Char8 left, Char8 right) => left._value > right._value;
+ public static bool operator >=(Char8 left, Char8 right) => left._value >= right._value;
+
+ // Operators from Char8 to <other primitives>
+ // TODO: Once C# gets support for checked operators, we should add those here.
+
+ public static implicit operator byte(Char8 value) => value._value;
+ [CLSCompliant(false)]
+ public static explicit operator sbyte(Char8 value) => (sbyte)value._value; // explicit because can integer overflow
+ public static explicit operator char(Char8 value) => (char)value._value; // explicit because don't want to encourage char conversion
+ public static implicit operator short(Char8 value) => value._value;
+ [CLSCompliant(false)]
+ public static implicit operator ushort(Char8 value) => value._value;
+ public static implicit operator int(Char8 value) => value._value;
+ [CLSCompliant(false)]
+ public static implicit operator uint(Char8 value) => value._value;
+ public static implicit operator long(Char8 value) => value._value;
+ [CLSCompliant(false)]
+ public static implicit operator ulong(Char8 value) => value._value;
+
+ // Operators from <other primitives> to Char8; most are explicit because narrowing conversions could be lossy
+ // TODO: Once C# gets support for checked operators, we should add those here.
+
+ public static implicit operator Char8(byte value) => new Char8(value);
+ [CLSCompliant(false)]
+ public static explicit operator Char8(sbyte value) => new Char8((byte)value);
+ public static explicit operator Char8(char value) => new Char8((byte)value);
+ public static explicit operator Char8(short value) => new Char8((byte)value);
+ [CLSCompliant(false)]
+ public static explicit operator Char8(ushort value) => new Char8((byte)value);
+ public static explicit operator Char8(int value) => new Char8((byte)value);
+ [CLSCompliant(false)]
+ public static explicit operator Char8(uint value) => new Char8((byte)value);
+ public static explicit operator Char8(long value) => new Char8((byte)value);
+ [CLSCompliant(false)]
+ public static explicit operator Char8(ulong value) => new Char8((byte)value);
+
+ public int CompareTo(Char8 other) => this._value.CompareTo(other._value);
+
+ public override bool Equals(object obj) => (obj is Char8 other) && (this == other);
+ public bool Equals(Char8 other) => this == other;
+
+ public override int GetHashCode() => _value;
+
+ public override string ToString() => _value.ToString("X2");
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/Utf8Extensions.cs b/src/System.Private.CoreLib/src/System/Utf8Extensions.cs
new file mode 100644
index 0000000000..9fa2a54f16
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Utf8Extensions.cs
@@ -0,0 +1,367 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Internal.Runtime.CompilerServices;
+
+namespace System
+{
+ public static class Utf8Extensions
+ {
+ /// <summary>
+ /// Reinterprets a span of <see cref="Char8"/> elements as a <see cref="ReadOnlySpan{Byte}"/>
+ /// by passing <paramref name="text"/> through <see cref="MemoryMarshal"/>'s Cast routine.
+ /// </summary>
+ public static ReadOnlySpan<byte> AsBytes(this ReadOnlySpan<Char8> text)
+     => MemoryMarshal.Cast<Char8, byte>(text);
+
+ /// <summary>
+ /// Creates a new readonly span of bytes covering the whole of the target <see cref="Utf8String"/>.
+ /// </summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null.</remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ReadOnlySpan<byte> AsBytes(this Utf8String text)
+ {
+     return (text == null)
+         ? default
+         : new ReadOnlySpan<byte>(ref text.DangerousGetMutableReference(), text.Length);
+ }
+
+ /// <summary>
+ /// Creates a new readonly span of bytes over the target <see cref="Utf8String"/>, from <paramref name="start"/> through the end.
+ /// </summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <remarks>
+ /// Returns default when <paramref name="text"/> is null and <paramref name="start"/> is zero.
+ /// A null <paramref name="text"/> never produces an <see cref="System.ArgumentNullException"/>.
+ /// </remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when the specified <paramref name="start"/> index is not in range (&lt;0 or &gt;text.Length),
+ /// or when <paramref name="text"/> is null and <paramref name="start"/> is nonzero.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ReadOnlySpan<byte> AsBytes(this Utf8String text, int start)
+ {
+     if (text == null)
+     {
+         // With no string to slice, only start == 0 is accepted.
+         if (start != 0)
+         {
+             ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+         }
+
+         return default;
+     }
+
+     // The unsigned comparison rejects negative values and values past the end in one test.
+     if ((uint)start > (uint)text.Length)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+     }
+
+     return new ReadOnlySpan<byte>(ref text.DangerousGetMutableReference(start), text.Length - start);
+ }
+
+ /// <summary>
+ /// Creates a new readonly span over <paramref name="length"/> bytes of the target <see cref="Utf8String"/>, beginning at <paramref name="start"/>.
+ /// </summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <param name="length">The desired length for the slice, in bytes.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null and both <paramref name="start"/> and <paramref name="length"/> are zero.</remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="start"/> or <paramref name="length"/> is not in range, or is nonzero while <paramref name="text"/> is null; the exception always names <paramref name="start"/>.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ReadOnlySpan<byte> AsBytes(this Utf8String text, int start, int length)
+ {
+ if (text == null)
+ {
+ if (start != 0 || length != 0)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+ return default;
+ }
+
+#if BIT64
+ // Zero-extending both values to 64 bits lets a single comparison reject negative inputs and sums past the end; see the comment in Span<T>.Slice.
+ if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)text.Length)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#else
+ if ((uint)start > (uint)text.Length || (uint)length > (uint)(text.Length - start))
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#endif
+
+ return new ReadOnlySpan<byte>(ref text.DangerousGetMutableReference(start), length);
+ }
+
+ /// <summary>
+ /// Creates a new readonly span of <see cref="Char8"/> elements covering the whole of the target <see cref="Utf8String"/>.
+ /// </summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null.</remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ReadOnlySpan<Char8> AsSpan(this Utf8String text)
+ {
+     if (text == null)
+     {
+         return default;
+     }
+
+     // Reinterpret the reference to the string's first byte as a Char8 reference.
+     ref byte firstByte = ref text.DangerousGetMutableReference();
+     return new ReadOnlySpan<Char8>(ref Unsafe.As<byte, Char8>(ref firstByte), text.Length);
+ }
+
+ /// <summary>
+ /// Creates a new readonly span of <see cref="Char8"/> elements over the target <see cref="Utf8String"/>, from <paramref name="start"/> through the end.
+ /// </summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <remarks>
+ /// Returns default when <paramref name="text"/> is null and <paramref name="start"/> is zero.
+ /// A null <paramref name="text"/> never produces an <see cref="System.ArgumentNullException"/>.
+ /// </remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when the specified <paramref name="start"/> index is not in range (&lt;0 or &gt;text.Length),
+ /// or when <paramref name="text"/> is null and <paramref name="start"/> is nonzero.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ReadOnlySpan<Char8> AsSpan(this Utf8String text, int start)
+ {
+     if (text == null)
+     {
+         // With no string to slice, only start == 0 is accepted.
+         if (start != 0)
+         {
+             ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+         }
+
+         return default;
+     }
+
+     // The unsigned comparison rejects negative values and values past the end in one test.
+     if ((uint)start > (uint)text.Length)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+     }
+
+     return new ReadOnlySpan<Char8>(ref Unsafe.As<byte, Char8>(ref text.DangerousGetMutableReference(start)), text.Length - start);
+ }
+
+ /// <summary>
+ /// Creates a new readonly span over <paramref name="length"/> <see cref="Char8"/> elements of the target <see cref="Utf8String"/>, beginning at <paramref name="start"/>.
+ /// </summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <param name="length">The desired length for the slice, in elements.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null and both <paramref name="start"/> and <paramref name="length"/> are zero.</remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="start"/> or <paramref name="length"/> is not in range, or is nonzero while <paramref name="text"/> is null; the exception always names <paramref name="start"/>.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ReadOnlySpan<Char8> AsSpan(this Utf8String text, int start, int length)
+ {
+ if (text == null)
+ {
+ if (start != 0 || length != 0)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+ return default;
+ }
+
+#if BIT64
+ // Zero-extending both values to 64 bits lets a single comparison reject negative inputs and sums past the end; see the comment in Span<T>.Slice.
+ if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)text.Length)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#else
+ if ((uint)start > (uint)text.Length || (uint)length > (uint)(text.Length - start))
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#endif
+
+ return new ReadOnlySpan<Char8>(ref Unsafe.As<byte, Char8>(ref text.DangerousGetMutableReference(start)), length);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of <see cref="Char8"/> covering the whole of the target <see cref="Utf8String"/>.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null.</remarks>
+ public static ReadOnlyMemory<Char8> AsMemory(this Utf8String text)
+ {
+     return (text == null)
+         ? default
+         : new ReadOnlyMemory<Char8>(text, 0, text.Length);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of <see cref="Char8"/> over the target <see cref="Utf8String"/>, from <paramref name="start"/> through the end.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null and <paramref name="start"/> is zero.</remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when the specified <paramref name="start"/> index is not in range (&lt;0 or &gt;text.Length),
+ /// or when <paramref name="text"/> is null and <paramref name="start"/> is nonzero.
+ /// </exception>
+ public static ReadOnlyMemory<Char8> AsMemory(this Utf8String text, int start)
+ {
+     if (text == null)
+     {
+         if (start != 0)
+         {
+             ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+         }
+
+         return default;
+     }
+
+     int textLength = text.Length;
+     if ((uint)start > (uint)textLength)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+     }
+
+     return new ReadOnlyMemory<Char8>(text, start, textLength - start);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of <see cref="Char8"/> over the target <see cref="Utf8String"/>, from <paramref name="startIndex"/> through the end.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="startIndex">The index at which to begin this slice.</param>
+ /// <remarks>
+ /// When <paramref name="text"/> is null, default is returned only if <paramref name="startIndex"/> equals
+ /// <see cref="Index.Start"/>; any other index is reported via ThrowHelper.ThrowArgumentNullException.
+ /// </remarks>
+ public static ReadOnlyMemory<Char8> AsMemory(this Utf8String text, Index startIndex)
+ {
+     if (text == null)
+     {
+         if (!startIndex.Equals(Index.Start))
+         {
+             ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
+         }
+
+         return default;
+     }
+
+     // Resolve the Index against the string's length, then bounds-check the resulting offset.
+     int offset = startIndex.GetOffset(text.Length);
+     if ((uint)offset > (uint)text.Length)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException();
+     }
+
+     return new ReadOnlyMemory<Char8>(text, offset, text.Length - offset);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> over <paramref name="length"/> elements of the target <see cref="Utf8String"/>, beginning at <paramref name="start"/>.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <param name="length">The desired length for the slice, in elements.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null and both <paramref name="start"/> and <paramref name="length"/> are zero.</remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="start"/> or <paramref name="length"/> is not in range, or is nonzero while <paramref name="text"/> is null; the exception always names <paramref name="start"/>.
+ /// </exception>
+ public static ReadOnlyMemory<Char8> AsMemory(this Utf8String text, int start, int length)
+ {
+ if (text == null)
+ {
+ if (start != 0 || length != 0)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+ return default;
+ }
+
+#if BIT64
+ // Zero-extending both values to 64 bits lets a single comparison reject negative inputs and sums past the end; see the comment in Span<T>.Slice.
+ if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)text.Length)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#else
+ if ((uint)start > (uint)text.Length || (uint)length > (uint)(text.Length - start))
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#endif
+
+ return new ReadOnlyMemory<Char8>(text, start, length);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of <see cref="Char8"/> over the part of the target <see cref="Utf8String"/> selected by <paramref name="range"/>.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="range">The range used to indicate the start and length of the sliced string.</param>
+ /// <remarks>
+ /// When <paramref name="text"/> is null, default is returned only if both ends of <paramref name="range"/> equal
+ /// <see cref="Index.Start"/>; any other range is reported via ThrowHelper.ThrowArgumentNullException.
+ /// </remarks>
+ public static ReadOnlyMemory<Char8> AsMemory(this Utf8String text, Range range)
+ {
+     if (text == null)
+     {
+         Index rangeStart = range.Start;
+         Index rangeEnd = range.End;
+
+         bool isEmptyRangeAtStart = rangeStart.Equals(Index.Start) && rangeEnd.Equals(Index.Start);
+         if (!isEmptyRangeAtStart)
+         {
+             ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
+         }
+
+         return default;
+     }
+
+     // Bounds are resolved by Range.GetOffsetAndLength against the string's length.
+     (int offset, int count) = range.GetOffsetAndLength(text.Length);
+     return new ReadOnlyMemory<Char8>(text, offset, count);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of bytes covering the whole of the target <see cref="Utf8String"/>.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null.</remarks>
+ public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text)
+ {
+     return (text == null)
+         ? default
+         : new ReadOnlyMemory<byte>(text, 0, text.Length);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of bytes over the target <see cref="Utf8String"/>, from <paramref name="start"/> through the end.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null and <paramref name="start"/> is zero.</remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when the specified <paramref name="start"/> index is not in range (&lt;0 or &gt;text.Length),
+ /// or when <paramref name="text"/> is null and <paramref name="start"/> is nonzero.
+ /// </exception>
+ public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text, int start)
+ {
+     if (text == null)
+     {
+         if (start != 0)
+         {
+             ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+         }
+
+         return default;
+     }
+
+     int textLength = text.Length;
+     if ((uint)start > (uint)textLength)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+     }
+
+     return new ReadOnlyMemory<byte>(text, start, textLength - start);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of bytes over the target <see cref="Utf8String"/>, from <paramref name="startIndex"/> through the end.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="startIndex">The index at which to begin this slice.</param>
+ /// <remarks>
+ /// When <paramref name="text"/> is null, default is returned only if <paramref name="startIndex"/> equals
+ /// <see cref="Index.Start"/>; any other index is reported via ThrowHelper.ThrowArgumentNullException.
+ /// </remarks>
+ public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text, Index startIndex)
+ {
+     if (text == null)
+     {
+         if (!startIndex.Equals(Index.Start))
+         {
+             ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
+         }
+
+         return default;
+     }
+
+     // Resolve the Index against the string's length, then bounds-check the resulting offset.
+     int offset = startIndex.GetOffset(text.Length);
+     if ((uint)offset > (uint)text.Length)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException();
+     }
+
+     return new ReadOnlyMemory<byte>(text, offset, text.Length - offset);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> over <paramref name="length"/> bytes of the target <see cref="Utf8String"/>, beginning at <paramref name="start"/>.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="start">The index at which to begin this slice.</param>
+ /// <param name="length">The desired length for the slice, in bytes.</param>
+ /// <remarks>Returns default when <paramref name="text"/> is null and both <paramref name="start"/> and <paramref name="length"/> are zero.</remarks>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="start"/> or <paramref name="length"/> is not in range, or is nonzero while <paramref name="text"/> is null; the exception always names <paramref name="start"/>.
+ /// </exception>
+ public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text, int start, int length)
+ {
+ if (text == null)
+ {
+ if (start != 0 || length != 0)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+ return default;
+ }
+
+#if BIT64
+ // Zero-extending both values to 64 bits lets a single comparison reject negative inputs and sums past the end; see the comment in Span<T>.Slice.
+ if ((ulong)(uint)start + (ulong)(uint)length > (ulong)(uint)text.Length)
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#else
+ if ((uint)start > (uint)text.Length || (uint)length > (uint)(text.Length - start))
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.start);
+#endif
+
+ return new ReadOnlyMemory<byte>(text, start, length);
+ }
+
+ /// <summary>Creates a new <see cref="ReadOnlyMemory{T}"/> of bytes over the part of the target <see cref="Utf8String"/> selected by <paramref name="range"/>.</summary>
+ /// <param name="text">The target <see cref="Utf8String"/>.</param>
+ /// <param name="range">The range used to indicate the start and length of the sliced string.</param>
+ /// <remarks>
+ /// When <paramref name="text"/> is null, default is returned only if both ends of <paramref name="range"/> equal
+ /// <see cref="Index.Start"/>; any other range is reported via ThrowHelper.ThrowArgumentNullException.
+ /// </remarks>
+ public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text, Range range)
+ {
+     if (text == null)
+     {
+         Index rangeStart = range.Start;
+         Index rangeEnd = range.End;
+
+         bool isEmptyRangeAtStart = rangeStart.Equals(Index.Start) && rangeEnd.Equals(Index.Start);
+         if (!isEmptyRangeAtStart)
+         {
+             ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
+         }
+
+         return default;
+     }
+
+     // Bounds are resolved by Range.GetOffsetAndLength against the string's length.
+     (int offset, int count) = range.GetOffsetAndLength(text.Length);
+     return new ReadOnlyMemory<byte>(text, offset, count);
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/Utf8String.Construction.cs b/src/System.Private.CoreLib/src/System/Utf8String.Construction.cs
new file mode 100644
index 0000000000..9ecd44f3ae
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Utf8String.Construction.cs
@@ -0,0 +1,223 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Text.Unicode;
+
+namespace System
+{
+ public sealed partial class Utf8String
+ {
+ /*
+ * CONSTRUCTORS
+ *
+ * Defining a new constructor for string-like types (like Utf8String) requires changes both
+ * to the managed code below and to the native VM code. See the comment at the top of
+ * src/vm/ecall.cpp for instructions on how to add new overloads.
+ *
+ * The default behavior of each ctor is to validate the input, replacing invalid sequences with the
+ * Unicode replacement character U+FFFD. The resulting Utf8String instance will be well-formed but
+ * might not have full fidelity with the input data. This behavior can be controlled by calling
+ * any of the Create instances and specifying a different action.
+ */
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> from a span of UTF-8 bytes.
+ /// </summary>
+ /// <param name="value">The UTF-8 data to copy into the new instance.</param>
+ /// <remarks>
+ /// The contents of <paramref name="value"/> are checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ public extern Utf8String(ReadOnlySpan<byte> value);
+
+ // Managed counterpart of the constructor above (presumably wired up by the runtime; see the
+ // CONSTRUCTORS note). Empty input maps to the shared Empty instance; otherwise the bytes are
+ // copied into a fresh allocation that is then validated and fixed up.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private Utf8String Ctor(ReadOnlySpan<byte> value)
+ {
+     if (value.IsEmpty)
+     {
+         return Empty;
+     }
+
+     Utf8String result = FastAllocate(value.Length);
+
+     ref byte destination = ref result.DangerousGetMutableReference();
+     ref byte source = ref MemoryMarshal.GetReference(value);
+     Buffer.Memmove(ref destination, ref source, (uint)value.Length);
+
+     return Utf8Utility.ValidateAndFixupUtf8String(result);
+ }
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> from a segment of a UTF-8 byte array.
+ /// </summary>
+ /// <param name="value">The array holding the UTF-8 data.</param>
+ /// <param name="startIndex">The index of the first byte of the segment within <paramref name="value"/>.</param>
+ /// <param name="length">The number of bytes in the segment.</param>
+ /// <remarks>
+ /// The selected bytes are checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ public extern Utf8String(byte[] value, int startIndex, int length);
+
+ // Managed counterpart of the constructor above: wraps the array segment in a span and
+ // defers to the ReadOnlySpan<byte> overload.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private Utf8String Ctor(byte[] value, int startIndex, int length)
+ {
+     return Ctor(new ReadOnlySpan<byte>(value, startIndex, length));
+ }
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> from a pointer to null-terminated UTF-8 data.
+ /// </summary>
+ /// <param name="value">Pointer to the first byte of the null-terminated data.</param>
+ /// <remarks>
+ /// The data at <paramref name="value"/> is checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ [CLSCompliant(false)]
+ public unsafe extern Utf8String(byte* value);
+
+ // Managed counterpart of the constructor above: a null pointer maps to Empty; otherwise the
+ // length is measured with string.strlen and the ReadOnlySpan<byte> overload does the rest.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private unsafe Utf8String Ctor(byte* value)
+ {
+     return (value == null)
+         ? Empty
+         : Ctor(new ReadOnlySpan<byte>(value, string.strlen(value)));
+ }
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> by transcoding a span of UTF-16 characters.
+ /// </summary>
+ /// <param name="value">The UTF-16 data to transcode.</param>
+ /// <remarks>
+ /// The contents of <paramref name="value"/> are checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ public extern Utf8String(ReadOnlySpan<char> value);
+
+ // Managed counterpart of the constructor above: empty input maps to Empty; otherwise the
+ // required byte count is measured first and the characters are encoded straight into the
+ // newly allocated instance.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private Utf8String Ctor(ReadOnlySpan<char> value)
+ {
+     if (value.IsEmpty)
+     {
+         return Empty;
+     }
+
+     // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.
+
+     int utf8ByteCount = Encoding.UTF8.GetByteCount(value);
+     Utf8String result = FastAllocate(utf8ByteCount);
+
+     Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
+     Encoding.UTF8.GetBytes(value, destination);
+
+     return result;
+ }
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> by transcoding a segment of a UTF-16 character array.
+ /// </summary>
+ /// <param name="value">The array holding the UTF-16 data.</param>
+ /// <param name="startIndex">The index of the first character of the segment within <paramref name="value"/>.</param>
+ /// <param name="length">The number of characters in the segment.</param>
+ /// <remarks>
+ /// The selected characters are checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ public extern Utf8String(char[] value, int startIndex, int length);
+
+ // Managed counterpart of the constructor above: wraps the array segment in a span and
+ // defers to the ReadOnlySpan<char> overload.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private Utf8String Ctor(char[] value, int startIndex, int length)
+ {
+     return Ctor(new ReadOnlySpan<char>(value, startIndex, length));
+ }
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> by transcoding null-terminated UTF-16 data.
+ /// </summary>
+ /// <param name="value">Pointer to the first character of the null-terminated data.</param>
+ /// <remarks>
+ /// The data at <paramref name="value"/> is checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ [CLSCompliant(false)]
+ public unsafe extern Utf8String(char* value);
+
+ // Managed counterpart of the constructor above: a null pointer maps to Empty; otherwise the
+ // length is measured with string.wcslen and the ReadOnlySpan<char> overload does the rest.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private unsafe Utf8String Ctor(char* value)
+ {
+     return (value == null)
+         ? Empty
+         : Ctor(new ReadOnlySpan<char>(value, string.wcslen(value)));
+ }
+
+ /// <summary>
+ /// Initializes a <see cref="Utf8String"/> by transcoding an existing UTF-16 <see cref="string"/>.
+ /// </summary>
+ /// <param name="value">The UTF-16 string to transcode.</param>
+ /// <remarks>
+ /// The contents of <paramref name="value"/> are checked for well-formedness during construction;
+ /// invalid code unit sequences are replaced with U+FFFD in the resulting <see cref="Utf8String"/>.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ public extern Utf8String(string value);
+
+ // Managed counterpart of the constructor above: projects the string as a span and
+ // defers to the ReadOnlySpan<char> overload.
+#if PROJECTN
+ [DependencyReductionRoot]
+#endif
+#if !CORECLR
+ static
+#endif
+ private Utf8String Ctor(string value)
+ {
+     return Ctor(value.AsSpan());
+ }
+
+ /*
+ * HELPER METHODS
+ */
+
+ /// <summary>
+ /// Creates a <see cref="Utf8String"/> instance by copying <paramref name="utf8Data"/> verbatim, with no validation.
+ /// </summary>
+ /// <param name="utf8Data">The bytes to copy into the new instance.</param>
+ /// <param name="assumeWellFormed">Flag describing the data; not consulted by the current body.</param>
+ /// <param name="assumeAscii">Flag describing the data; not consulted by the current body.</param>
+ /// <returns>The shared Empty instance for empty input; otherwise a newly allocated copy.</returns>
+ internal static Utf8String DangerousCreateWithoutValidation(ReadOnlySpan<byte> utf8Data, bool assumeWellFormed = false, bool assumeAscii = false)
+ {
+     if (utf8Data.IsEmpty)
+     {
+         return Empty;
+     }
+
+     Utf8String result = FastAllocate(utf8Data.Length);
+
+     Span<byte> destination = new Span<byte>(ref result.DangerousGetMutableReference(), result.Length);
+     utf8Data.CopyTo(destination);
+
+     return result;
+ }
+
+ /// <summary>
+ /// Creates a new zero-initialized instance of the specified length. Actual storage allocated is "length + 1" bytes
+ /// because instances are null-terminated.
+ /// </summary>
+ /// <param name="length">The length of the new instance, not counting the null terminator.</param>
+ /// <remarks>
+ /// The implementation of this method checks its input argument for overflow.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.InternalCall)]
+ private static extern Utf8String FastAllocate(int length);
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs b/src/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs
new file mode 100644
index 0000000000..6e5209962f
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Utf8String.Manipulation.cs
@@ -0,0 +1,109 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace System
+{
+ public sealed partial class Utf8String
+ {
+ /// <summary>
+ /// Substrings this <see cref="Utf8String"/> without bounds checking.
+ /// </summary>
+ /// <param name="startIndex">Index of the first byte to copy.</param>
+ /// <param name="length">Number of bytes to copy.</param>
+ /// <remarks>
+ /// Arguments are verified only by debug assertions. Callers must validate them beforehand and must
+ /// handle the <paramref name="length"/> == 0 and <paramref name="length"/> == Length cases themselves.
+ /// </remarks>
+ private Utf8String InternalSubstring(int startIndex, int length)
+ {
+     Debug.Assert(startIndex >= 0, "StartIndex cannot be negative.");
+     Debug.Assert(startIndex <= this.Length, "StartIndex cannot point beyond the end of the string (except to the null terminator).");
+     Debug.Assert(length >= 0, "Length cannot be negative.");
+     Debug.Assert(startIndex + length <= this.Length, "StartIndex and Length cannot point beyond the end of the string.");
+
+     // Only 'length' gets a boundary assertion. startIndex == 0 is a legitimate input here
+     // (Substring(0, n) with 0 < n < Length reaches this method), and startIndex == Length
+     // would force length == 0, which the assertion below already rejects.
+     Debug.Assert(length != 0 && length != this.Length, "Caller should handle Length boundary conditions.");
+
+     Utf8String newString = FastAllocate(length);
+     Buffer.Memmove(ref newString.DangerousGetMutableReference(), ref this.DangerousGetMutableReference(startIndex), (uint)length);
+     return newString;
+ }
+
+ /// <summary>
+ /// Returns the substring that begins at <paramref name="startIndex"/> and runs to the end of this instance.
+ /// </summary>
+ /// <remarks>The index is resolved against <see cref="Length"/> and forwarded to the int-based overload.</remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public Utf8String Substring(Index startIndex)
+ {
+     return Substring(startIndex.GetOffset(Length));
+ }
+
+ /// <summary>
+ /// Returns the substring that begins at <paramref name="startIndex"/> and runs to the end of this instance.
+ /// </summary>
+ /// <param name="startIndex">Index of the first byte of the substring; must lie in [0, Length].</param>
+ /// <remarks>
+ /// Because instances are immutable, a <paramref name="startIndex"/> of zero returns this same instance
+ /// and a <paramref name="startIndex"/> equal to Length returns the shared Empty instance.
+ /// </remarks>
+ public Utf8String Substring(int startIndex)
+ {
+     int totalLength = this.Length;
+
+     if ((uint)startIndex > (uint)totalLength)
+     {
+         ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startIndex);
+     }
+
+     // Boundary shortcuts: no allocation is needed at either end of the string.
+     if (startIndex == 0)
+     {
+         return this;
+     }
+     else if (startIndex == totalLength)
+     {
+         return Empty;
+     }
+
+     return InternalSubstring(startIndex, totalLength - startIndex);
+ }
+
+ /// <summary>
+ /// Returns the substring of <paramref name="length"/> bytes that begins at <paramref name="startIndex"/>.
+ /// </summary>
+ /// <param name="startIndex">Index of the first byte of the substring.</param>
+ /// <param name="length">Number of bytes in the substring.</param>
+ /// <remarks>
+ /// Because instances are immutable, a zero <paramref name="length"/> returns the shared Empty instance
+ /// and a <paramref name="length"/> equal to Length returns this same instance.
+ /// </remarks>
+ public Utf8String Substring(int startIndex, int length)
+ {
+     ValidateStartIndexAndLength(startIndex, length);
+
+     // Boundary shortcuts keyed on the requested length; neither needs an allocation.
+     if (length == 0)
+     {
+         return Empty;
+     }
+     else if (length == this.Length)
+     {
+         return this;
+     }
+     else
+     {
+         return InternalSubstring(startIndex, length);
+     }
+ }
+
+ /// <summary>
+ /// Returns the substring selected by <paramref name="range"/>.
+ /// </summary>
+ /// <remarks>The range is resolved against <see cref="Length"/> and forwarded to the (startIndex, length) overload.</remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public Utf8String Substring(Range range)
+ {
+     (int offset, int count) = range.GetOffsetAndLength(Length);
+     return Substring(offset, count);
+ }
+
+ // Checks that [startIndex, startIndex + length) lies within this instance and, if not,
+ // delegates to ValidateStartIndexAndLength_Throw to raise the exception.
+ [StackTraceHidden]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private void ValidateStartIndexAndLength(int startIndex, int length)
+ {
+#if BIT64
+ // Zero-extending both values to 64 bits lets a single comparison reject negative inputs and sums past the end; see the comment in Span<T>.Slice.
+ if ((ulong)(uint)startIndex + (ulong)(uint)length > (ulong)(uint)this.Length)
+ ValidateStartIndexAndLength_Throw(startIndex, length);
+#else
+ if ((uint)startIndex > (uint)this.Length || (uint)length > (uint)(this.Length - startIndex))
+ ValidateStartIndexAndLength_Throw(startIndex, length);
+#endif
+ }
+
+ // Throws ArgumentOutOfRangeException, naming startIndex when it is itself out of range
+ // and length otherwise.
+ [StackTraceHidden]
+ private void ValidateStartIndexAndLength_Throw(int startIndex, int length)
+ {
+     string offendingParameter;
+     if ((uint)startIndex > (uint)this.Length)
+     {
+         offendingParameter = nameof(startIndex);
+     }
+     else
+     {
+         offendingParameter = nameof(length);
+     }
+
+     throw new ArgumentOutOfRangeException(paramName: offendingParameter);
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/Utf8String.Searching.cs b/src/System.Private.CoreLib/src/System/Utf8String.Searching.cs
new file mode 100644
index 0000000000..0373cdd4fd
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Utf8String.Searching.cs
@@ -0,0 +1,93 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Text.Unicode;
+
+namespace System
+{
+ public sealed partial class Utf8String
+ {
+ // Ordinal search. Returns false when 'value' cannot be turned into a Rune.
+ public bool Contains(char value)
+ {
+     if (!Rune.TryCreate(value, out Rune rune))
+     {
+         return false;
+     }
+
+     return Contains(rune);
+ }
+
+ // Ordinal search: encodes 'value' as UTF-8 and looks for that byte sequence in this instance.
+ public bool Contains(Rune value)
+ {
+     // TODO_UTF8STRING: This should be split into two methods:
+     // One which operates on a single-byte (ASCII) search value,
+     // the other which operates on a multi-byte (non-ASCII) search value.
+
+     Span<byte> encoded = stackalloc byte[Utf8Utility.MaxBytesPerScalar];
+     int encodedLength = value.EncodeToUtf8(encoded);
+
+     int matchIndex = SpanHelpers.IndexOf(
+         ref DangerousGetMutableReference(), Length,
+         ref MemoryMarshal.GetReference(encoded), encodedLength);
+
+     return matchIndex >= 0;
+ }
+
+ // Ordinal search. Returns false when 'value' cannot be turned into a Rune.
+ public bool EndsWith(char value)
+ {
+     if (!Rune.TryCreate(value, out Rune rune))
+     {
+         return false;
+     }
+
+     return EndsWith(rune);
+ }
+
+ // Ordinal search: encodes 'value' as UTF-8 and tests whether this instance's bytes end with it.
+ public bool EndsWith(Rune value)
+ {
+     // TODO_UTF8STRING: This should be split into two methods:
+     // One which operates on a single-byte (ASCII) search value,
+     // the other which operates on a multi-byte (non-ASCII) search value.
+
+     Span<byte> encoded = stackalloc byte[Utf8Utility.MaxBytesPerScalar];
+     int encodedLength = value.EncodeToUtf8(encoded);
+
+     ReadOnlySpan<byte> suffix = encoded.Slice(0, encodedLength);
+     return this.AsBytes().EndsWith(suffix);
+ }
+
+ // Ordinal search. Returns -1 when 'value' cannot be turned into a Rune.
+ public int IndexOf(char value)
+ {
+     if (Rune.TryCreate(value, out Rune rune))
+     {
+         return IndexOf(rune);
+     }
+
+     return -1;
+ }
+
+ // Ordinal search: encodes 'value' as UTF-8 and returns whatever SpanHelpers.IndexOf reports
+ // for that byte sequence within this instance.
+ public int IndexOf(Rune value)
+ {
+     // TODO_UTF8STRING: This should be split into two methods:
+     // One which operates on a single-byte (ASCII) search value,
+     // the other which operates on a multi-byte (non-ASCII) search value.
+
+     Span<byte> encoded = stackalloc byte[Utf8Utility.MaxBytesPerScalar];
+     int encodedLength = value.EncodeToUtf8(encoded);
+
+     int matchIndex = SpanHelpers.IndexOf(
+         ref DangerousGetMutableReference(), Length,
+         ref MemoryMarshal.GetReference(encoded), encodedLength);
+
+     return matchIndex;
+ }
+
+ // Ordinal search. Returns false when 'value' cannot be turned into a Rune.
+ public bool StartsWith(char value)
+ {
+     if (!Rune.TryCreate(value, out Rune rune))
+     {
+         return false;
+     }
+
+     return StartsWith(rune);
+ }
+
+ // Ordinal search: encodes 'value' as UTF-8 and tests whether this instance's bytes begin with it.
+ public bool StartsWith(Rune value)
+ {
+     // TODO_UTF8STRING: This should be split into two methods:
+     // One which operates on a single-byte (ASCII) search value,
+     // the other which operates on a multi-byte (non-ASCII) search value.
+
+     Span<byte> encoded = stackalloc byte[Utf8Utility.MaxBytesPerScalar];
+     int encodedLength = value.EncodeToUtf8(encoded);
+
+     ReadOnlySpan<byte> prefix = encoded.Slice(0, encodedLength);
+     return this.AsBytes().StartsWith(prefix);
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/Utf8String.cs b/src/System.Private.CoreLib/src/System/Utf8String.cs
new file mode 100644
index 0000000000..1a4357a06f
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Utf8String.cs
@@ -0,0 +1,252 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.ComponentModel;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Text;
+using Internal.Runtime.CompilerServices;
+
+namespace System
+{
+ /// <summary>
+ /// Represents an immutable string of UTF-8 code units.
+ /// </summary>
+ public sealed partial class Utf8String : IEquatable<Utf8String>
+ {
+ /*
+ * STATIC FIELDS
+ */
+
+ public static readonly Utf8String Empty = FastAllocate(0);
+
+ /*
+ * INSTANCE FIELDS
+ * Do not reorder these fields. They must match the layout of Utf8StringObject in object.h.
+ */
+
+ private readonly int _length;
+ private readonly byte _firstByte;
+
+ /*
+ * OPERATORS
+ */
+
+ /// <summary>
+ /// Compares two <see cref="Utf8String"/> instances for equality using a <see cref="StringComparison.Ordinal"/> comparer.
+ /// </summary>
+ public static bool operator ==(Utf8String left, Utf8String right) => Equals(left, right);
+
+ /// <summary>
+ /// Compares two <see cref="Utf8String"/> instances for inequality using a <see cref="StringComparison.Ordinal"/> comparer.
+ /// </summary>
+ public static bool operator !=(Utf8String left, Utf8String right) => !Equals(left, right);
+
+ /// <summary>
+ /// Projects a <see cref="Utf8String"/> instance as a <see cref="ReadOnlySpan{Byte}"/>.
+ /// </summary>
+ public static explicit operator ReadOnlySpan<byte>(Utf8String value) => value.AsBytes();
+
+ /// <summary>
+ /// Projects a <see cref="Utf8String"/> instance as a <see cref="ReadOnlySpan{Char8}"/>.
+ /// </summary>
+ public static implicit operator ReadOnlySpan<Char8>(Utf8String value) => value.AsSpan();
+
+ /*
+ * INSTANCE PROPERTIES
+ */
+
+ /// <summary>
+ /// Returns the length (in UTF-8 code units) of this instance.
+ /// </summary>
+ public int Length => _length;
+
+ /*
+ * INSTANCE INDEXERS
+ */
+
+ /// <summary>
+ /// Gets the <see cref="Char8"/> at the specified position.
+ /// </summary>
+ public Char8 this[int index]
+ {
+ get
+ {
+ // Just like String, we don't allow indexing into the null terminator itself.
+
+ if ((uint)index >= (uint)Length)
+ {
+ ThrowHelper.ThrowArgumentOutOfRange_IndexException();
+ }
+
+ return Unsafe.Add(ref DangerousGetMutableReference(), index);
+ }
+ }
+
+ /// <summary>
+ /// Gets the <see cref="Char8"/> at the specified position.
+ /// </summary>
+ public Char8 this[Index index]
+ {
+ get
+ {
+ // Just like String, we don't allow indexing into the null terminator itself.
+
+ int actualIndex = index.GetOffset(Length);
+ return this[actualIndex];
+ }
+ }
+
+ /// <summary>
+ /// Gets a substring of this <see cref="Utf8String"/> based on the provided <paramref name="range"/>.
+ /// </summary>
+ public Utf8String this[Range range] => Substring(range);
+
+ /*
+ * METHODS
+ */
+
+ /// <summary>
+ /// Returns a <em>mutable</em> reference to the first byte of this <see cref="Utf8String"/>
+ /// (or the null terminator if the string is empty).
+ /// </summary>
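+ /// <returns>A mutable reference to the first byte of this instance, or to the null terminator if the instance is empty.</returns>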
+ internal ref byte DangerousGetMutableReference() => ref Unsafe.AsRef(in _firstByte);
+
+ /// <summary>
+ /// Returns a <em>mutable</em> reference to the element at index <paramref name="index"/>
+ /// of this <see cref="Utf8String"/> instance. The index is not bounds-checked.
+ /// </summary>
+ internal ref byte DangerousGetMutableReference(int index)
+ {
+ // Allow retrieving references to the null terminator.
+ Debug.Assert((uint)index <= (uint)Length, "Caller should've performed bounds checking.");
+
+ return ref Unsafe.Add(ref DangerousGetMutableReference(), index);
+ }
+
+ /// <summary>
+ /// Performs an equality comparison using a <see cref="StringComparison.Ordinal"/> comparer.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+ return obj is Utf8String other && this.Equals(other);
+ }
+
+ /// <summary>
+ /// Performs an equality comparison using a <see cref="StringComparison.Ordinal"/> comparer.
+ /// </summary>
+ public bool Equals(Utf8String value)
+ {
+ // First, a very quick check for referential equality.
+
+ if (ReferenceEquals(this, value))
+ {
+ return true;
+ }
+
+ // Otherwise, perform a simple bitwise equality check.
+
+ return !(value is null)
+ && this.Length == value.Length
+ && SpanHelpers.SequenceEqual(ref this.DangerousGetMutableReference(), ref value.DangerousGetMutableReference(), (uint)Length);
+ }
+
+ /// <summary>
+ /// Compares two <see cref="Utf8String"/> instances using a <see cref="StringComparison.Ordinal"/> comparer.
+ /// </summary>
+ public static bool Equals(Utf8String left, Utf8String right)
+ {
+ // First, a very quick check for referential equality.
+
+ if (ReferenceEquals(left, right))
+ {
+ return true;
+ }
+
+ // Otherwise, perform a simple bitwise equality check.
+
+ return !(left is null)
+ && !(right is null)
+ && left.Length == right.Length
+ && SpanHelpers.SequenceEqual(ref left.DangerousGetMutableReference(), ref right.DangerousGetMutableReference(), (uint)left.Length);
+ }
+
+ /// <summary>
+ /// Returns a hash code using a <see cref="StringComparison.Ordinal"/> comparison.
+ /// </summary>
+ public override int GetHashCode()
+ {
+ // TODO_UTF8STRING: Consider whether this should use a different seed than String.GetHashCode.
+
+ ulong seed = Marvin.DefaultSeed;
+ return Marvin.ComputeHash32(ref DangerousGetMutableReference(), _length /* in bytes */, (uint)seed, (uint)(seed >> 32));
+ }
+
+ /// <summary>
+ /// Gets an immutable reference that can be used in a <see langword="fixed"/> statement. The resulting
+ /// reference can be pinned and used as a null-terminated <em>LPCUTF8STR</em>.
+ /// </summary>
+ /// <remarks>
+ /// If this <see cref="Utf8String"/> instance is empty, returns a reference to the null terminator.
+ /// </remarks>
+ [EditorBrowsable(EditorBrowsableState.Never)] // for compiler use only
+ public ref readonly byte GetPinnableReference() => ref _firstByte;
+
+ /// <summary>
+ /// Returns <see langword="true"/> if <paramref name="value"/> is <see langword="null"/> or zero length;
+ /// <see langword="false"/> otherwise.
+ /// </summary>
+ public static bool IsNullOrEmpty(Utf8String value)
+ {
+ // Copied from String.IsNullOrEmpty. See that method for detailed comments on why this pattern is used.
+ return (value is null || 0u >= (uint)value.Length) ? true : false;
+ }
+
+ /// <summary>
+ /// Returns the entire <see cref="Utf8String"/> as an array of bytes.
+ /// </summary>
+ public byte[] ToByteArray()
+ {
+ if (Length == 0)
+ {
+ return Array.Empty<byte>();
+ }
+
+ byte[] bytes = new byte[Length];
+ Buffer.Memmove(ref bytes.GetRawSzArrayData(), ref DangerousGetMutableReference(), (uint)Length);
+ return bytes;
+ }
+
+ /// <summary>
+ /// Returns a substring of this <see cref="Utf8String"/> as an array of bytes.
+ /// </summary>
+ public byte[] ToByteArray(int startIndex, int length)
+ {
+ ValidateStartIndexAndLength(startIndex, length);
+
+ if (length == 0)
+ {
+ return Array.Empty<byte>();
+ }
+
+ byte[] bytes = new byte[length];
+ Buffer.Memmove(ref bytes.GetRawSzArrayData(), ref DangerousGetMutableReference(startIndex), (uint)length);
+ return bytes;
+ }
+
+ /// <summary>
+ /// Converts this <see cref="Utf8String"/> instance to a <see cref="string"/>.
+ /// </summary>
+ /// <remarks>
+ /// Invalid subsequences are replaced with U+FFFD during conversion.
+ /// </remarks>
+ public override string ToString()
+ {
+ // TODO_UTF8STRING: Call into optimized transcoding routine when it's available.
+
+ return Encoding.UTF8.GetString(new ReadOnlySpan<byte>(ref DangerousGetMutableReference(), Length));
+ }
+ }
+}
diff --git a/src/classlibnative/bcltype/objectnative.cpp b/src/classlibnative/bcltype/objectnative.cpp
index a90a37a692..64914d8807 100644
--- a/src/classlibnative/bcltype/objectnative.cpp
+++ b/src/classlibnative/bcltype/objectnative.cpp
@@ -253,6 +253,9 @@ FCIMPL1(Object*, ObjectNative::Clone, Object* pThisUNSAFE)
// assert that String has overloaded the Clone() method
_ASSERTE(pMT != g_pStringClass);
+#ifdef FEATURE_UTF8STRING
+ _ASSERTE(pMT != g_pUtf8StringClass);
+#endif // FEATURE_UTF8STRING
if (pMT->IsArray()) {
refClone = DupArrayForCloning((BASEARRAYREF)refThis);
diff --git a/src/inc/dacvars.h b/src/inc/dacvars.h
index fc5be15590..cec6d74dd7 100644
--- a/src/inc/dacvars.h
+++ b/src/inc/dacvars.h
@@ -168,6 +168,9 @@ DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pObjectClass, ::g_pObjectClass
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pRuntimeTypeClass, ::g_pRuntimeTypeClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pCanonMethodTableClass, ::g_pCanonMethodTableClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pStringClass, ::g_pStringClass)
+#ifdef FEATURE_UTF8STRING
+DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pUtf8StringClass, ::g_pUtf8StringClass)
+#endif // FEATURE_UTF8STRING
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pArrayClass, ::g_pArrayClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pSZArrayHelperClass, ::g_pSZArrayHelperClass)
DEFINE_DACVAR(ULONG, UNKNOWN_POINTER_TYPE, dac__g_pNullableClass, ::g_pNullableClass)
diff --git a/src/strongname/api/common.h b/src/strongname/api/common.h
index 26c545cff9..626d9bb720 100644
--- a/src/strongname/api/common.h
+++ b/src/strongname/api/common.h
@@ -146,6 +146,9 @@ typedef DPTR(class ReJitManager) PTR_ReJitManager;
typedef DPTR(struct ReJitInfo) PTR_ReJitInfo;
typedef DPTR(struct SharedReJitInfo) PTR_SharedReJitInfo;
typedef DPTR(class StringObject) PTR_StringObject;
+#ifdef FEATURE_UTF8STRING
+typedef DPTR(class Utf8StringObject) PTR_Utf8StringObject;
+#endif // FEATURE_UTF8STRING
typedef DPTR(class TypeHandle) PTR_TypeHandle;
#ifdef STUB_DISPATCH
typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
diff --git a/src/vm/appdomain.cpp b/src/vm/appdomain.cpp
index 4eb716164c..9362dd9c7b 100644
--- a/src/vm/appdomain.cpp
+++ b/src/vm/appdomain.cpp
@@ -2485,6 +2485,11 @@ void SystemDomain::LoadBaseSystemClasses()
// Load String
g_pStringClass = MscorlibBinder::LoadPrimitiveType(ELEMENT_TYPE_STRING);
+#ifdef FEATURE_UTF8STRING
+ // Load Utf8String
+ g_pUtf8StringClass = MscorlibBinder::GetClass(CLASS__UTF8_STRING);
+#endif // FEATURE_UTF8STRING
+
// Used by Buffer::BlockCopy
g_pByteArrayMT = ClassLoader::LoadArrayTypeThrowing(
TypeHandle(MscorlibBinder::GetElementType(ELEMENT_TYPE_U1))).AsArray()->GetMethodTable();
diff --git a/src/vm/classnames.h b/src/vm/classnames.h
index cb71df362a..f45311f0de 100644
--- a/src/vm/classnames.h
+++ b/src/vm/classnames.h
@@ -139,6 +139,10 @@
#define g_ThreadClassName "System.Threading.Thread"
#define g_TypeClassName "System.Type"
+#ifdef FEATURE_UTF8STRING
+#define g_Utf8StringName "Utf8String"
+#endif // FEATURE_UTF8STRING
+
#define g_VariantClassName "System.Variant"
#define g_GuidClassName "System.Guid"
diff --git a/src/vm/common.h b/src/vm/common.h
index 2a91e77220..61ba2a7514 100644
--- a/src/vm/common.h
+++ b/src/vm/common.h
@@ -167,6 +167,9 @@ typedef DPTR(class ReJitManager) PTR_ReJitManager;
typedef DPTR(struct ReJitInfo) PTR_ReJitInfo;
typedef DPTR(struct SharedReJitInfo) PTR_SharedReJitInfo;
typedef DPTR(class StringObject) PTR_StringObject;
+#ifdef FEATURE_UTF8STRING
+typedef DPTR(class Utf8StringObject) PTR_Utf8StringObject;
+#endif // FEATURE_UTF8STRING
typedef DPTR(class TypeHandle) PTR_TypeHandle;
typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager;
diff --git a/src/vm/ecall.cpp b/src/vm/ecall.cpp
index b8e0d64e8f..dfeff95d6f 100644
--- a/src/vm/ecall.cpp
+++ b/src/vm/ecall.cpp
@@ -29,6 +29,36 @@ extern const int c_nECClasses;
#endif // CROSSGEN_COMPILE
+/**********
+
+The constructors of string-like types (String, Utf8String) are special since the JIT will
+replace newobj instructions with calls to the corresponding 'Ctor' method. Depending on the
+CLR in use, the ctor methods may be instance methods (with a null 'this' parameter) or
+static methods. See the managed definitions of String.Ctor and Utf8String.Ctor for more
+information.
+
+To add a new ctor overload, in addition to defining the constructor and Ctor methods on
+the managed side, make changes to the following files. (These instructions are for
+Utf8String, but String is similar.)
+
+- src/vm/ecall.cpp (this file), update the definition of "NumberOfUtf8StringConstructors"
+ and add the appropriate static asserts immediately above the definition.
+
+- src/vm/ecall.h, search for "Utf8StringCtor" and add the DYNAMICALLY_ASSIGNED_FCALL_IMPL
+ definitions corresponding to the new overloads.
+
+- src/vm/ecalllist.h, search for "FCFuncStart(gUtf8StringFuncs)" and add the overloads
+ within that block.
+
+- src/vm/metasig.h, add the new Utf8String-returning metasig declarations; and, if necessary,
+ add any void-returning metasig declarations if they haven't already been defined elsewhere.
+ See "String_RetUtf8Str" for an example of how to do this.
+
+- src/vm/mscorlib.h, search for "DEFINE_CLASS(UTF8_STRING" and add the new DEFINE_METHOD
+ declarations for the Utf8String-returning Ctor methods, referencing the new metasig declarations.
+
+**********/
+
// METHOD__STRING__CTORF_XXX has to be in same order as ECall::CtorCharXxx
#define METHOD__STRING__CTORF_FIRST METHOD__STRING__CTORF_CHARARRAY
static_assert_no_msg(METHOD__STRING__CTORF_FIRST + 0 == METHOD__STRING__CTORF_CHARARRAY);
@@ -55,14 +85,38 @@ static_assert_no_msg(ECallCtor_First + 8 == ECall::CtorSBytePtrStartLengthEncodi
#define NumberOfStringConstructors 9
+#ifdef FEATURE_UTF8STRING
+// METHOD__UTF8_STRING__CTORF_XXX has to be in the same order as ECall::Utf8StringCtorXxxManaged
+#define METHOD__UTF8STRING__CTORF_FIRST METHOD__UTF8_STRING__CTORF_READONLYSPANOFBYTE
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 0 == METHOD__UTF8_STRING__CTORF_READONLYSPANOFBYTE);
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 1 == METHOD__UTF8_STRING__CTORF_READONLYSPANOFCHAR);
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 2 == METHOD__UTF8_STRING__CTORF_BYTEARRAY_START_LEN);
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 3 == METHOD__UTF8_STRING__CTORF_BYTEPTR);
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 4 == METHOD__UTF8_STRING__CTORF_CHARARRAY_START_LEN);
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 5 == METHOD__UTF8_STRING__CTORF_CHARPTR);
+static_assert_no_msg(METHOD__UTF8STRING__CTORF_FIRST + 6 == METHOD__UTF8_STRING__CTORF_STRING);
+
+// ECall::Utf8StringCtorXxxManaged has to be in the same order as METHOD__UTF8_STRING__CTORF_XXX
+#define ECallUtf8String_Ctor_First ECall::Utf8StringCtorReadOnlySpanOfByteManaged
+static_assert_no_msg(ECallUtf8String_Ctor_First + 0 == ECall::Utf8StringCtorReadOnlySpanOfByteManaged);
+static_assert_no_msg(ECallUtf8String_Ctor_First + 1 == ECall::Utf8StringCtorReadOnlySpanOfCharManaged);
+static_assert_no_msg(ECallUtf8String_Ctor_First + 2 == ECall::Utf8StringCtorByteArrayStartLengthManaged);
+static_assert_no_msg(ECallUtf8String_Ctor_First + 3 == ECall::Utf8StringCtorBytePtrManaged);
+static_assert_no_msg(ECallUtf8String_Ctor_First + 4 == ECall::Utf8StringCtorCharArrayStartLengthManaged);
+static_assert_no_msg(ECallUtf8String_Ctor_First + 5 == ECall::Utf8StringCtorCharPtrManaged);
+static_assert_no_msg(ECallUtf8String_Ctor_First + 6 == ECall::Utf8StringCtorStringManaged);
+
+#define NumberOfUtf8StringConstructors 7
+#endif // FEATURE_UTF8STRING
+
void ECall::PopulateManagedStringConstructors()
{
STANDARD_VM_CONTRACT;
INDEBUG(static bool fInitialized = false);
_ASSERTE(!fInitialized); // assume this method is only called once
- _ASSERTE(g_pStringClass != NULL);
+ _ASSERTE(g_pStringClass != NULL);
for (int i = 0; i < NumberOfStringConstructors; i++)
{
MethodDesc* pMD = MscorlibBinder::GetMethod((BinderMethodID)(METHOD__STRING__CTORF_FIRST + i));
@@ -72,6 +126,20 @@ void ECall::PopulateManagedStringConstructors()
ECall::DynamicallyAssignFCallImpl(pDest, ECallCtor_First + i);
}
+
+#ifdef FEATURE_UTF8STRING
+ _ASSERTE(g_pUtf8StringClass != NULL);
+ for (int i = 0; i < NumberOfUtf8StringConstructors; i++)
+ {
+ MethodDesc* pMD = MscorlibBinder::GetMethod((BinderMethodID)(METHOD__UTF8STRING__CTORF_FIRST + i));
+ _ASSERTE(pMD != NULL);
+
+ PCODE pDest = pMD->GetMultiCallableAddrOfCode();
+
+ ECall::DynamicallyAssignFCallImpl(pDest, ECallUtf8String_Ctor_First + i);
+ }
+#endif // FEATURE_UTF8STRING
+
INDEBUG(fInitialized = true);
}
diff --git a/src/vm/ecall.h b/src/vm/ecall.h
index c809109c4c..58b4f0c34e 100644
--- a/src/vm/ecall.h
+++ b/src/vm/ecall.h
@@ -103,7 +103,7 @@ class ECall
static void EnumFCallMethods();
#endif // DACCESS_COMPILE
-#define DYNAMICALLY_ASSIGNED_FCALLS() \
+#define _DYNAMICALLY_ASSIGNED_FCALLS_BASE() \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(FastAllocateString, FramedAllocateString) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorCharArrayManaged, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorCharArrayStartLengthManaged, NULL) \
@@ -116,6 +116,22 @@ class ECall
DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorSBytePtrStartLengthEncodingManaged, NULL) \
DYNAMICALLY_ASSIGNED_FCALL_IMPL(InternalGetCurrentThread, NULL) \
+#define _DYNAMICALLY_ASSIGNED_FCALLS_UTF8STRING() \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(FastAllocateUtf8String, FramedAllocateUtf8String) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorReadOnlySpanOfByteManaged, NULL) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorReadOnlySpanOfCharManaged, NULL) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorByteArrayStartLengthManaged, NULL) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorBytePtrManaged, NULL) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorCharArrayStartLengthManaged, NULL) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorCharPtrManaged, NULL) \
+ DYNAMICALLY_ASSIGNED_FCALL_IMPL(Utf8StringCtorStringManaged, NULL) \
+
+#ifdef FEATURE_UTF8STRING
+#define DYNAMICALLY_ASSIGNED_FCALLS() _DYNAMICALLY_ASSIGNED_FCALLS_BASE() _DYNAMICALLY_ASSIGNED_FCALLS_UTF8STRING()
+#else
+#define DYNAMICALLY_ASSIGNED_FCALLS() _DYNAMICALLY_ASSIGNED_FCALLS_BASE()
+#endif // FEATURE_UTF8STRING
+
enum
{
#undef DYNAMICALLY_ASSIGNED_FCALL_IMPL
diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h
index b44669ea75..7302bb4e4a 100644
--- a/src/vm/ecalllist.h
+++ b/src/vm/ecalllist.h
@@ -116,6 +116,19 @@ FCFuncStart(gStringFuncs)
FCFuncElement("Intern", AppDomainNative::GetOrInternString)
FCFuncEnd()
+#ifdef FEATURE_UTF8STRING
+FCFuncStart(gUtf8StringFuncs)
+ FCDynamic("FastAllocate", CORINFO_INTRINSIC_Illegal, ECall::FastAllocateUtf8String)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ReadOnlySpanOfByte_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorReadOnlySpanOfByteManaged)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ReadOnlySpanOfChar_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorReadOnlySpanOfCharManaged)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ArrByte_Int_Int_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorByteArrayStartLengthManaged)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_PtrByte_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorBytePtrManaged)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_ArrChar_Int_Int_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorCharArrayStartLengthManaged)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_PtrChar_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorCharPtrManaged)
+ FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_Str_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::Utf8StringCtorStringManaged)
+FCFuncEnd()
+#endif // FEATURE_UTF8STRING
+
FCFuncStart(gValueTypeFuncs)
FCFuncElement("CanCompareBits", ValueTypeHelper::CanCompareBits)
FCFuncElement("FastEqualsCheck", ValueTypeHelper::FastEqualsCheck)
@@ -1270,6 +1283,9 @@ FCClassElement("TypedReference", "System", gTypedReferenceFuncs)
#ifdef FEATURE_COMINTEROP
FCClassElement("UriMarshaler", "System.StubHelpers", gUriMarshalerFuncs)
#endif
+#ifdef FEATURE_UTF8STRING
+FCClassElement("Utf8String", "System", gUtf8StringFuncs)
+#endif // FEATURE_UTF8STRING
FCClassElement("ValueClassMarshaler", "System.StubHelpers", gValueClassMarshalerFuncs)
FCClassElement("ValueType", "System", gValueTypeFuncs)
#ifdef FEATURE_COMINTEROP
diff --git a/src/vm/gchelpers.cpp b/src/vm/gchelpers.cpp
index a52e10bb4f..af3a1602b3 100644
--- a/src/vm/gchelpers.cpp
+++ b/src/vm/gchelpers.cpp
@@ -981,6 +981,8 @@ STRINGREF SlowAllocateString( DWORD cchStringLength )
// Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer
// overflows in buffer size calculations.
+ //
+ // If the value below is changed, also change SlowAllocateUtf8String.
if (cchStringLength > 0x3FFFFFDF)
ThrowOutOfMemory();
@@ -1028,6 +1030,81 @@ STRINGREF SlowAllocateString( DWORD cchStringLength )
return( ObjectToSTRINGREF(orObject) );
}
+#ifdef FEATURE_UTF8STRING
+UTF8STRINGREF SlowAllocateUtf8String(DWORD cchStringLength)
+{
+ CONTRACTL{
+ THROWS;
+ GC_TRIGGERS;
+ MODE_COOPERATIVE; // returns an objref without pinning it => cooperative
+ } CONTRACTL_END;
+
+ Utf8StringObject *orObject = NULL;
+
+#ifdef _DEBUG
+ if (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP))
+ {
+ char *a = new char;
+ delete a;
+ }
+#endif
+
+ // Limit the maximum string size to <2GB to mitigate risk of security issues caused by 32-bit integer
+ // overflows in buffer size calculations.
+ //
+ // 0x7FFFFFBF is derived from the const 0x3FFFFFDF in SlowAllocateString.
+ // Adding +1 (for null terminator) and multiplying by sizeof(WCHAR) means that
+ // SlowAllocateString allows a maximum of 0x7FFFFFC0 bytes to be used for the
+ // string data itself, with some additional buffer for object headers and other
+ // data. Since we don't have the sizeof(WCHAR) multiplication here, we only need
+ // -1 to account for the null terminator, leading to a max size of 0x7FFFFFBF.
+ if (cchStringLength > 0x7FFFFFBF)
+ ThrowOutOfMemory();
+
+ SIZE_T ObjectSize = PtrAlign(Utf8StringObject::GetSize(cchStringLength));
+ _ASSERTE(ObjectSize > cchStringLength);
+
+ SetTypeHandleOnThreadForAlloc(TypeHandle(g_pUtf8StringClass));
+
+ orObject = (Utf8StringObject *)Alloc(ObjectSize, FALSE, FALSE);
+
+ // Object is zero-init already
+ _ASSERTE(orObject->HasEmptySyncBlockInfo());
+
+ // Initialize Object
+ orObject->SetMethodTable(g_pUtf8StringClass);
+ orObject->SetLength(cchStringLength);
+
+ if (ObjectSize >= LARGE_OBJECT_SIZE)
+ {
+ GCHeapUtilities::GetGCHeap()->PublishObject((BYTE*)orObject);
+ }
+
+ // Notify the profiler of the allocation
+ if (TrackAllocations())
+ {
+ OBJECTREF objref = ObjectToOBJECTREF((Object*)orObject);
+ GCPROTECT_BEGIN(objref);
+ ProfilerObjectAllocatedCallback(objref, (ClassID)orObject->GetTypeHandle().AsPtr());
+ GCPROTECT_END();
+
+ orObject = (Utf8StringObject *)OBJECTREFToObject(objref);
+ }
+
+#ifdef FEATURE_EVENT_TRACE
+ // Send ETW event for allocation
+ if (ETW::TypeSystemLog::IsHeapAllocEventEnabled())
+ {
+ ETW::TypeSystemLog::SendObjectAllocatedEvent(orObject);
+ }
+#endif // FEATURE_EVENT_TRACE
+
+ LogAlloc(ObjectSize, g_pUtf8StringClass, orObject);
+
+ return( ObjectToUTF8STRINGREF(orObject) );
+}
+#endif // FEATURE_UTF8STRING
+
#ifdef FEATURE_COMINTEROP_UNMANAGED_ACTIVATION
// OBJECTREF AllocateComClassObject(ComClassFactory* pComClsFac)
void AllocateComClassObject(ComClassFactory* pComClsFac, OBJECTREF* ppRefClass)
diff --git a/src/vm/gchelpers.h b/src/vm/gchelpers.h
index 0e407c6e61..8f6a16ade9 100644
--- a/src/vm/gchelpers.h
+++ b/src/vm/gchelpers.h
@@ -71,6 +71,10 @@ STRINGREF AllocateString( DWORD cchStringLength );
// The slow version, implemented in gcscan.cpp
STRINGREF SlowAllocateString( DWORD cchStringLength );
+#ifdef FEATURE_UTF8STRING
+UTF8STRINGREF SlowAllocateUtf8String( DWORD cchStringLength );
+#endif // FEATURE_UTF8STRING
+
#else
// On other platforms, go to the (somewhat less efficient) implementations in gcscan.cpp
@@ -83,6 +87,10 @@ OBJECTREF AllocateObjectArray(DWORD cElements, TypeHandle ElementType, BOOL bAll
STRINGREF SlowAllocateString( DWORD cchStringLength );
+#ifdef FEATURE_UTF8STRING
+UTF8STRINGREF SlowAllocateUtf8String( DWORD cchStringLength );
+#endif // FEATURE_UTF8STRING
+
inline STRINGREF AllocateString( DWORD cchStringLength )
{
WRAPPER_NO_CONTRACT;
@@ -92,6 +100,15 @@ inline STRINGREF AllocateString( DWORD cchStringLength )
#endif
+#ifdef FEATURE_UTF8STRING
+inline UTF8STRINGREF AllocateUtf8String(DWORD cchStringLength)
+{
+ WRAPPER_NO_CONTRACT;
+
+ return SlowAllocateUtf8String(cchStringLength);
+}
+#endif // FEATURE_UTF8STRING
+
OBJECTREF DupArrayForCloning(BASEARRAYREF pRef, BOOL bAllocateInLargeHeap = FALSE);
// The JIT requests the EE to specify an allocation helper to use at each new-site.
diff --git a/src/vm/jithelpers.cpp b/src/vm/jithelpers.cpp
index 303f06130f..0576ca7336 100644
--- a/src/vm/jithelpers.cpp
+++ b/src/vm/jithelpers.cpp
@@ -2895,6 +2895,61 @@ HCIMPL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength)
}
HCIMPLEND
+#ifdef FEATURE_UTF8STRING
+HCIMPL1(Utf8StringObject*, AllocateUtf8String_MP_FastPortable, DWORD stringLength)
+{
+ FCALL_CONTRACT;
+
+ do
+ {
+ _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
+
+ // Instead of doing elaborate overflow checks, we just limit the number of elements. This will avoid all overflow
+ // problems, as well as making sure large string objects are correctly allocated in the large object heap.
+ if (stringLength >= LARGE_OBJECT_SIZE - 256)
+ {
+ break;
+ }
+
+ // This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler
+ // to use volatile registers for intermediate values. This reduces the number of push/pop instructions and eliminates
+ // some reshuffling of intermediate values into nonvolatile registers around the call.
+ Thread *thread = GetThread();
+
+ SIZE_T totalSize = Utf8StringObject::GetSize(stringLength);
+
+ // The method table's base size includes space for a terminating null character
+ _ASSERTE(totalSize >= g_pUtf8StringClass->GetBaseSize());
+ _ASSERTE(totalSize - g_pUtf8StringClass->GetBaseSize() == stringLength);
+
+ SIZE_T alignedTotalSize = ALIGN_UP(totalSize, DATA_ALIGNMENT);
+ _ASSERTE(alignedTotalSize >= totalSize);
+ totalSize = alignedTotalSize;
+
+ gc_alloc_context *allocContext = thread->GetAllocContext();
+ BYTE *allocPtr = allocContext->alloc_ptr;
+ _ASSERTE(allocPtr <= allocContext->alloc_limit);
+ if (totalSize > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr))
+ {
+ break;
+ }
+ allocContext->alloc_ptr = allocPtr + totalSize;
+
+ _ASSERTE(allocPtr != nullptr);
+ Utf8StringObject *stringObject = reinterpret_cast<Utf8StringObject *>(allocPtr);
+ stringObject->SetMethodTable(g_pUtf8StringClass);
+ stringObject->SetLength(stringLength);
+
+ return stringObject;
+ } while (false);
+
+ // Tail call to the slow helper
+ ENDFORBIDGC();
+ return HCCALL1(FramedAllocateUtf8String, stringLength);
+}
+HCIMPLEND
+#endif // FEATURE_UTF8STRING
+
#include <optdefault.h>
/*********************************************************************/
@@ -2933,6 +2988,22 @@ HCIMPL1(StringObject*, FramedAllocateString, DWORD stringLength)
}
HCIMPLEND
+#ifdef FEATURE_UTF8STRING
+HCIMPL1(Utf8StringObject*, FramedAllocateUtf8String, DWORD stringLength)
+{
+ FCALL_CONTRACT;
+
+ UTF8STRINGREF result = NULL;
+ HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame
+
+ result = SlowAllocateUtf8String(stringLength);
+
+ HELPER_METHOD_FRAME_END();
+ return((Utf8StringObject*) OBJECTREFToObject(result));
+}
+HCIMPLEND
+#endif // FEATURE_UTF8STRING
+
/*********************************************************************/
OBJECTHANDLE ConstructStringLiteral(CORINFO_MODULE_HANDLE scopeHnd, mdToken metaTok)
{
diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp
index af5fdbac33..b3ede3baa2 100644
--- a/src/vm/jitinterface.cpp
+++ b/src/vm/jitinterface.cpp
@@ -7514,6 +7514,9 @@ bool getILIntrinsicImplementationForRuntimeHelpers(MethodDesc * ftn,
if (methodTable == MscorlibBinder::GetClass(CLASS__BOOLEAN)
|| methodTable == MscorlibBinder::GetClass(CLASS__BYTE)
|| methodTable == MscorlibBinder::GetClass(CLASS__SBYTE)
+#ifdef FEATURE_UTF8STRING
+ || methodTable == MscorlibBinder::GetClass(CLASS__CHAR8)
+#endif // FEATURE_UTF8STRING
|| methodTable == MscorlibBinder::GetClass(CLASS__CHAR)
|| methodTable == MscorlibBinder::GetClass(CLASS__INT16)
|| methodTable == MscorlibBinder::GetClass(CLASS__UINT16)
diff --git a/src/vm/jitinterface.h b/src/vm/jitinterface.h
index fe7dd4a922..af42bd29ab 100644
--- a/src/vm/jitinterface.h
+++ b/src/vm/jitinterface.h
@@ -231,6 +231,11 @@ extern FCDECL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength
extern FCDECL1(StringObject*, UnframedAllocateString, DWORD stringLength);
extern FCDECL1(StringObject*, FramedAllocateString, DWORD stringLength);
+#ifdef FEATURE_UTF8STRING
+extern FCDECL1(Utf8StringObject*, AllocateUtf8String_MP_FastPortable, DWORD stringLength);
+extern FCDECL1(Utf8StringObject*, FramedAllocateUtf8String, DWORD stringLength);
+#endif // FEATURE_UTF8STRING
+
extern FCDECL2(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size);
extern FCDECL2(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size);
extern FCDECL2(Object*, JIT_NewArr1_R2R, CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size);
diff --git a/src/vm/jitinterfacegen.cpp b/src/vm/jitinterfacegen.cpp
index f86011d3ef..3a5b618c26 100644
--- a/src/vm/jitinterfacegen.cpp
+++ b/src/vm/jitinterfacegen.cpp
@@ -80,6 +80,9 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
+#ifdef FEATURE_UTF8STRING
+ ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateUtf8String_MP_FastPortable), ECall::FastAllocateUtf8String);
+#endif // FEATURE_UTF8STRING
#else // FEATURE_PAL
// if (multi-proc || server GC)
if (GCHeapUtilities::UseThreadAllocationContexts())
@@ -91,6 +94,9 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_InlineGetThread);
ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastMP_InlineGetThread), ECall::FastAllocateString);
+#ifdef FEATURE_UTF8STRING
+ ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateUtf8String_MP_FastPortable), ECall::FastAllocateUtf8String);
+#endif // FEATURE_UTF8STRING
}
else
{
@@ -105,6 +111,9 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_UP);
ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastUP), ECall::FastAllocateString);
+#ifdef FEATURE_UTF8STRING
+ ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateUtf8String_MP_FastPortable), ECall::FastAllocateUtf8String);
+#endif // FEATURE_UTF8STRING
}
#endif // FEATURE_PAL
}
diff --git a/src/vm/marshalnative.cpp b/src/vm/marshalnative.cpp
index 334a4a88e4..23df97dcb7 100644
--- a/src/vm/marshalnative.cpp
+++ b/src/vm/marshalnative.cpp
@@ -266,6 +266,11 @@ FCIMPL1(FC_BOOL_RET, MarshalNative::IsPinnable, Object* obj)
if (obj->GetMethodTable() == g_pStringClass)
FC_RETURN_BOOL(TRUE);
+#ifdef FEATURE_UTF8STRING
+ if (obj->GetMethodTable() == g_pUtf8StringClass)
+ FC_RETURN_BOOL(TRUE);
+#endif // FEATURE_UTF8STRING
+
if (obj->GetMethodTable()->IsArray())
{
BASEARRAYREF asArray = (BASEARRAYREF)ObjectToOBJECTREF(obj);
@@ -527,6 +532,11 @@ void ValidatePinnedObject(OBJECTREF obj)
if (obj->GetMethodTable() == g_pStringClass)
return;
+#ifdef FEATURE_UTF8STRING
+ if (obj->GetMethodTable() == g_pUtf8StringClass)
+ return;
+#endif // FEATURE_UTF8STRING
+
if (obj->GetMethodTable()->IsArray())
{
BASEARRAYREF asArray = (BASEARRAYREF) obj;
diff --git a/src/vm/metasig.h b/src/vm/metasig.h
index 5321fd3ee3..5e0a821e44 100644
--- a/src/vm/metasig.h
+++ b/src/vm/metasig.h
@@ -402,6 +402,7 @@ DEFINE_METASIG(IM(Bool_Bool_RetStr, F F, s))
DEFINE_METASIG(IM(PtrChar_RetVoid, P(u), v))
DEFINE_METASIG(IM(PtrChar_Int_Int_RetVoid, P(u) i i, v))
+DEFINE_METASIG_T(IM(ReadOnlySpanOfByte_RetVoid, GI(g(READONLY_SPAN), 1, b), v))
DEFINE_METASIG_T(IM(ReadOnlySpanOfChar_RetVoid, GI(g(READONLY_SPAN), 1, u), v))
DEFINE_METASIG(IM(PtrSByt_RetVoid, P(B), v))
DEFINE_METASIG(IM(PtrSByt_Int_Int_RetVoid, P(B) i i, v))
@@ -420,6 +421,19 @@ DEFINE_METASIG(IM(PtrSByt_Int_Int_RetStr, P(B) i i, s))
DEFINE_METASIG_T(IM(PtrSByt_Int_Int_Encoding_RetStr, P(B) i i C(ENCODING), s))
DEFINE_METASIG(IM(Obj_Int_RetIntPtr, j i, I))
+DEFINE_METASIG(IM(ArrByte_Int_Int_RetVoid, a(b) i i, v))
+DEFINE_METASIG(IM(PtrByte_RetVoid, P(b), v))
+
+#ifdef FEATURE_UTF8STRING
+DEFINE_METASIG_T(IM(ReadOnlySpanOfByte_RetUtf8Str, GI(g(READONLY_SPAN), 1, b), C(UTF8_STRING)))
+DEFINE_METASIG_T(IM(ReadOnlySpanOfChar_RetUtf8Str, GI(g(READONLY_SPAN), 1, u), C(UTF8_STRING)))
+DEFINE_METASIG_T(IM(ArrByte_Int_Int_RetUtf8Str, a(b) i i, C(UTF8_STRING)))
+DEFINE_METASIG_T(IM(PtrByte_RetUtf8Str, P(b), C(UTF8_STRING)))
+DEFINE_METASIG_T(IM(ArrChar_Int_Int_RetUtf8Str, a(u) i i, C(UTF8_STRING)))
+DEFINE_METASIG_T(IM(PtrChar_RetUtf8Str, P(u), C(UTF8_STRING)))
+DEFINE_METASIG_T(IM(String_RetUtf8Str, s, C(UTF8_STRING)))
+#endif // FEATURE_UTF8STRING
+
DEFINE_METASIG(IM(Char_Char_RetStr, u u, s))
DEFINE_METASIG(IM(Char_Int_RetVoid, u i, v))
DEFINE_METASIG_T(SM(RetCultureInfo, _, C(CULTURE_INFO)))
diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h
index 9f9b25e37b..84f8399dc2 100644
--- a/src/vm/methodtable.h
+++ b/src/vm/methodtable.h
@@ -1743,7 +1743,7 @@ public:
BOOL IsString()
{
LIMITED_METHOD_DAC_CONTRACT;
- return HasComponentSize() && !IsArray();
+ return HasComponentSize() && !IsArray() && RawGetComponentSize() == 2;
}
BOOL HasComponentSize() const
diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index 568a23136e..d4ce5b0df0 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -9711,6 +9711,19 @@ void MethodTableBuilder::CheckForSystemTypes()
pMT->SetComponentSize(2);
}
+#ifdef FEATURE_UTF8STRING
+ else if (strcmp(name, g_Utf8StringName) == 0 && strcmp(nameSpace, g_SystemNS) == 0)
+ {
+ // Utf8Strings are not "normal" objects, so we need to mess with their method table a bit
+ // so that the GC can figure out how big each string is...
+ DWORD baseSize = Utf8StringObject::GetBaseSize();
+ pMT->SetBaseSize(baseSize); // NULL character included
+
+ GetHalfBakedClass()->SetBaseSizePadding(baseSize - bmtFP->NumInstanceFieldBytes);
+
+ pMT->SetComponentSize(1);
+ }
+#endif // FEATURE_UTF8STRING
else if (strcmp(name, g_CriticalFinalizerObjectName) == 0 && strcmp(nameSpace, g_ConstrainedExecutionNS) == 0)
{
// To introduce a class with a critical finalizer,
diff --git a/src/vm/mscorlib.h b/src/vm/mscorlib.h
index 264408f26a..c54a635abe 100644
--- a/src/vm/mscorlib.h
+++ b/src/vm/mscorlib.h
@@ -322,6 +322,10 @@ DEFINE_CLASS(ENCODING, Text, Encoding)
DEFINE_CLASS(RUNE, Text, Rune)
+#ifdef FEATURE_UTF8STRING
+DEFINE_CLASS(CHAR8, System, Char8)
+#endif // FEATURE_UTF8STRING
+
DEFINE_CLASS(ENUM, System, Enum)
DEFINE_CLASS(ENVIRONMENT, System, Environment)
@@ -818,6 +822,17 @@ DEFINE_METHOD(STRING, WCSLEN, wcslen,
DEFINE_METHOD(STRING, STRLEN, strlen, SM_PtrByte_RetInt)
DEFINE_PROPERTY(STRING, LENGTH, Length, Int)
+#ifdef FEATURE_UTF8STRING
+DEFINE_CLASS(UTF8_STRING, System, Utf8String)
+DEFINE_METHOD(UTF8_STRING, CTORF_READONLYSPANOFBYTE,Ctor, IM_ReadOnlySpanOfByte_RetUtf8Str)
+DEFINE_METHOD(UTF8_STRING, CTORF_READONLYSPANOFCHAR,Ctor, IM_ReadOnlySpanOfChar_RetUtf8Str)
+DEFINE_METHOD(UTF8_STRING, CTORF_BYTEARRAY_START_LEN,Ctor, IM_ArrByte_Int_Int_RetUtf8Str)
+DEFINE_METHOD(UTF8_STRING, CTORF_BYTEPTR, Ctor, IM_PtrByte_RetUtf8Str)
+DEFINE_METHOD(UTF8_STRING, CTORF_CHARARRAY_START_LEN,Ctor, IM_ArrChar_Int_Int_RetUtf8Str)
+DEFINE_METHOD(UTF8_STRING, CTORF_CHARPTR, Ctor, IM_PtrChar_RetUtf8Str)
+DEFINE_METHOD(UTF8_STRING, CTORF_STRING, Ctor, IM_String_RetUtf8Str)
+#endif // FEATURE_UTF8STRING
+
DEFINE_CLASS(STRING_BUILDER, Text, StringBuilder)
DEFINE_PROPERTY(STRING_BUILDER, LENGTH, Length, Int)
DEFINE_PROPERTY(STRING_BUILDER, CAPACITY, Capacity, Int)
diff --git a/src/vm/object.h b/src/vm/object.h
index 6bc3a74471..9087afa4a5 100644
--- a/src/vm/object.h
+++ b/src/vm/object.h
@@ -35,7 +35,10 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref);
* | sync block index, which is at a negative offset
* |
* +-- code:StringObject - String objects are specialized objects for string
- * | storage/retrieval for higher performance
+ * | storage/retrieval for higher performance (UCS-2 / UTF-16 data)
+ * |
+ * +-- code:Utf8StringObject - Utf8String objects are specialized objects for string
+ * | storage/retrieval for higher performance (UTF-8 data)
* |
* +-- BaseObjectWithCachedData - Object Plus one object field for caching.
* | |
@@ -870,6 +873,9 @@ typedef DPTR(UPTRArray) PTR_UPTRArray;
typedef DPTR(PTRArray) PTR_PTRArray;
class StringObject;
+#ifdef FEATURE_UTF8STRING
+class Utf8StringObject;
+#endif // FEATURE_UTF8STRING
#ifdef USE_CHECKED_OBJECTREFS
typedef REF<ArrayBase> BASEARRAYREF;
@@ -888,6 +894,9 @@ typedef REF<UPTRArray> UPTRARRAYREF;
typedef REF<CHARArray> CHARARRAYREF;
typedef REF<PTRArray> PTRARRAYREF; // Warning: Use PtrArray only for single dimensional arrays, not multidim arrays.
typedef REF<StringObject> STRINGREF;
+#ifdef FEATURE_UTF8STRING
+typedef REF<Utf8StringObject> UTF8STRINGREF;
+#endif // FEATURE_UTF8STRING
#else // USE_CHECKED_OBJECTREFS
@@ -907,6 +916,9 @@ typedef PTR_UPTRArray UPTRARRAYREF;
typedef PTR_CHARArray CHARARRAYREF;
typedef PTR_PTRArray PTRARRAYREF; // Warning: Use PtrArray only for single dimensional arrays, not multidim arrays.
typedef PTR_StringObject STRINGREF;
+#ifdef FEATURE_UTF8STRING
+typedef PTR_Utf8StringObject UTF8STRINGREF;
+#endif // FEATURE_UTF8STRING
#endif // USE_CHECKED_OBJECTREFS
@@ -1199,6 +1211,56 @@ public:
};
+#ifdef FEATURE_UTF8STRING
+class Utf8StringObject : public Object
+{
+#ifdef DACCESS_COMPILE
+ friend class ClrDataAccess;
+#endif
+
+private:
+ DWORD m_StringLength;
+ BYTE m_FirstChar;
+
+public:
+ VOID SetLength(DWORD len) { LIMITED_METHOD_CONTRACT; _ASSERTE(len >= 0); m_StringLength = len; }
+
+protected:
+ Utf8StringObject() { LIMITED_METHOD_CONTRACT; }
+ ~Utf8StringObject() { LIMITED_METHOD_CONTRACT; }
+
+public:
+
+ /*=================RefInterpretGetStringValuesDangerousForGC======================
+ **N.B.: This performs no range checking and relies on the caller to have done this.
+ **Args: (IN)ref -- the Utf8String to be interpreted.
+ ** (OUT)chars -- a pointer to the characters in the buffer.
+ ** (OUT)length -- a pointer to the length of the buffer.
+ **Returns: void.
+ **Exceptions: None.
+ ==============================================================================*/
+ // !!!! If you use this function, you have to be careful because chars is a pointer
+ // !!!! to the data buffer of ref. If GC happens after this call, you need to make
+ // !!!! sure that you have a pin handle on ref, or use GCPROTECT_BEGINPINNING on ref.
+ void RefInterpretGetStringValuesDangerousForGC(__deref_out_ecount(*length + 1) CHAR **chars, int *length) {
+ WRAPPER_NO_CONTRACT;
+
+ _ASSERTE(GetGCSafeMethodTable() == g_pUtf8StringClass);
+ *length = GetStringLength();
+ *chars = GetBuffer();
+#ifdef _DEBUG
+ EnableStressHeapHelper();
+#endif
+ }
+
+ DWORD GetStringLength() { LIMITED_METHOD_DAC_CONTRACT; return( m_StringLength );}
+ CHAR* GetBuffer() { LIMITED_METHOD_CONTRACT; _ASSERTE(this != nullptr); return (CHAR*)( dac_cast<TADDR>(this) + offsetof(Utf8StringObject, m_FirstChar) ); }
+
+ static DWORD GetBaseSize();
+ static SIZE_T GetSize(DWORD stringLength);
+};
+#endif // FEATURE_UTF8STRING
+
// This is the Method version of the Reflection object.
// A Method has adddition information.
// m_pMD - A pointer to the actual MethodDesc of the method.
diff --git a/src/vm/object.inl b/src/vm/object.inl
index 9652909250..ebf9d364c9 100644
--- a/src/vm/object.inl
+++ b/src/vm/object.inl
@@ -71,6 +71,22 @@ __forceinline /*static*/ SIZE_T StringObject::GetSize(DWORD strLen)
return GetBaseSize() + strLen * sizeof(WCHAR);
}
+#ifdef FEATURE_UTF8STRING
+__forceinline /*static*/ DWORD Utf8StringObject::GetBaseSize()
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return OBJECT_BASESIZE + sizeof(DWORD) /* length */ + sizeof(BYTE) /* null terminator */;
+}
+
+__forceinline /*static*/ SIZE_T Utf8StringObject::GetSize(DWORD strLen)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return GetBaseSize() + strLen;
+}
+#endif // FEATURE_UTF8STRING
+
#ifdef DACCESS_COMPILE
inline void Object::EnumMemoryRegions(void)
diff --git a/src/vm/reflectioninvocation.cpp b/src/vm/reflectioninvocation.cpp
index 1f8aa04593..954d6ae267 100644
--- a/src/vm/reflectioninvocation.cpp
+++ b/src/vm/reflectioninvocation.cpp
@@ -1001,6 +1001,7 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod,
// Skip the activation optimization for remoting because of remoting proxy is not always activated.
// It would be nice to clean this up and get remoting to always activate methodtable behind the proxy.
BOOL fForceActivationForRemoting = FALSE;
+ BOOL fCtorOfVariableSizedObject = FALSE;
if (fConstructor)
{
@@ -1018,7 +1019,8 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod,
MethodTable * pMT = ownerType.AsMethodTable();
{
- if (pMT != g_pStringClass)
+ fCtorOfVariableSizedObject = pMT->HasComponentSize();
+ if (!fCtorOfVariableSizedObject)
gc.retVal = pMT->Allocate();
}
}
@@ -1324,7 +1326,11 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod,
if (fConstructor)
{
// We have a special case for Strings...The object is returned...
- if (ownerType == TypeHandle(g_pStringClass)) {
+ if (ownerType == TypeHandle(g_pStringClass)
+#ifdef FEATURE_UTF8STRING
+ || ownerType == TypeHandle(g_pUtf8StringClass)
+#endif // FEATURE_UTF8STRING
+ ) {
PVOID pReturnValue = &callDescrData.returnValue;
gc.retVal = *(OBJECTREF *)pReturnValue;
}
@@ -2590,8 +2596,12 @@ FCIMPL1(Object*, ReflectionSerialization::GetUninitializedObject, ReflectClassBa
MethodTable *pMT = type.GetMethodTable();
PREFIX_ASSUME(pMT != NULL);
- //We don't allow unitialized strings.
- if (pMT == g_pStringClass) {
+ //We don't allow uninitialized Strings or Utf8Strings.
+ if (pMT == g_pStringClass
+#ifdef FEATURE_UTF8STRING
+ || pMT == g_pUtf8StringClass
+#endif // FEATURE_UTF8STRING
+ ) {
COMPlusThrow(kArgumentException, W("Argument_NoUninitializedStrings"));
}
diff --git a/src/vm/vars.cpp b/src/vm/vars.cpp
index 179acda8af..8b329d4c2e 100644
--- a/src/vm/vars.cpp
+++ b/src/vm/vars.cpp
@@ -61,6 +61,9 @@ GPTR_IMPL(MethodTable, g_pObjectClass);
GPTR_IMPL(MethodTable, g_pRuntimeTypeClass);
GPTR_IMPL(MethodTable, g_pCanonMethodTableClass); // System.__Canon
GPTR_IMPL(MethodTable, g_pStringClass);
+#ifdef FEATURE_UTF8STRING
+GPTR_IMPL(MethodTable, g_pUtf8StringClass);
+#endif // FEATURE_UTF8STRING
GPTR_IMPL(MethodTable, g_pArrayClass);
GPTR_IMPL(MethodTable, g_pSZArrayHelperClass);
GPTR_IMPL(MethodTable, g_pNullableClass);
diff --git a/src/vm/vars.hpp b/src/vm/vars.hpp
index 91ad42a91c..d8ffc60e25 100644
--- a/src/vm/vars.hpp
+++ b/src/vm/vars.hpp
@@ -79,6 +79,9 @@ class LoaderHeap;
class IGCHeap;
class Object;
class StringObject;
+#ifdef FEATURE_UTF8STRING
+class Utf8StringObject;
+#endif // FEATURE_UTF8STRING
class ArrayClass;
class MethodTable;
class MethodDesc;
@@ -313,6 +316,10 @@ class REF : public OBJECTREF
#define OBJECTREFToObject(objref) ((objref).operator-> ())
#define ObjectToSTRINGREF(obj) (STRINGREF(obj))
#define STRINGREFToObject(objref) (*( (StringObject**) &(objref) ))
+#ifdef FEATURE_UTF8STRING
+#define ObjectToUTF8STRINGREF(obj) (UTF8STRINGREF(obj))
+#define UTF8STRINGREFToObject(objref) (*( (Utf8StringObject**) &(objref) ))
+#endif // FEATURE_UTF8STRING
#else // _DEBUG_IMPL
@@ -323,6 +330,10 @@ class REF : public OBJECTREF
#define OBJECTREFToObject(objref) ((PTR_Object) (objref))
#define ObjectToSTRINGREF(obj) ((PTR_StringObject) (obj))
#define STRINGREFToObject(objref) ((PTR_StringObject) (objref))
+#ifdef FEATURE_UTF8STRING
+#define ObjectToUTF8STRINGREF(obj) ((PTR_Utf8StringObject) (obj))
+#define UTF8STRINGREFToObject(objref) ((PTR_Utf8StringObject) (objref))
+#endif // FEATURE_UTF8STRING
#endif // _DEBUG_IMPL
@@ -363,6 +374,9 @@ GPTR_DECL(MethodTable, g_pObjectClass);
GPTR_DECL(MethodTable, g_pRuntimeTypeClass);
GPTR_DECL(MethodTable, g_pCanonMethodTableClass); // System.__Canon
GPTR_DECL(MethodTable, g_pStringClass);
+#ifdef FEATURE_UTF8STRING
+GPTR_DECL(MethodTable, g_pUtf8StringClass);
+#endif // FEATURE_UTF8STRING
GPTR_DECL(MethodTable, g_pArrayClass);
GPTR_DECL(MethodTable, g_pSZArrayHelperClass);
GPTR_DECL(MethodTable, g_pNullableClass);