diff options
author | Tanner Gooding <tagoo@outlook.com> | 2018-11-08 17:58:24 -0800 |
---|---|---|
committer | Jan Kotas <jkotas@microsoft.com> | 2018-11-09 06:14:46 -0800 |
commit | 0fccc78cfea93bafbba07cc4a84a32582a3af88f (patch) | |
tree | dc73e9450660f41ed8d225804f48a30322f85086 /src/System.Private.CoreLib/shared/System/Buffers | |
parent | 00f5934a3e34977c7a1502da604f2dae90040888 (diff) | |
download | coreclr-0fccc78cfea93bafbba07cc4a84a32582a3af88f.tar.gz coreclr-0fccc78cfea93bafbba07cc4a84a32582a3af88f.tar.bz2 coreclr-0fccc78cfea93bafbba07cc4a84a32582a3af88f.zip |
Moving the Utf8Parser/Utf8Formatter to be shared (dotnet/corefx#33348)
Signed-off-by: dotnet-bot <dotnet-bot@microsoft.com>
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Buffers')
50 files changed, 7699 insertions, 0 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/StandardFormat.cs b/src/System.Private.CoreLib/shared/System/Buffers/StandardFormat.cs new file mode 100644 index 0000000000..6975c204f1 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/StandardFormat.cs @@ -0,0 +1,174 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.Buffers +{ + /// <summary> + /// Represents a standard formatting string without using an actual String. A StandardFormat consists of a character (such as 'G', 'D' or 'X') + /// and an optional precision ranging from 0..99, or the special value NoPrecision. + /// </summary> + public readonly struct StandardFormat : IEquatable<StandardFormat> + { + /// <summary> + /// Precision value for formats that don't use a precision, or for when the precision is to be unspecified. + /// </summary> + public const byte NoPrecision = byte.MaxValue; + + /// <summary> + /// The maximum valid precision value. + /// </summary> + public const byte MaxPrecision = 99; + + private readonly byte _format; + private readonly byte _precision; + + /// <summary> + /// The character component of the format. + /// </summary> + public char Symbol => (char)_format; + + /// <summary> + /// The precision component of the format. Ranges from 0..99 or the special value NoPrecision. + /// </summary> + public byte Precision => _precision; + + /// <summary> + /// true if Precision is a value other than NoPrecision + /// </summary> + public bool HasPrecision => _precision != NoPrecision; + + /// <summary> + /// true if the StandardFormat == default(StandardFormat) + /// </summary> + public bool IsDefault => _format == 0 && _precision == 0; + + /// <summary> + /// Create a StandardFormat. 
+ /// </summary> + /// <param name="symbol">A type-specific formatting character such as 'G', 'D' or 'X'</param> + /// <param name="precision">An optional precision ranging from 0..99 or the special value NoPrecision (the default)</param> + public StandardFormat(char symbol, byte precision = NoPrecision) + { + if (precision != NoPrecision && precision > MaxPrecision) + ThrowHelper.ThrowArgumentOutOfRangeException_PrecisionTooLarge(); + if (symbol != (byte)symbol) + ThrowHelper.ThrowArgumentOutOfRangeException_SymbolDoesNotFit(); + + _format = (byte)symbol; + _precision = precision; + } + + /// <summary> + /// Converts a character to a StandardFormat using the NoPrecision precision. + /// </summary> + public static implicit operator StandardFormat(char symbol) => new StandardFormat(symbol); + + /// <summary> + /// Converts a classic .NET format string into a StandardFormat + /// </summary> + public static StandardFormat Parse(ReadOnlySpan<char> format) + { + if (format.Length == 0) + return default; + + char symbol = format[0]; + byte precision; + if (format.Length == 1) + { + precision = NoPrecision; + } + else + { + uint parsedPrecision = 0; + for (int srcIndex = 1; srcIndex < format.Length; srcIndex++) + { + uint digit = format[srcIndex] - 48u; // '0' + if (digit > 9) + throw new FormatException(SR.Format(SR.Argument_CannotParsePrecision, MaxPrecision)); + + parsedPrecision = parsedPrecision * 10 + digit; + if (parsedPrecision > MaxPrecision) + throw new FormatException(SR.Format(SR.Argument_PrecisionTooLarge, MaxPrecision)); + } + + precision = (byte)parsedPrecision; + } + + return new StandardFormat(symbol, precision); + } + + /// <summary> + /// Converts a classic .NET format string into a StandardFormat + /// </summary> + public static StandardFormat Parse(string format) => format == null ? default : Parse(format.AsSpan()); + + /// <summary> + /// Returns true if both the Symbol and Precision are equal. 
+ /// </summary> + public override bool Equals(object obj) => obj is StandardFormat other && Equals(other); + + /// <summary> + /// Compute a hash code. + /// </summary> + public override int GetHashCode() => _format.GetHashCode() ^ _precision.GetHashCode(); + + /// <summary> + /// Returns true if both the Symbol and Precision are equal. + /// </summary> + public bool Equals(StandardFormat other) => _format == other._format && _precision == other._precision; + + /// <summary> + /// Returns the format in classic .NET format. + /// </summary> + public override string ToString() + { + unsafe + { + const int MaxLength = 4; + char* pBuffer = stackalloc char[MaxLength]; + + int dstIndex = 0; + char symbol = Symbol; + if (symbol != default) + { + pBuffer[dstIndex++] = symbol; + + byte precision = Precision; + if (precision != NoPrecision) + { + if (precision >= 100) + { + pBuffer[dstIndex++] = (char)('0' + (precision / 100) % 10); + precision = (byte)(precision % 100); + } + + if (precision >= 10) + { + pBuffer[dstIndex++] = (char)('0' + (precision / 10) % 10); + precision = (byte)(precision % 10); + } + + pBuffer[dstIndex++] = (char)('0' + precision); + } + } + + Debug.Assert(dstIndex <= MaxLength); + + return new string(pBuffer, startIndex: 0, length: dstIndex); + } + } + + /// <summary> + /// Returns true if both the Symbol and Precision are equal. + /// </summary> + public static bool operator ==(StandardFormat left, StandardFormat right) => left.Equals(right); + + /// <summary> + /// Returns false if both the Symbol and Precision are equal. 
+ /// </summary> + public static bool operator !=(StandardFormat left, StandardFormat right) => !left.Equals(right); + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Constants.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Constants.cs new file mode 100644 index 0000000000..e2f70f0b10 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Constants.cs @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + internal static partial class Utf8Constants + { + public const byte Colon = (byte)':'; + public const byte Comma = (byte)','; + public const byte Minus = (byte)'-'; + public const byte Period = (byte)'.'; + public const byte Plus = (byte)'+'; + public const byte Slash = (byte)'/'; + public const byte Space = (byte)' '; + public const byte Hyphen = (byte)'-'; + + public const byte Separator = (byte)','; + + // Invariant formatting uses groups of 3 for each number group separated by commas. + // ex. 1,234,567,890 + public const int GroupSize = 3; + + public static readonly TimeSpan NullUtcOffset = TimeSpan.MinValue; // Utc offsets must range from -14:00 to 14:00 so this is never a valid offset. + + public const int DateTimeMaxUtcOffsetHours = 14; // The UTC offset portion of a TimeSpan or DateTime can be no more than 14 hours and no less than -14 hours. + + public const int DateTimeNumFractionDigits = 7; // TimeSpan and DateTime formats allow up to this many digits for specifying the fraction after the seconds. + public const int MaxDateTimeFraction = 9999999; // ... and hence, the largest fraction expressible is this. 
+ + public const ulong BillionMaxUIntValue = (ulong)uint.MaxValue * Billion; // maximum value that can be split into two uint32 {1-10 digits}{9 digits} + public const uint Billion = 1000000000; // 10^9, used to split int64/uint64 into three uint32 {1-2 digits}{9 digits}{9 digits} + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/FormattingHelpers.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/FormattingHelpers.cs new file mode 100644 index 0000000000..d5aaf13ce2 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/FormattingHelpers.cs @@ -0,0 +1,246 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + // All the helper methods in this class assume that the by-ref is valid and that there is + // enough space to fit the items that will be written into the underlying memory. The calling + // code must have already done all the necessary validation. + internal static partial class FormattingHelpers + { + // A simple lookup table for converting numbers to hex. + internal const string HexTableLower = "0123456789abcdef"; + + internal const string HexTableUpper = "0123456789ABCDEF"; + + /// <summary> + /// Returns the symbol contained within the standard format. If the standard format + /// has not been initialized, returns the provided fallback symbol. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static char GetSymbolOrDefault(in StandardFormat format, char defaultSymbol) + { + // This is equivalent to the line below, but it is written in such a way + // that the JIT is able to perform more optimizations. + // + // return (format.IsDefault) ? 
defaultSymbol : format.Symbol; + + var symbol = format.Symbol; + if (symbol == default && format.Precision == default) + { + symbol = defaultSymbol; + } + return symbol; + } + + #region UTF-8 Helper methods + + /// <summary> + /// Fills a buffer with the ASCII character '0' (0x30). + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void FillWithAsciiZeros(Span<byte> buffer) + { + // This is a faster implementation of Span<T>.Fill(). + for (int i = 0; i < buffer.Length; i++) + { + buffer[i] = (byte)'0'; + } + } + + public enum HexCasing : uint + { + // Output [ '0' .. '9' ] and [ 'A' .. 'F' ]. + Uppercase = 0, + + // Output [ '0' .. '9' ] and [ 'a' .. 'f' ]. + // This works because values in the range [ 0x30 .. 0x39 ] ([ '0' .. '9' ]) + // already have the 0x20 bit set, so ORing them with 0x20 is a no-op, + // while outputs in the range [ 0x41 .. 0x46 ] ([ 'A' .. 'F' ]) + // don't have the 0x20 bit set, so ORing them maps to + // [ 0x61 .. 0x66 ] ([ 'a' .. 'f' ]), which is what we want. + Lowercase = 0x2020U, + } + + // The JIT can elide bounds checks if 'startingIndex' is constant and if the caller is + // writing to a span of known length (or the caller has already checked the bounds of the + // furthest access). + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteHexByte(byte value, Span<byte> buffer, int startingIndex = 0, HexCasing casing = HexCasing.Uppercase) + { + // We want to pack the incoming byte into a single integer [ 0000 HHHH 0000 LLLL ], + // where HHHH and LLLL are the high and low nibbles of the incoming byte. Then + // subtract this integer from a constant minuend as shown below. + // + // [ 1000 1001 1000 1001 ] + // - [ 0000 HHHH 0000 LLLL ] + // ========================= + // [ *YYY **** *ZZZ **** ] + // + // The end result of this is that YYY is 0b000 if HHHH <= 9, and YYY is 0b111 if HHHH >= 10. + // Similarly, ZZZ is 0b000 if LLLL <= 9, and ZZZ is 0b111 if LLLL >= 10. 
+ // (We don't care about the value of asterisked bits.) + // + // To turn a nibble in the range [ 0 .. 9 ] into hex, we calculate hex := nibble + 48 (ascii '0'). + // To turn a nibble in the range [ 10 .. 15 ] into hex, we calculate hex := nibble - 10 + 65 (ascii 'A'). + // => hex := nibble + 55. + // The difference in the starting ASCII offset is (55 - 48) = 7, depending on whether the nibble is <= 9 or >= 10. + // Since 7 is 0b111, this conveniently matches the YYY or ZZZ value computed during the earlier subtraction. + + // The commented out code below is code that directly implements the logic described above. + + //uint packedOriginalValues = (((uint)value & 0xF0U) << 4) + ((uint)value & 0x0FU); + //uint difference = 0x8989U - packedOriginalValues; + //uint add7Mask = (difference & 0x7070U) >> 4; // line YYY and ZZZ back up with the packed values + //uint packedResult = packedOriginalValues + add7Mask + 0x3030U /* ascii '0' */; + + // The code below is equivalent to the commented out code above but has been tweaked + // to allow codegen to make some extra optimizations. + + uint difference = (((uint)value & 0xF0U) << 4) + ((uint)value & 0x0FU) - 0x8989U; + uint packedResult = ((((uint)(-(int)difference) & 0x7070U) >> 4) + difference + 0xB9B9U) | (uint)casing; + + // The low byte of the packed result contains the hex representation of the incoming byte's low nibble. + // The adjacent byte of the packed result contains the hex representation of the incoming byte's high nibble. + + // Finally, write to the output buffer starting with the *highest* index so that codegen can + // elide all but the first bounds check. (This only works if 'startingIndex' is a compile-time constant.) 
+ + buffer[startingIndex + 1] = (byte)(packedResult); + buffer[startingIndex] = (byte)(packedResult >> 8); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteDigits(ulong value, Span<byte> buffer) + { + // We can mutate the 'value' parameter since it's a copy-by-value local. + // It'll be used to represent the value left over after each division by 10. + + for (int i = buffer.Length - 1; i >= 1; i--) + { + ulong temp = '0' + value; + value /= 10; + buffer[i] = (byte)(temp - (value * 10)); + } + + Debug.Assert(value < 10); + buffer[0] = (byte)('0' + value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteDigitsWithGroupSeparator(ulong value, Span<byte> buffer) + { + // We can mutate the 'value' parameter since it's a copy-by-value local. + // It'll be used to represent the value left over after each division by 10. + + int digitsWritten = 0; + for (int i = buffer.Length - 1; i >= 1; i--) + { + ulong temp = '0' + value; + value /= 10; + buffer[i] = (byte)(temp - (value * 10)); + if (digitsWritten == Utf8Constants.GroupSize - 1) + { + buffer[--i] = Utf8Constants.Comma; + digitsWritten = 0; + } + else + { + digitsWritten++; + } + } + + Debug.Assert(value < 10); + buffer[0] = (byte)('0' + value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteDigits(uint value, Span<byte> buffer) + { + // We can mutate the 'value' parameter since it's a copy-by-value local. + // It'll be used to represent the value left over after each division by 10. + + for (int i = buffer.Length - 1; i >= 1; i--) + { + uint temp = '0' + value; + value /= 10; + buffer[i] = (byte)(temp - (value * 10)); + } + + Debug.Assert(value < 10); + buffer[0] = (byte)('0' + value); + } + + /// <summary> + /// Writes a value [ 0000 .. 9999 ] to the buffer starting at the specified offset. + /// This method performs best when the starting index is a constant literal. 
+ /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteFourDecimalDigits(uint value, Span<byte> buffer, int startingIndex = 0) + { + Debug.Assert(0 <= value && value <= 9999); + + uint temp = '0' + value; + value /= 10; + buffer[startingIndex + 3] = (byte)(temp - (value * 10)); + + temp = '0' + value; + value /= 10; + buffer[startingIndex + 2] = (byte)(temp - (value * 10)); + + temp = '0' + value; + value /= 10; + buffer[startingIndex + 1] = (byte)(temp - (value * 10)); + + buffer[startingIndex] = (byte)('0' + value); + } + + /// <summary> + /// Writes a value [ 00 .. 99 ] to the buffer starting at the specified offset. + /// This method performs best when the starting index is a constant literal. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteTwoDecimalDigits(uint value, Span<byte> buffer, int startingIndex = 0) + { + Debug.Assert(0 <= value && value <= 99); + + uint temp = '0' + value; + value /= 10; + buffer[startingIndex + 1] = (byte)(temp - (value * 10)); + + buffer[startingIndex] = (byte)('0' + value); + } + + #endregion UTF-8 Helper methods + + #region Math Helper methods + + /// <summary> + /// We don't have access to Math.DivRem, so this is a copy of the implementation. + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ulong DivMod(ulong numerator, ulong denominator, out ulong modulo) + { + ulong div = numerator / denominator; + modulo = numerator - (div * denominator); + return div; + } + + /// <summary> + /// We don't have access to Math.DivRem, so this is a copy of the implementation. 
+ /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static uint DivMod(uint numerator, uint denominator, out uint modulo) + { + uint div = numerator / denominator; + modulo = numerator - (div * denominator); + return div; + } + + #endregion Math Helper methods + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Boolean.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Boolean.cs new file mode 100644 index 0000000000..b6452d24c6 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Boolean.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Buffers.Binary; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + /// <summary> + /// Formats a Boolean as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G (default) True/False + /// l true/false + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryFormat(bool value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + char symbol = FormattingHelpers.GetSymbolOrDefault(format, 'G'); + + if (value) + { + if (symbol == 'G') + { + // By having each branch perform its own call to TryWriteUInt32BigEndian, we ensure that a + // constant value is passed to this routine, which means the compiler can reverse endianness + // at compile time instead of runtime if necessary. + const uint TrueValueUppercase = ('T' << 24) + ('r' << 16) + ('u' << 8) + ('e' << 0); + if (!BinaryPrimitives.TryWriteUInt32BigEndian(destination, TrueValueUppercase)) + { + goto BufferTooSmall; + } + } + else if (symbol == 'l') + { + const uint TrueValueLowercase = ('t' << 24) + ('r' << 16) + ('u' << 8) + ('e' << 0); + if (!BinaryPrimitives.TryWriteUInt32BigEndian(destination, TrueValueLowercase)) + { + goto BufferTooSmall; + } + } + else + { + goto BadFormat; + } + + bytesWritten = 4; + return true; + } + else + { + if (symbol == 'G') + { + // This check can't be performed earlier because we need to throw if an invalid symbol is + // provided, even if the buffer is too small. 
+ if ((uint)4 >= (uint)destination.Length) + { + goto BufferTooSmall; + } + + const uint FalsValueUppercase = ('F' << 24) + ('a' << 16) + ('l' << 8) + ('s' << 0); + BinaryPrimitives.WriteUInt32BigEndian(destination, FalsValueUppercase); + } + else if (symbol == 'l') + { + if ((uint)4 >= (uint)destination.Length) + { + goto BufferTooSmall; + } + + const uint FalsValueLowercase = ('f' << 24) + ('a' << 16) + ('l' << 8) + ('s' << 0); + BinaryPrimitives.WriteUInt32BigEndian(destination, FalsValueLowercase); + } + else + { + goto BadFormat; + } + + destination[4] = (byte)'e'; + bytesWritten = 5; + return true; + } + + BufferTooSmall: + bytesWritten = 0; + return false; + + BadFormat: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.G.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.G.cs new file mode 100644 index 0000000000..7c4a2e342d --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.G.cs @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + // + // 'G' format for DateTime. 
+ // + // 0123456789012345678 + // --------------------------------- + // 05/25/2017 10:30:15 + // + // Also handles the default ToString() format for DateTimeOffset + // + // 01234567890123456789012345 + // -------------------------- + // 05/25/2017 10:30:15 -08:00 + // + private static bool TryFormatDateTimeG(DateTime value, TimeSpan offset, Span<byte> destination, out int bytesWritten) + { + const int MinimumBytesNeeded = 19; + + int bytesRequired = MinimumBytesNeeded; + + if (offset != Utf8Constants.NullUtcOffset) + { + bytesRequired += 7; // Space['+'|'-']hh:mm + } + + if (destination.Length < bytesRequired) + { + bytesWritten = 0; + return false; + } + + bytesWritten = bytesRequired; + + // Hoist most of the bounds checks on buffer. + { var unused = destination[MinimumBytesNeeded - 1]; } + + // TODO: Introduce an API which can parse DateTime instances efficiently, pulling out + // all their properties (Month, Day, etc.) in one shot. This would help avoid the + // duplicate work that implicitly results from calling these properties individually. 
+ + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Month, destination, 0); + destination[2] = Utf8Constants.Slash; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Day, destination, 3); + destination[5] = Utf8Constants.Slash; + + FormattingHelpers.WriteFourDecimalDigits((uint)value.Year, destination, 6); + destination[10] = Utf8Constants.Space; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Hour, destination, 11); + destination[13] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Minute, destination, 14); + destination[16] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Second, destination, 17); + + if (offset != Utf8Constants.NullUtcOffset) + { + byte sign; + + if (offset < default(TimeSpan) /* a "const" version of TimeSpan.Zero */) + { + sign = Utf8Constants.Minus; + offset = TimeSpan.FromTicks(-offset.Ticks); + } + else + { + sign = Utf8Constants.Plus; + } + + // Writing the value backward allows the JIT to optimize by + // performing a single bounds check against buffer. + + FormattingHelpers.WriteTwoDecimalDigits((uint)offset.Minutes, destination, 24); + destination[23] = Utf8Constants.Colon; + FormattingHelpers.WriteTwoDecimalDigits((uint)offset.Hours, destination, 21); + destination[20] = sign; + destination[19] = Utf8Constants.Space; + } + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.L.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.L.cs new file mode 100644 index 0000000000..699f91f39c --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.L.cs @@ -0,0 +1,66 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + // Rfc1123 - lowercase + // + // 01234567890123456789012345678 + // ----------------------------- + // tue, 03 jan 2017 08:08:05 gmt + // + private static bool TryFormatDateTimeL(DateTime value, Span<byte> destination, out int bytesWritten) + { + // Writing the check in this fashion elides all bounds checks on 'buffer' + // for the remainder of the method. + if ((uint)28 >= (uint)destination.Length) + { + bytesWritten = 0; + return false; + } + + uint dayAbbrev = s_dayAbbreviationsLowercase[(int)value.DayOfWeek]; + + destination[0] = (byte)dayAbbrev; + dayAbbrev >>= 8; + destination[1] = (byte)dayAbbrev; + dayAbbrev >>= 8; + destination[2] = (byte)dayAbbrev; + destination[3] = Utf8Constants.Comma; + destination[4] = Utf8Constants.Space; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Day, destination, 5); + destination[7] = Utf8Constants.Space; + + uint monthAbbrev = s_monthAbbreviationsLowercase[value.Month - 1]; + destination[8] = (byte)monthAbbrev; + monthAbbrev >>= 8; + destination[9] = (byte)monthAbbrev; + monthAbbrev >>= 8; + destination[10] = (byte)monthAbbrev; + destination[11] = Utf8Constants.Space; + + FormattingHelpers.WriteFourDecimalDigits((uint)value.Year, destination, 12); + destination[16] = Utf8Constants.Space; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Hour, destination, 17); + destination[19] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Minute, destination, 20); + destination[22] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Second, destination, 23); + destination[25] = Utf8Constants.Space; + + destination[26] = GMT1Lowercase; + destination[27] = GMT2Lowercase; + destination[28] = GMT3Lowercase; + + bytesWritten = 29; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.O.cs 
b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.O.cs new file mode 100644 index 0000000000..d9b7b181bc --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.O.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + // + // Roundtrippable format. One of + // + // 012345678901234567890123456789012 + // --------------------------------- + // 2017-06-12T05:30:45.7680000-07:00 + // 2017-06-12T05:30:45.7680000Z (Z is short for "+00:00" but also distinguishes DateTimeKind.Utc from DateTimeKind.Local) + // 2017-06-12T05:30:45.7680000 (interpreted as local time wrt to current time zone) + // + private static bool TryFormatDateTimeO(DateTime value, TimeSpan offset, Span<byte> destination, out int bytesWritten) + { + const int MinimumBytesNeeded = 27; + + int bytesRequired = MinimumBytesNeeded; + DateTimeKind kind = DateTimeKind.Local; + + if (offset == Utf8Constants.NullUtcOffset) + { + kind = value.Kind; + if (kind == DateTimeKind.Local) + { + offset = TimeZoneInfo.Local.GetUtcOffset(value); + bytesRequired += 6; + } + else if (kind == DateTimeKind.Utc) + { + bytesRequired += 1; + } + } + else + { + bytesRequired += 6; + } + + if (destination.Length < bytesRequired) + { + bytesWritten = 0; + return false; + } + + bytesWritten = bytesRequired; + + // Hoist most of the bounds checks on buffer. 
+ { var unused = destination[MinimumBytesNeeded - 1]; } + + FormattingHelpers.WriteFourDecimalDigits((uint)value.Year, destination, 0); + destination[4] = Utf8Constants.Minus; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Month, destination, 5); + destination[7] = Utf8Constants.Minus; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Day, destination, 8); + destination[10] = TimeMarker; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Hour, destination, 11); + destination[13] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Minute, destination, 14); + destination[16] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Second, destination, 17); + destination[19] = Utf8Constants.Period; + + FormattingHelpers.WriteDigits((uint)((ulong)value.Ticks % (ulong)TimeSpan.TicksPerSecond), destination.Slice(20, 7)); + + if (kind == DateTimeKind.Local) + { + byte sign; + + if (offset < default(TimeSpan) /* a "const" version of TimeSpan.Zero */) + { + sign = Utf8Constants.Minus; + offset = TimeSpan.FromTicks(-offset.Ticks); + } + else + { + sign = Utf8Constants.Plus; + } + + // Writing the value backward allows the JIT to optimize by + // performing a single bounds check against buffer. 
+ + FormattingHelpers.WriteTwoDecimalDigits((uint)offset.Minutes, destination, 31); + destination[30] = Utf8Constants.Colon; + FormattingHelpers.WriteTwoDecimalDigits((uint)offset.Hours, destination, 28); + destination[27] = sign; + + } + else if (kind == DateTimeKind.Utc) + { + destination[27] = UtcMarker; + } + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.R.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.R.cs new file mode 100644 index 0000000000..dd9ec459b7 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.R.cs @@ -0,0 +1,66 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + // Rfc1123 + // + // 01234567890123456789012345678 + // ----------------------------- + // Tue, 03 Jan 2017 08:08:05 GMT + // + private static bool TryFormatDateTimeR(DateTime value, Span<byte> destination, out int bytesWritten) + { + // Writing the check in this fashion elides all bounds checks on 'buffer' + // for the remainder of the method. 
+ if ((uint)28 >= (uint)destination.Length) + { + bytesWritten = 0; + return false; + } + + uint dayAbbrev = s_dayAbbreviations[(int)value.DayOfWeek]; + + destination[0] = (byte)dayAbbrev; + dayAbbrev >>= 8; + destination[1] = (byte)dayAbbrev; + dayAbbrev >>= 8; + destination[2] = (byte)dayAbbrev; + destination[3] = Utf8Constants.Comma; + destination[4] = Utf8Constants.Space; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Day, destination, 5); + destination[7] = Utf8Constants.Space; + + uint monthAbbrev = s_monthAbbreviations[value.Month - 1]; + destination[8] = (byte)monthAbbrev; + monthAbbrev >>= 8; + destination[9] = (byte)monthAbbrev; + monthAbbrev >>= 8; + destination[10] = (byte)monthAbbrev; + destination[11] = Utf8Constants.Space; + + FormattingHelpers.WriteFourDecimalDigits((uint)value.Year, destination, 12); + destination[16] = Utf8Constants.Space; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Hour, destination, 17); + destination[19] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Minute, destination, 20); + destination[22] = Utf8Constants.Colon; + + FormattingHelpers.WriteTwoDecimalDigits((uint)value.Second, destination, 23); + destination[25] = Utf8Constants.Space; + + destination[26] = GMT1; + destination[27] = GMT2; + destination[28] = GMT3; + + bytesWritten = 29; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.cs new file mode 100644 index 0000000000..5fbf3b7362 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Date.cs @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + private const byte TimeMarker = (byte)'T'; + private const byte UtcMarker = (byte)'Z'; + + private const byte GMT1 = (byte)'G'; + private const byte GMT2 = (byte)'M'; + private const byte GMT3 = (byte)'T'; + + private const byte GMT1Lowercase = (byte)'g'; + private const byte GMT2Lowercase = (byte)'m'; + private const byte GMT3Lowercase = (byte)'t'; + + // The three-letter abbreviation is packed into a 24-bit unsigned integer + // where the least significant byte represents the first letter. + private static readonly uint[] s_dayAbbreviations = new uint[] + { + 'S' + ('u' << 8) + ('n' << 16), + 'M' + ('o' << 8) + ('n' << 16), + 'T' + ('u' << 8) + ('e' << 16), + 'W' + ('e' << 8) + ('d' << 16), + 'T' + ('h' << 8) + ('u' << 16), + 'F' + ('r' << 8) + ('i' << 16), + 'S' + ('a' << 8) + ('t' << 16), + }; + + private static readonly uint[] s_dayAbbreviationsLowercase = new uint[] + { + 's' + ('u' << 8) + ('n' << 16), + 'm' + ('o' << 8) + ('n' << 16), + 't' + ('u' << 8) + ('e' << 16), + 'w' + ('e' << 8) + ('d' << 16), + 't' + ('h' << 8) + ('u' << 16), + 'f' + ('r' << 8) + ('i' << 16), + 's' + ('a' << 8) + ('t' << 16) + }; + + private static readonly uint[] s_monthAbbreviations = new uint[] + { + 'J' + ('a' << 8) + ('n' << 16), + 'F' + ('e' << 8) + ('b' << 16), + 'M' + ('a' << 8) + ('r' << 16), + 'A' + ('p' << 8) + ('r' << 16), + 'M' + ('a' << 8) + ('y' << 16), + 'J' + ('u' << 8) + ('n' << 16), + 'J' + ('u' << 8) + ('l' << 16), + 'A' + ('u' << 8) + ('g' << 16), + 'S' + ('e' << 8) + ('p' << 16), + 'O' + ('c' << 8) + ('t' << 16), + 'N' + ('o' << 8) + ('v' << 16), + 'D' + ('e' << 8) + ('c' << 16), + }; + + private static readonly uint[] s_monthAbbreviationsLowercase = new uint[] + { + 'j' + ('a' << 8) + ('n' << 16), + 'f' + ('e' << 8) + ('b' << 16), + 'm' + ('a' << 8) + ('r' << 16), + 'a' + ('p' << 8) + ('r' << 16), + 'm' + ('a' << 8) + ('y' << 16), + 'j' + ('u' << 8) + ('n' << 16), + 'j' + 
('u' << 8) + ('l' << 16), + 'a' + ('u' << 8) + ('g' << 16), + 's' + ('e' << 8) + ('p' << 16), + 'o' + ('c' << 8) + ('t' << 16), + 'n' + ('o' << 8) + ('v' << 16), + 'd' + ('e' << 8) + ('c' << 16), + }; + + /// <summary> + /// Formats a DateTimeOffset as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// default 05/25/2017 10:30:15 -08:00 + /// G 05/25/2017 10:30:15 + /// R Tue, 03 Jan 2017 08:08:05 GMT (RFC 1123) + /// l tue, 03 jan 2017 08:08:05 gmt (Lowercase RFC 1123) + /// O 2017-06-12T05:30:45.7680000-07:00 (Round-trippable) + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryFormat(DateTimeOffset value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + TimeSpan offset = Utf8Constants.NullUtcOffset; + char symbol = format.Symbol; + if (format.IsDefault) + { + symbol = 'G'; + offset = value.Offset; + } + + switch (symbol) + { + case 'R': + return TryFormatDateTimeR(value.UtcDateTime, destination, out bytesWritten); + + case 'l': + return TryFormatDateTimeL(value.UtcDateTime, destination, out bytesWritten); + + case 'O': + return TryFormatDateTimeO(value.DateTime, value.Offset, destination, out bytesWritten); + + case 'G': + return TryFormatDateTimeG(value.DateTime, offset, destination, out bytesWritten); + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + } + + /// <summary> + /// Formats a DateTime as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G (default) 05/25/2017 10:30:15 + /// R Tue, 03 Jan 2017 08:08:05 GMT (RFC 1123) + /// l tue, 03 jan 2017 08:08:05 gmt (Lowercase RFC 1123) + /// O 2017-06-12T05:30:45.7680000-07:00 (Round-trippable) + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryFormat(DateTime value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + char symbol = FormattingHelpers.GetSymbolOrDefault(format, 'G'); + + switch (symbol) + { + case 'R': + return TryFormatDateTimeR(value, destination, out bytesWritten); + + case 'l': + return TryFormatDateTimeL(value, destination, out bytesWritten); + + case 'O': + return TryFormatDateTimeO(value, Utf8Constants.NullUtcOffset, destination, out bytesWritten); + + case 'G': + return TryFormatDateTimeG(value, Utf8Constants.NullUtcOffset, destination, out bytesWritten); + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.E.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.E.cs new file mode 100644 index 0000000000..6cde07ff7f --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.E.cs @@ -0,0 +1,104 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + private static bool TryFormatDecimalE(ref NumberBuffer number, Span<byte> destination, out int bytesWritten, byte precision, byte exponentSymbol) + { + const int NumExponentDigits = 3; + + int scale = number.Scale; + ReadOnlySpan<byte> digits = number.Digits; + + int numBytesNeeded = + ((number.IsNegative) ? 1 : 0) // minus sign + + 1 // digits before the decimal point (exactly 1) + + ((precision == 0) ? 
0 : (precision + 1)) // period and the digits after the decimal point + + 2 // 'E' or 'e' followed by '+' or '-' + + NumExponentDigits; // exponent digits + + if (destination.Length < numBytesNeeded) + { + bytesWritten = 0; + return false; + } + + int dstIndex = 0; + int srcIndex = 0; + if (number.IsNegative) + { + destination[dstIndex++] = Utf8Constants.Minus; + } + + // + // Emit exactly one digit before the decimal point. + // + int exponent; + byte firstDigit = digits[srcIndex]; + if (firstDigit == 0) + { + destination[dstIndex++] = (byte)'0'; // Special case: number before the decimal point is exactly 0: Number does not store the zero in this case. + exponent = 0; + } + else + { + destination[dstIndex++] = firstDigit; + srcIndex++; + exponent = scale - 1; + } + + if (precision > 0) + { + destination[dstIndex++] = Utf8Constants.Period; + + // + // Emit digits after the decimal point. + // + int numDigitsEmitted = 0; + while (numDigitsEmitted < precision) + { + byte digit = digits[srcIndex]; + if (digit == 0) + { + while (numDigitsEmitted++ < precision) + { + destination[dstIndex++] = (byte)'0'; + } + break; + } + destination[dstIndex++] = digit; + srcIndex++; + numDigitsEmitted++; + } + } + + // Emit the exponent symbol + destination[dstIndex++] = exponentSymbol; + if (exponent >= 0) + { + destination[dstIndex++] = Utf8Constants.Plus; + } + else + { + destination[dstIndex++] = Utf8Constants.Minus; + exponent = -exponent; + } + + Debug.Assert(exponent < Number.DECIMAL_PRECISION, "If you're trying to reuse this routine for double/float, you'll need to review the code carefully for Decimal-specific assumptions."); + + // Emit exactly three digits for the exponent. 
+ destination[dstIndex++] = (byte)'0'; // The exponent for Decimal can never exceed 28 (let alone 99) + destination[dstIndex++] = (byte)((exponent / 10) + '0'); + destination[dstIndex++] = (byte)((exponent % 10) + '0'); + + Debug.Assert(dstIndex == numBytesNeeded); + bytesWritten = numBytesNeeded; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.F.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.F.cs new file mode 100644 index 0000000000..e2409f909b --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.F.cs @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + private static bool TryFormatDecimalF(ref NumberBuffer number, Span<byte> destination, out int bytesWritten, byte precision) + { + int scale = number.Scale; + ReadOnlySpan<byte> digits = number.Digits; + + int numBytesNeeded = + ((number.IsNegative) ? 1 : 0) // minus sign + + ((scale <= 0) ? 1 : scale) // digits before the decimal point (minimum 1) + + ((precision == 0) ? 0 : (precision + 1)); // if specified precision != 0, the decimal point and the digits after the decimal point (padded with zeroes if needed) + + if (destination.Length < numBytesNeeded) + { + bytesWritten = 0; + return false; + } + + int srcIndex = 0; + int dstIndex = 0; + if (number.IsNegative) + { + destination[dstIndex++] = Utf8Constants.Minus; + } + + // + // Emit digits before the decimal point. + // + if (scale <= 0) + { + destination[dstIndex++] = (byte)'0'; // The integer portion is 0 and not stored. The formatter, however, needs to emit it. 
+ } + else + { + while (srcIndex < scale) + { + byte digit = digits[srcIndex]; + if (digit == 0) + { + int numTrailingZeroes = scale - srcIndex; + for (int i = 0; i < numTrailingZeroes; i++) + { + destination[dstIndex++] = (byte)'0'; + } + break; + } + + destination[dstIndex++] = digit; + srcIndex++; + } + } + + if (precision > 0) + { + destination[dstIndex++] = Utf8Constants.Period; + + // + // Emit digits after the decimal point. + // + int numDigitsEmitted = 0; + if (scale < 0) + { + int numLeadingZeroesToEmit = Math.Min((int)precision, -scale); + for (int i = 0; i < numLeadingZeroesToEmit; i++) + { + destination[dstIndex++] = (byte)'0'; + } + numDigitsEmitted += numLeadingZeroesToEmit; + } + + while (numDigitsEmitted < precision) + { + byte digit = digits[srcIndex]; + if (digit == 0) + { + while (numDigitsEmitted++ < precision) + { + destination[dstIndex++] = (byte)'0'; + } + break; + } + destination[dstIndex++] = digit; + srcIndex++; + numDigitsEmitted++; + } + } + + Debug.Assert(dstIndex == numBytesNeeded); + bytesWritten = numBytesNeeded; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.G.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.G.cs new file mode 100644 index 0000000000..e9149ad8a9 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.G.cs @@ -0,0 +1,107 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + private static bool TryFormatDecimalG(ref NumberBuffer number, Span<byte> destination, out int bytesWritten) + { + int scale = number.Scale; + ReadOnlySpan<byte> digits = number.Digits; + int numDigits = number.NumDigits; + + bool isFraction = scale < numDigits; + int numBytesNeeded; + if (isFraction) + { + numBytesNeeded = numDigits + 1; // A fraction. Must include one for the decimal point. + if (scale <= 0) + { + numBytesNeeded += 1 + (-scale); // A fraction of the form 0.ddd. Need to emit the non-stored 0 before the decimal point plus (-scale) leading 0's after the decimal point. + } + } + else + { + numBytesNeeded = ((scale <= 0) ? 1 : scale); // An integral. Just emit the digits before the decimal point (minimum 1) and no decimal point. + } + + if (number.IsNegative) + { + numBytesNeeded++; // And the minus sign. + } + + if (destination.Length < numBytesNeeded) + { + bytesWritten = 0; + return false; + } + + int srcIndex = 0; + int dstIndex = 0; + + if (number.IsNegative) + { + destination[dstIndex++] = Utf8Constants.Minus; + } + + // + // Emit digits before the decimal point. + // + if (scale <= 0) + { + destination[dstIndex++] = (byte)'0'; // The integer portion is 0 and not stored. The formatter, however, needs to emit it. + } + else + { + while (srcIndex < scale) + { + byte digit = digits[srcIndex]; + if (digit == 0) + { + int numTrailingZeroes = scale - srcIndex; + for (int i = 0; i < numTrailingZeroes; i++) + { + destination[dstIndex++] = (byte)'0'; + } + break; + } + + destination[dstIndex++] = digit; + srcIndex++; + } + } + + if (isFraction) + { + destination[dstIndex++] = Utf8Constants.Period; + + // + // Emit digits after the decimal point. 
+ // + if (scale < 0) + { + int numLeadingZeroesToEmit = -scale; + for (int i = 0; i < numLeadingZeroesToEmit; i++) + { + destination[dstIndex++] = (byte)'0'; + } + } + + byte digit; + while ((digit = digits[srcIndex++]) != 0) + { + destination[dstIndex++] = digit; + } + } + + Debug.Assert(dstIndex == numBytesNeeded); + + bytesWritten = numBytesNeeded; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.cs new file mode 100644 index 0000000000..872e2d4f35 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Decimal.cs @@ -0,0 +1,106 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + /// <summary> + /// Formats a Decimal as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) General; specifying a precision throws NotSupportedException + /// F/f 12.45 Fixed point (default precision 2) + /// E/e 1.245000e+001 Exponential (default precision 6; exponent is a sign followed by three digits) + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryFormat(decimal value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + if (format.IsDefault) + { + format = 'G'; + } + + switch (format.Symbol) + { + case 'g': + case 'G': + { + if (format.Precision != StandardFormat.NoPrecision) + throw new NotSupportedException(SR.Argument_GWithPrecisionNotSupported); + NumberBuffer number = default; + Number.DecimalToNumber(value, ref number); + bool success = TryFormatDecimalG(ref number, destination, out bytesWritten); +#if DEBUG + // This DEBUG segment exists to close a code coverage hole inside TryFormatDecimalG(). Because we don't call RoundNumber() on this path, we have no way to feed + // TryFormatDecimalG() a number where trailing zeros before the decimal point have been cropped. So if the chance comes up, we'll crop the zeroes + // ourselves and make a second call to ensure we get the same outcome. + if (success) + { + Span<byte> digits = number.Digits; + int numDigits = number.NumDigits; + if (numDigits != 0 && number.Scale == numDigits && digits[numDigits - 1] == '0') + { + while (numDigits != 0 && digits[numDigits - 1] == '0') + { + digits[numDigits - 1] = 0; + numDigits--; + } + + number.CheckConsistency(); + + byte[] buffer2 = new byte[destination.Length]; + bool success2 = TryFormatDecimalG(ref number, buffer2, out int bytesWritten2); + Debug.Assert(success2); + Debug.Assert(bytesWritten2 == bytesWritten); + for (int i = 0; i < bytesWritten; i++) + { + Debug.Assert(destination[i] == buffer2[i]); + } + } + + } +#endif // DEBUG + return success; + } + + case 'f': + case 'F': + { + NumberBuffer number = default; + Number.DecimalToNumber(value, ref number); + byte precision = (format.Precision == StandardFormat.NoPrecision) ? 
(byte)2 : format.Precision; + Number.RoundNumber(ref number, number.Scale + precision); + return TryFormatDecimalF(ref number, destination, out bytesWritten, precision); + } + + case 'e': + case 'E': + { + NumberBuffer number = default; + Number.DecimalToNumber(value, ref number); + byte precision = (format.Precision == StandardFormat.NoPrecision) ? (byte)6 : format.Precision; + Number.RoundNumber(ref number, precision + 1); + return TryFormatDecimalE(ref number, destination, out bytesWritten, precision, exponentSymbol: (byte)format.Symbol); + } + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Float.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Float.cs new file mode 100644 index 0000000000..94591134c6 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Float.cs @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; +using System.Globalization; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + /// <summary> + /// Formats a Double as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. 
+ /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) General; specifying a precision throws NotSupportedException + /// F/f 12.45 Fixed point + /// E/e 1.245000e+001 Exponential + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(double value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + return TryFormatFloatingPoint<double>(value, destination, out bytesWritten, format); + } + + /// <summary> + /// Formats a Single as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) General; specifying a precision throws NotSupportedException + /// F/f 12.45 Fixed point + /// E/e 1.245000e+001 Exponential + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(float value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + return TryFormatFloatingPoint<float>(value, destination, out bytesWritten, format); + } + + // + // Common handler for TryFormat(Double) and TryFormat(Single). You may notice that this particular routine isn't getting into the "no allocation" spirit + // of things. The DoubleToNumber() code is incredibly complex and is one of the few pieces of Number formatting never C#-ized. It would really + // be preferable not to have another version of that lying around. 
Until we really hit a scenario where floating point formatting needs the perf, we'll + // make do with this. + // + private static bool TryFormatFloatingPoint<T>(T value, Span<byte> destination, out int bytesWritten, StandardFormat format) where T : IFormattable + { + if (format.IsDefault) + { + format = 'G'; + } + + switch (format.Symbol) + { + case 'g': + case 'G': + if (format.Precision != StandardFormat.NoPrecision) + throw new NotSupportedException(SR.Argument_GWithPrecisionNotSupported); + break; + + case 'f': + case 'F': + case 'e': + case 'E': + break; + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + + string formatString = format.ToString(); + string utf16Text = value.ToString(formatString, CultureInfo.InvariantCulture); + int length = utf16Text.Length; + if (length > destination.Length) + { + bytesWritten = 0; + return false; + } + + for (int i = 0; i < length; i++) + { + Debug.Assert(utf16Text[i] < 128, "A culture-invariant ToString() of a floating point expected to produce ASCII characters only."); + destination[i] = (byte)(utf16Text[i]); + } + + bytesWritten = length; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Guid.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Guid.cs new file mode 100644 index 0000000000..a6311954b6 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Guid.cs @@ -0,0 +1,219 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.InteropServices; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + #region Constants + + private const byte OpenBrace = (byte)'{'; + private const byte CloseBrace = (byte)'}'; + + private const byte OpenParen = (byte)'('; + private const byte CloseParen = (byte)')'; + + private const byte Dash = (byte)'-'; + + #endregion Constants + + /// <summary> + /// Formats a Guid as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// D (default) nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn + /// B {nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn} + /// P (nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn) + /// N nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryFormat(Guid value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + const int INSERT_DASHES = unchecked((int)0x80000000); + const int NO_DASHES = 0; + const int INSERT_CURLY_BRACES = (CloseBrace << 16) | (OpenBrace << 8); + const int INSERT_ROUND_BRACES = (CloseParen << 16) | (OpenParen << 8); + const int NO_BRACES = 0; + const int LEN_GUID_BASE = 32; + const int LEN_ADD_DASHES = 4; + const int LEN_ADD_BRACES = 2; + + // This is a 32-bit value whose contents (where 0 is the low byte) are: + // 0th byte: minimum required length of the output buffer, + // 1st byte: the ASCII byte to insert for the opening brace position (or 0 if no braces), + // 2nd byte: the ASCII byte to insert for the closing brace position (or 0 if no braces), + // 3rd byte: high bit set if dashes are to be inserted. + // + // The reason for keeping a single flag instead of separate vars is that we can avoid register spillage + // as we build up the output value. 
+ int flags; + + switch (FormattingHelpers.GetSymbolOrDefault(format, 'D')) + { + case 'D': // nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn + flags = INSERT_DASHES + NO_BRACES + LEN_GUID_BASE + LEN_ADD_DASHES; + break; + + case 'B': // {nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn} + flags = INSERT_DASHES + INSERT_CURLY_BRACES + LEN_GUID_BASE + LEN_ADD_DASHES + LEN_ADD_BRACES; + break; + + case 'P': // (nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn) + flags = INSERT_DASHES + INSERT_ROUND_BRACES + LEN_GUID_BASE + LEN_ADD_DASHES + LEN_ADD_BRACES; + break; + + case 'N': // nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + flags = NO_BRACES + NO_DASHES + LEN_GUID_BASE; + break; + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + + // At this point, the low byte of flags contains the minimum required length + + if ((byte)flags > destination.Length) + { + bytesWritten = 0; + return false; + } + + bytesWritten = (byte)flags; + flags >>= 8; + + // At this point, the low byte of flags contains the opening brace char (if any) + + if ((byte)flags != 0) + { + destination[0] = (byte)flags; + destination = destination.Slice(1); + } + flags >>= 8; + + // At this point, the low byte of flags contains the closing brace char (if any) + // And since we're performing arithmetic shifting the high bit of flags is set (flags is negative) if dashes are required + + DecomposedGuid guidAsBytes = default; + guidAsBytes.Guid = value; + + // When a GUID is blitted, the first three components are little-endian, and the last component is big-endian. + + // The line below forces the JIT to hoist the bounds check for the following segment. + // The JIT will optimize away the read, but it cannot optimize away the bounds check + // because it may have an observable side effect (throwing). + // We use 8 instead of 7 so that we also capture the dash if we're asked to insert one. 
+ + { var unused = destination[8]; } + FormattingHelpers.WriteHexByte(guidAsBytes.Byte03, destination, 0, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte02, destination, 2, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte01, destination, 4, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte00, destination, 6, FormattingHelpers.HexCasing.Lowercase); + + if (flags < 0 /* use dash? */) + { + destination[8] = Dash; + destination = destination.Slice(9); + } + else + { + destination = destination.Slice(8); + } + + { var unused = destination[4]; } + FormattingHelpers.WriteHexByte(guidAsBytes.Byte05, destination, 0, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte04, destination, 2, FormattingHelpers.HexCasing.Lowercase); + + if (flags < 0 /* use dash? */) + { + destination[4] = Dash; + destination = destination.Slice(5); + } + else + { + destination = destination.Slice(4); + } + + { var unused = destination[4]; } + FormattingHelpers.WriteHexByte(guidAsBytes.Byte07, destination, 0, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte06, destination, 2, FormattingHelpers.HexCasing.Lowercase); + + if (flags < 0 /* use dash? */) + { + destination[4] = Dash; + destination = destination.Slice(5); + } + else + { + destination = destination.Slice(4); + } + + { var unused = destination[4]; } + FormattingHelpers.WriteHexByte(guidAsBytes.Byte08, destination, 0, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte09, destination, 2, FormattingHelpers.HexCasing.Lowercase); + + if (flags < 0 /* use dash? 
*/) + { + destination[4] = Dash; + destination = destination.Slice(5); + } + else + { + destination = destination.Slice(4); + } + + { var unused = destination[11]; } // can't hoist bounds check on the final brace (if exists) + FormattingHelpers.WriteHexByte(guidAsBytes.Byte10, destination, 0, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte11, destination, 2, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte12, destination, 4, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte13, destination, 6, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte14, destination, 8, FormattingHelpers.HexCasing.Lowercase); + FormattingHelpers.WriteHexByte(guidAsBytes.Byte15, destination, 10, FormattingHelpers.HexCasing.Lowercase); + + if ((byte)flags != 0) + { + destination[12] = (byte)flags; + } + + return true; + } + + /// <summary> + /// Used to provide access to the individual bytes of a GUID. 
+ /// </summary> + [StructLayout(LayoutKind.Explicit)] + private struct DecomposedGuid + { + [FieldOffset(00)] public Guid Guid; + [FieldOffset(00)] public byte Byte00; + [FieldOffset(01)] public byte Byte01; + [FieldOffset(02)] public byte Byte02; + [FieldOffset(03)] public byte Byte03; + [FieldOffset(04)] public byte Byte04; + [FieldOffset(05)] public byte Byte05; + [FieldOffset(06)] public byte Byte06; + [FieldOffset(07)] public byte Byte07; + [FieldOffset(08)] public byte Byte08; + [FieldOffset(09)] public byte Byte09; + [FieldOffset(10)] public byte Byte10; + [FieldOffset(11)] public byte Byte11; + [FieldOffset(12)] public byte Byte12; + [FieldOffset(13)] public byte Byte13; + [FieldOffset(14)] public byte Byte14; + [FieldOffset(15)] public byte Byte15; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.D.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.D.cs new file mode 100644 index 0000000000..7532f0cf15 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.D.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. 
+ /// </summary> + public static partial class Utf8Formatter + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatInt64D(long value, byte precision, Span<byte> destination, out int bytesWritten) + { + bool insertNegationSign = false; + if (value < 0) + { + insertNegationSign = true; + value = -value; // NOTE(review): for long.MinValue this wraps back to long.MinValue, and the (ulong) cast below then yields the right magnitude (2^63); assumes unchecked arithmetic - confirm build settings + } + + return TryFormatUInt64D((ulong)value, precision, destination, insertNegationSign, out bytesWritten); + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.Default.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.Default.cs new file mode 100644 index 0000000000..046f5baf66 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.Default.cs @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. 
+ /// </summary> + public static partial class Utf8Formatter + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatInt64Default(long value, Span<byte> destination, out int bytesWritten) + { + if ((ulong)value < 10) + { + return TryFormatUInt32SingleDigit((uint)value, destination, out bytesWritten); + } + + if (IntPtr.Size == 8) // x64 + { + return TryFormatInt64MultipleDigits(value, destination, out bytesWritten); + } + else // x86 + { + if (value <= int.MaxValue && value >= int.MinValue) + { + return TryFormatInt32MultipleDigits((int)value, destination, out bytesWritten); + } + else + { + if (value <= (long)Utf8Constants.BillionMaxUIntValue && value >= -(long)Utf8Constants.BillionMaxUIntValue) + { + return value < 0 ? + TryFormatInt64MoreThanNegativeBillionMaxUInt(-value, destination, out bytesWritten) : + TryFormatUInt64LessThanBillionMaxUInt((ulong)value, destination, out bytesWritten); + } + else + { + return value < 0 ? + TryFormatInt64LessThanNegativeBillionMaxUInt(-value, destination, out bytesWritten) : + TryFormatUInt64MoreThanBillionMaxUInt((ulong)value, destination, out bytesWritten); + } + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatInt32MultipleDigits(int value, Span<byte> destination, out int bytesWritten) + { + if (value < 0) + { + value = -value; + int digitCount = FormattingHelpers.CountDigits((uint)value); + // WriteDigits does not do bounds checks + if (digitCount >= destination.Length) + { + bytesWritten = 0; + return false; + } + destination[0] = Utf8Constants.Minus; + bytesWritten = digitCount + 1; + FormattingHelpers.WriteDigits((uint)value, destination.Slice(1, digitCount)); + return true; + } + else + { + return TryFormatUInt32MultipleDigits((uint)value, destination, out bytesWritten); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatInt64MultipleDigits(long value, Span<byte> destination, out int bytesWritten) + 
{ + if (value < 0) + { + value = -value; + int digitCount = FormattingHelpers.CountDigits((ulong)value); + // WriteDigits does not do bounds checks + if (digitCount >= destination.Length) + { + bytesWritten = 0; + return false; + } + destination[0] = Utf8Constants.Minus; + bytesWritten = digitCount + 1; + FormattingHelpers.WriteDigits((ulong)value, destination.Slice(1, digitCount)); + return true; + } + else + { + return TryFormatUInt64MultipleDigits((ulong)value, destination, out bytesWritten); + } + } + + // Split long into two parts that can each fit in a uint - {1-10 digits}{9 digits} + private static bool TryFormatInt64MoreThanNegativeBillionMaxUInt(long value, Span<byte> destination, out int bytesWritten) + { + uint overNineDigits = (uint)(value / Utf8Constants.Billion); + uint lastNineDigits = (uint)(value - (overNineDigits * Utf8Constants.Billion)); + + int digitCountOverNineDigits = FormattingHelpers.CountDigits(overNineDigits); + Debug.Assert(digitCountOverNineDigits >= 1 && digitCountOverNineDigits <= 10); + int digitCount = digitCountOverNineDigits + 9; + // WriteDigits does not do bounds checks + if (digitCount >= destination.Length) + { + bytesWritten = 0; + return false; + } + destination[0] = Utf8Constants.Minus; + bytesWritten = digitCount + 1; + FormattingHelpers.WriteDigits(overNineDigits, destination.Slice(1, digitCountOverNineDigits)); + FormattingHelpers.WriteDigits(lastNineDigits, destination.Slice(digitCountOverNineDigits + 1, 9)); + return true; + } + + // Split long into three parts that can each fit in a uint - {1 digit}{9 digits}{9 digits} + private static bool TryFormatInt64LessThanNegativeBillionMaxUInt(long value, Span<byte> destination, out int bytesWritten) + { + // value can still be negative if value == long.MinValue + // Therefore, cast to ulong, since (ulong)value actually equals abs(long.MinValue) + ulong overNineDigits = (ulong)value / Utf8Constants.Billion; + uint lastNineDigits = (uint)((ulong)value - (overNineDigits * 
Utf8Constants.Billion)); + uint overEighteenDigits = (uint)(overNineDigits / Utf8Constants.Billion); + uint middleNineDigits = (uint)(overNineDigits - (overEighteenDigits * Utf8Constants.Billion)); + + int digitCountOverEighteenDigits = FormattingHelpers.CountDigits(overEighteenDigits); + Debug.Assert(digitCountOverEighteenDigits == 1); + int digitCount = digitCountOverEighteenDigits + 18; + // WriteDigits does not do bounds checks + if (digitCount >= destination.Length) + { + bytesWritten = 0; + return false; + } + destination[0] = Utf8Constants.Minus; + bytesWritten = digitCount + 1; + FormattingHelpers.WriteDigits(overEighteenDigits, destination.Slice(1, digitCountOverEighteenDigits)); + FormattingHelpers.WriteDigits(middleNineDigits, destination.Slice(digitCountOverEighteenDigits + 1, 9)); + FormattingHelpers.WriteDigits(lastNineDigits, destination.Slice(digitCountOverEighteenDigits + 1 + 9, 9)); + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.N.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.N.cs new file mode 100644 index 0000000000..1c01b8d60d --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.N.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. 
+ /// </summary> + public static partial class Utf8Formatter + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatInt64N(long value, byte precision, Span<byte> destination, out int bytesWritten) + { + bool insertNegationSign = false; + if (value < 0) + { + insertNegationSign = true; + value = -value; + } + + return TryFormatUInt64N((ulong)value, precision, destination, insertNegationSign, out bytesWritten); + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.cs new file mode 100644 index 0000000000..87966ca358 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Signed.cs @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. + /// </summary> + public static partial class Utf8Formatter + { + // + // Common worker for all signed integer TryFormat overloads + // + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatInt64(long value, ulong mask, Span<byte> destination, out int bytesWritten, StandardFormat format) + { + if (format.IsDefault) + { + return TryFormatInt64Default(value, destination, out bytesWritten); + } + + switch (format.Symbol) + { + case 'G': + case 'g': + if (format.HasPrecision) + throw new NotSupportedException(SR.Argument_GWithPrecisionNotSupported); // With a precision, 'G' can produce exponential format, even for integers. 
+ return TryFormatInt64D(value, format.Precision, destination, out bytesWritten); + + case 'd': + case 'D': + return TryFormatInt64D(value, format.Precision, destination, out bytesWritten); + + case 'n': + case 'N': + return TryFormatInt64N(value, format.Precision, destination, out bytesWritten); + + case 'x': + return TryFormatUInt64X((ulong)value & mask, format.Precision, true, destination, out bytesWritten); + + case 'X': + return TryFormatUInt64X((ulong)value & mask, format.Precision, false, destination, out bytesWritten); + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.D.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.D.cs new file mode 100644 index 0000000000..9cb8d64bc0 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.D.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. + /// </summary> + public static partial class Utf8Formatter + { + private static bool TryFormatUInt64D(ulong value, byte precision, Span<byte> destination, bool insertNegationSign, out int bytesWritten) + { + // Calculate the actual digit count and the number of padding zeroes requested. + // From all of this we can get the required buffer length. + + int digitCount = FormattingHelpers.CountDigits(value); + int leadingZeroCount = ((precision == StandardFormat.NoPrecision) ? 
0 : (int)precision) - digitCount; + if (leadingZeroCount < 0) + { + leadingZeroCount = 0; + } + + int requiredBufferLength = digitCount + leadingZeroCount; + + if (insertNegationSign) + { + requiredBufferLength++; + } + + if (requiredBufferLength > destination.Length) + { + bytesWritten = 0; + return false; + } + + bytesWritten = requiredBufferLength; + + if (insertNegationSign) + { + destination[0] = Utf8Constants.Minus; + destination = destination.Slice(1); + } + + if (leadingZeroCount > 0) + { + FormattingHelpers.FillWithAsciiZeros(destination.Slice(0, leadingZeroCount)); + } + FormattingHelpers.WriteDigits(value, destination.Slice(leadingZeroCount, digitCount)); + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.Default.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.Default.cs new file mode 100644 index 0000000000..d83591ed98 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.Default.cs @@ -0,0 +1,135 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. 
+ /// </summary> + public static partial class Utf8Formatter + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatUInt64Default(ulong value, Span<byte> destination, out int bytesWritten) + { + if (value < 10) + { + return TryFormatUInt32SingleDigit((uint)value, destination, out bytesWritten); + } + + if (IntPtr.Size == 8) // x64 + { + return TryFormatUInt64MultipleDigits(value, destination, out bytesWritten); + } + else // x86 + { + if (value <= uint.MaxValue) + { + return TryFormatUInt32MultipleDigits((uint)value, destination, out bytesWritten); + } + else + { + if (value <= Utf8Constants.BillionMaxUIntValue) + { + return TryFormatUInt64LessThanBillionMaxUInt(value, destination, out bytesWritten); + } + else + { + return TryFormatUInt64MoreThanBillionMaxUInt(value, destination, out bytesWritten); + } + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatUInt32SingleDigit(uint value, Span<byte> destination, out int bytesWritten) + { + if (destination.Length == 0) + { + bytesWritten = 0; + return false; + } + destination[0] = (byte)('0' + value); + bytesWritten = 1; + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatUInt32MultipleDigits(uint value, Span<byte> destination, out int bytesWritten) + { + int digitCount = FormattingHelpers.CountDigits(value); + // WriteDigits does not do bounds checks + if (digitCount > destination.Length) + { + bytesWritten = 0; + return false; + } + bytesWritten = digitCount; + FormattingHelpers.WriteDigits(value, destination.Slice(0, digitCount)); + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatUInt64MultipleDigits(ulong value, Span<byte> destination, out int bytesWritten) + { + int digitCount = FormattingHelpers.CountDigits(value); + // WriteDigits does not do bounds checks + if (digitCount > destination.Length) + { + bytesWritten = 0; + return 
false; + } + bytesWritten = digitCount; + FormattingHelpers.WriteDigits(value, destination.Slice(0, digitCount)); + return true; + } + + // Split ulong into two parts that can each fit in a uint - {1-10 digits}{9 digits} + private static bool TryFormatUInt64LessThanBillionMaxUInt(ulong value, Span<byte> destination, out int bytesWritten) + { + uint overNineDigits = (uint)(value / Utf8Constants.Billion); + uint lastNineDigits = (uint)(value - (overNineDigits * Utf8Constants.Billion)); + + int digitCountOverNineDigits = FormattingHelpers.CountDigits(overNineDigits); + Debug.Assert(digitCountOverNineDigits >= 1 && digitCountOverNineDigits <= 10); + int digitCount = digitCountOverNineDigits + 9; + // WriteDigits does not do bounds checks + if (digitCount > destination.Length) + { + bytesWritten = 0; + return false; + } + bytesWritten = digitCount; + FormattingHelpers.WriteDigits(overNineDigits, destination.Slice(0, digitCountOverNineDigits)); + FormattingHelpers.WriteDigits(lastNineDigits, destination.Slice(digitCountOverNineDigits, 9)); + return true; + } + + // Split ulong into three parts that can each fit in a uint - {1-2 digits}{9 digits}{9 digits} + private static bool TryFormatUInt64MoreThanBillionMaxUInt(ulong value, Span<byte> destination, out int bytesWritten) + { + ulong overNineDigits = value / Utf8Constants.Billion; + uint lastNineDigits = (uint)(value - (overNineDigits * Utf8Constants.Billion)); + uint overEighteenDigits = (uint)(overNineDigits / Utf8Constants.Billion); + uint middleNineDigits = (uint)(overNineDigits - (overEighteenDigits * Utf8Constants.Billion)); + + int digitCountOverEighteenDigits = FormattingHelpers.CountDigits(overEighteenDigits); + Debug.Assert(digitCountOverEighteenDigits >= 1 && digitCountOverEighteenDigits <= 2); + int digitCount = digitCountOverEighteenDigits + 18; + // WriteDigits does not do bounds checks + if (digitCount > destination.Length) + { + bytesWritten = 0; + return false; + } + bytesWritten = digitCount; + 
FormattingHelpers.WriteDigits(overEighteenDigits, destination.Slice(0, digitCountOverEighteenDigits)); + FormattingHelpers.WriteDigits(middleNineDigits, destination.Slice(digitCountOverEighteenDigits, 9)); + FormattingHelpers.WriteDigits(lastNineDigits, destination.Slice(digitCountOverEighteenDigits + 9, 9)); + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.N.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.N.cs new file mode 100644 index 0000000000..ce21c0d3dd --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.N.cs @@ -0,0 +1,58 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. + /// </summary> + public static partial class Utf8Formatter + { + private static bool TryFormatUInt64N(ulong value, byte precision, Span<byte> destination, bool insertNegationSign, out int bytesWritten) + { + // Calculate the actual digit count, number of group separators required, and the + // number of trailing zeros requested. From all of this we can get the required + // buffer length. + + int digitCount = FormattingHelpers.CountDigits(value); + int commaCount = (digitCount - 1) / 3; + int trailingZeroCount = (precision == StandardFormat.NoPrecision) ? 
2 /* default for 'N' */ : precision; + + int requiredBufferLength = digitCount + commaCount; + if (trailingZeroCount > 0) + { + requiredBufferLength += trailingZeroCount + 1; + } + + if (insertNegationSign) + { + requiredBufferLength++; + } + + if (requiredBufferLength > destination.Length) + { + bytesWritten = 0; + return false; + } + + bytesWritten = requiredBufferLength; + + if (insertNegationSign) + { + destination[0] = Utf8Constants.Minus; + destination = destination.Slice(1); + } + + FormattingHelpers.WriteDigitsWithGroupSeparator(value, destination.Slice(0, digitCount + commaCount)); + + if (trailingZeroCount > 0) + { + destination[digitCount + commaCount] = Utf8Constants.Period; + FormattingHelpers.FillWithAsciiZeros(destination.Slice(digitCount + commaCount + 1, trailingZeroCount)); + } + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.X.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.X.cs new file mode 100644 index 0000000000..4cf4d52b5c --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.X.cs @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. + /// </summary> + public static partial class Utf8Formatter + { + private static bool TryFormatUInt64X(ulong value, byte precision, bool useLower, Span<byte> destination, out int bytesWritten) + { + int actualDigitCount = FormattingHelpers.CountHexDigits(value); + int computedOutputLength = (precision == StandardFormat.NoPrecision) + ? 
actualDigitCount + : Math.Max(precision, actualDigitCount); + + if (destination.Length < computedOutputLength) + { + bytesWritten = 0; + return false; + } + + bytesWritten = computedOutputLength; + string hexTable = (useLower) ? FormattingHelpers.HexTableLower : FormattingHelpers.HexTableUpper; + + // Writing the output backward in this manner allows the JIT to elide + // bounds checking on the output buffer. The JIT won't elide the bounds + // check on the hex table lookup, but we can live with that for now. + + // It doesn't quite make sense to use the fast hex conversion functionality + // for this method since that routine works on bytes, and here we're working + // directly with nibbles. There may be opportunity for improvement by special- + // casing output lengths of 2, 4, 8, and 16 and running them down optimized + // code paths. + + while ((uint)(--computedOutputLength) < (uint)destination.Length) + { + destination[computedOutputLength] = (byte)hexTable[(int)value & 0xf]; + value >>= 4; + } + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.cs new file mode 100644 index 0000000000..b143061a58 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.Unsigned.cs @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. 
+ /// </summary> + public static partial class Utf8Formatter + { + // + // Common worker for all unsigned integer TryFormat overloads + // + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFormatUInt64(ulong value, Span<byte> destination, out int bytesWritten, StandardFormat format) + { + if (format.IsDefault) + { + return TryFormatUInt64Default(value, destination, out bytesWritten); + } + + switch (format.Symbol) + { + case 'G': + case 'g': + if (format.HasPrecision) + throw new NotSupportedException(SR.Argument_GWithPrecisionNotSupported); // With a precision, 'G' can produce exponential format, even for integers. + return TryFormatUInt64D(value, format.Precision, destination, insertNegationSign: false, out bytesWritten); + + case 'd': + case 'D': + return TryFormatUInt64D(value, format.Precision, destination, insertNegationSign: false, out bytesWritten); + + case 'n': + case 'N': + return TryFormatUInt64N(value, format.Precision, destination, insertNegationSign: false, out bytesWritten); + + case 'x': + return TryFormatUInt64X(value, format.Precision, true /* useLower */, destination, out bytesWritten); + + case 'X': + return TryFormatUInt64X(value, format.Precision, false /* useLower */, destination, out bytesWritten); + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.cs new file mode 100644 index 0000000000..3b83fb7512 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.Integer.cs @@ -0,0 +1,208 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to format common data types as Utf8 strings. + /// </summary> + public static partial class Utf8Formatter + { + /// <summary> + /// Formats a Byte as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(byte value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatUInt64(value, destination, out bytesWritten, format); + + /// <summary> + /// Formats an SByte as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. 
+ /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + [CLSCompliant(false)] + public static bool TryFormat(sbyte value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatInt64(value, 0xff, destination, out bytesWritten, format); + + /// <summary> + /// Formats a UInt16 as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + [CLSCompliant(false)] + public static bool TryFormat(ushort value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatUInt64(value, destination, out bytesWritten, format); + + /// <summary> + /// Formats an Int16 as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success.
"bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(short value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatInt64(value, 0xffff, destination, out bytesWritten, format); + + /// <summary> + /// Formats a UInt32 as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + [CLSCompliant(false)] + public static bool TryFormat(uint value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatUInt64(value, destination, out bytesWritten, format); + + /// <summary> + /// Formats an Int32 as a UTF8 string. 
+ /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(int value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatInt64(value, 0xffffffff, destination, out bytesWritten, format); + + /// <summary> + /// Formats a UInt64 as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + [CLSCompliant(false)] + public static bool TryFormat(ulong value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatUInt64(value, destination, out bytesWritten, format); + + /// <summary> + /// Formats an Int64 as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(long value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + => TryFormatInt64(value, 0xffffffffffffffff, destination, out bytesWritten, format); + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.TimeSpan.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.TimeSpan.cs new file mode 100644 index 0000000000..1e29383bf7 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Formatter/Utf8Formatter.TimeSpan.cs @@ -0,0 +1,222 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Formatter + { + /// <summary> + /// Formats a TimeSpan as a UTF8 string. + /// </summary> + /// <param name="value">Value to format</param> + /// <param name="destination">Buffer to write the UTF8-formatted value to</param> + /// <param name="bytesWritten">Receives the length of the formatted text in bytes</param> + /// <param name="format">The standard format to use</param> + /// <returns> + /// true for success. "bytesWritten" contains the length of the formatted text in bytes. + /// false if buffer was too short. Iteratively increase the size of the buffer and retry until it succeeds. + /// </returns> + /// <remarks> + /// Formats supported: + /// c/t/T (default) [-][d.]hh:mm:ss[.fffffff] (constant format) + /// G [-]d:hh:mm:ss.fffffff (general long) + /// g [-][d:][h]h:mm:ss[.f[f[f[f[f[f[f]]]]]]] (general short) + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryFormat(TimeSpan value, Span<byte> destination, out int bytesWritten, StandardFormat format = default) + { + char symbol = FormattingHelpers.GetSymbolOrDefault(format, 'c'); + + switch (symbol) + { + case 'c': + case 'G': + case 'g': + break; + + case 't': + case 'T': + symbol = 'c'; + break; + + default: + return ThrowHelper.TryFormatThrowFormatException(out bytesWritten); + } + + // First, calculate how large an output buffer is needed to hold the entire output. + + int requiredOutputLength = 8; // start with "hh:mm:ss" and adjust as necessary + + uint fraction; + ulong totalSecondsRemaining; + { + // Turn this into a non-negative TimeSpan if possible.
+ var ticks = value.Ticks; + if (ticks < 0) + { + ticks = -ticks; + if (ticks < 0) + { + Debug.Assert(ticks == long.MinValue /* -9223372036854775808 */); + + // We computed these ahead of time; they're straight from the decimal representation of Int64.MinValue. + fraction = 4775808; + totalSecondsRemaining = 922337203685; + goto AfterComputeFraction; + } + } + + totalSecondsRemaining = FormattingHelpers.DivMod((ulong)Math.Abs(value.Ticks), TimeSpan.TicksPerSecond, out ulong fraction64); + fraction = (uint)fraction64; + } + + AfterComputeFraction: + + int fractionDigits = 0; + if (symbol == 'c') + { + // Only write out the fraction if it's non-zero, and in that + // case write out the entire fraction (all digits). + if (fraction != 0) + { + fractionDigits = Utf8Constants.DateTimeNumFractionDigits; + } + } + else if (symbol == 'G') + { + // Always write out the fraction, even if it's zero. + fractionDigits = Utf8Constants.DateTimeNumFractionDigits; + } + else + { + // Only write out the fraction if it's non-zero, and in that + // case write out only the most significant digits. + if (fraction != 0) + { + fractionDigits = Utf8Constants.DateTimeNumFractionDigits - FormattingHelpers.CountDecimalTrailingZeros(fraction, out fraction); + } + } + + Debug.Assert(fraction < 10_000_000); + + // If we're going to write out a fraction, also need to write the leading decimal. + if (fractionDigits != 0) + { + requiredOutputLength += fractionDigits + 1; + } + + ulong totalMinutesRemaining = 0; + ulong seconds = 0; + if (totalSecondsRemaining > 0) + { + // Only compute minutes if the TimeSpan has an absolute value of >= 1 minute. + totalMinutesRemaining = FormattingHelpers.DivMod(totalSecondsRemaining, 60 /* seconds per minute */, out seconds); + } + + Debug.Assert(seconds < 60); + + ulong totalHoursRemaining = 0; + ulong minutes = 0; + if (totalMinutesRemaining > 0) + { + // Only compute hours if the TimeSpan has an absolute value of >= 1 hour. 
+ totalHoursRemaining = FormattingHelpers.DivMod(totalMinutesRemaining, 60 /* minutes per hour */, out minutes); + } + + Debug.Assert(minutes < 60); + + // At this point, we can switch over to 32-bit divmod since the data has shrunk far enough. + Debug.Assert(totalHoursRemaining <= uint.MaxValue); + + uint days = 0; + uint hours = 0; + if (totalHoursRemaining > 0) + { + // Only compute days if the TimeSpan has an absolute value of >= 1 day. + days = FormattingHelpers.DivMod((uint)totalHoursRemaining, 24 /* hours per day */, out hours); + } + + Debug.Assert(hours < 24); + + int hourDigits = 2; + if (hours < 10 && symbol == 'g') + { + // Only writing a one-digit hour, not a two-digit hour + hourDigits--; + requiredOutputLength--; + } + + int dayDigits = 0; + if (days == 0) + { + if (symbol == 'G') + { + requiredOutputLength += 2; // for the leading "0:" + dayDigits = 1; + } + } + else + { + dayDigits = FormattingHelpers.CountDigits(days); + requiredOutputLength += dayDigits + 1; // for the leading "d:" (or "d.") + } + + if (value.Ticks < 0) + { + requiredOutputLength++; // for the leading '-' sign + } + + if (destination.Length < requiredOutputLength) + { + bytesWritten = 0; + return false; + } + + bytesWritten = requiredOutputLength; + + int idx = 0; + + // Write leading '-' if necessary + if (value.Ticks < 0) + { + destination[idx++] = Utf8Constants.Minus; + } + + // Write day (and separator) if necessary + if (dayDigits > 0) + { + FormattingHelpers.WriteDigits(days, destination.Slice(idx, dayDigits)); + idx += dayDigits; + destination[idx++] = (symbol == 'c') ? 
Utf8Constants.Period : Utf8Constants.Colon; + } + + // Write "[h]h:mm:ss" + FormattingHelpers.WriteDigits(hours, destination.Slice(idx, hourDigits)); + idx += hourDigits; + destination[idx++] = Utf8Constants.Colon; + FormattingHelpers.WriteDigits((uint)minutes, destination.Slice(idx, 2)); + idx += 2; + destination[idx++] = Utf8Constants.Colon; + FormattingHelpers.WriteDigits((uint)seconds, destination.Slice(idx, 2)); + idx += 2; + + // Write fraction (and separator) if necessary + if (fractionDigits > 0) + { + destination[idx++] = Utf8Constants.Period; + FormattingHelpers.WriteDigits(fraction, destination.Slice(idx, fractionDigits)); + idx += fractionDigits; + } + + // And we're done! + + Debug.Assert(idx == requiredOutputLength); + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/ParserHelpers.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/ParserHelpers.cs new file mode 100644 index 0000000000..b527433a7d --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/ParserHelpers.cs @@ -0,0 +1,55 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
using System.Runtime.CompilerServices;

namespace System.Buffers.Text
{
    /// <summary>
    /// Constants and small helpers shared by the UTF8 parsing routines.
    /// </summary>
    internal static class ParserHelpers
    {
        // Overflow lengths for the unsigned integer types (decimal and hexadecimal text).
        public const int ByteOverflowLength = 3;
        public const int ByteOverflowLengthHex = 2;
        public const int UInt16OverflowLength = 5;
        public const int UInt16OverflowLengthHex = 4;
        public const int UInt32OverflowLength = 10;
        public const int UInt32OverflowLengthHex = 8;
        public const int UInt64OverflowLength = 20;
        public const int UInt64OverflowLengthHex = 16;

        // Overflow lengths for the signed integer types (decimal and hexadecimal text).
        public const int SByteOverflowLength = 3;
        public const int SByteOverflowLengthHex = 2;
        public const int Int16OverflowLength = 5;
        public const int Int16OverflowLengthHex = 4;
        public const int Int32OverflowLength = 10;
        public const int Int32OverflowLengthHex = 8;
        public const int Int64OverflowLength = 19;
        public const int Int64OverflowLengthHex = 16;

        /// <summary>
        /// 256-entry table indexed by byte value. Entries for '0'-'9', 'A'-'F' and 'a'-'f' hold the
        /// hexadecimal digit value (0..15); every other entry holds 0xFF.
        /// </summary>
        public static readonly byte[] s_hexLookup =
        {
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //   0 ..  15
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  16 ..  31
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  32 ..  47
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  48 ..  63 ('0'-'9')
            0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  64 ..  79 ('A'-'F')
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  80 ..  95
            0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, //  96 .. 111 ('a'-'f')
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 112 .. 127
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 128 .. 143
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 144 .. 159
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 160 .. 175
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 176 .. 191
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 192 .. 207
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 208 .. 223
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 224 .. 239
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF  // 240 .. 255
        };

        /// <summary>
        /// Returns true when <paramref name="i"/> is the code of an ASCII decimal digit ('0'..'9').
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static bool IsDigit(int i)
        {
            // A single unsigned comparison covers both ends of the range: values below '0' wrap around to large numbers.
            uint offsetFromZero = (uint)(i - '0');
            return offsetFromZero <= (uint)('9' - '0');
        }
    }
}
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Boolean.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Boolean.cs
new file mode 100644
index 0000000000..41c57143a8
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Boolean.cs
@@ -0,0 +1,61 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Buffers.Binary;

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        /// <summary>
        /// Parses a Boolean at the start of a Utf8 string.
        /// </summary>
        /// <param name="source">The Utf8 string to parse</param>
        /// <param name="value">Receives the parsed value</param>
        /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param>
        /// <param name="standardFormat">Expected format of the Utf8 string</param>
        /// <returns>
        /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed.
/// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0.
        /// </returns>
        /// <remarks>
        /// Formats supported:
        ///     G (default)   True/False
        ///     l             true/false
        /// </remarks>
        /// <exceptions>
        /// <cref>System.FormatException</cref> if the format is not valid for this data type.
        /// </exceptions>
        public static bool TryParse(ReadOnlySpan<byte> source, out bool value, out int bytesConsumed, char standardFormat = default)
        {
            switch (standardFormat)
            {
                case '\0':
                case 'G':
                case 'l':
                    break;

                default:
                    return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed);
            }

            if (source.Length >= 4)
            {
                // Clearing bit 0x20 of every byte folds ASCII lower-case letters onto upper-case ones,
                // so a single integer compare matches the word regardless of case.
                int firstFourBytes = BinaryPrimitives.ReadInt32LittleEndian(source) & ~0x20202020;

                if (firstFourBytes == 0x45555254 /* 'EURT' */)
                {
                    value = true;
                    bytesConsumed = 4;
                    return true;
                }

                bool startsWithFals = firstFourBytes == 0x534c4146 /* 'SLAF' */;
                if (startsWithFals && source.Length >= 5 && (source[4] & ~0x20) == 'E')
                {
                    value = false;
                    bytesConsumed = 5;
                    return true;
                }
            }

            value = default;
            bytesConsumed = 0;
            return false;
        }
    }
}
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.Default.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.Default.cs
new file mode 100644
index 0000000000..73578ea88c
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.Default.cs
@@ -0,0 +1,102 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        //
        // ToString() format for DateTimeOffset. Does not have a corresponding format symbol but it
        // is the "G" format postpended with the UTC offset.
//
        //    01234567890123456789012345
        //    --------------------------
        //    05/25/2017 10:30:15 -08:00
        //
        // Parses the fixed 26-byte layout shown above: the 19-byte "G" date/time, a space, a sign
        // and an "hh:mm" UTC offset. On failure "value" is default and "bytesConsumed" is 0; on
        // success "bytesConsumed" is 26.
        //
        private static bool TryParseDateTimeOffsetDefault(ReadOnlySpan<byte> source, out DateTimeOffset value, out int bytesConsumed)
        {
            // Assume failure; the outputs are overwritten only once the whole string has been validated.
            value = default;
            bytesConsumed = 0;

            if (source.Length < 26)
            {
                return false;
            }

            if (!TryParseDateTimeG(source, out DateTime dateTime, out _, out _))
            {
                return false;
            }

            if (source[19] != Utf8Constants.Space)
            {
                return false;
            }

            byte sign = source[20];
            if (sign != Utf8Constants.Plus && sign != Utf8Constants.Minus)
            {
                return false;
            }

            // Subtracting '0' as unsigned turns every non-digit byte into a value greater than 9.
            uint hoursTens = source[21] - 48u; // '0'
            uint hoursOnes = source[22] - 48u; // '0'
            if (hoursTens > 9 || hoursOnes > 9)
            {
                return false;
            }

            if (source[23] != Utf8Constants.Colon)
            {
                return false;
            }

            uint minutesTens = source[24] - 48u; // '0'
            uint minutesOnes = source[25] - 48u; // '0'
            if (minutesTens > 9 || minutesOnes > 9)
            {
                return false;
            }

            int offsetHours = (int)(hoursTens * 10 + hoursOnes);
            int offsetMinutes = (int)(minutesTens * 10 + minutesOnes);

            // Range checking of the offset and of the resulting DateTimeOffset is done by the factory.
            if (!TryCreateDateTimeOffset(dateTime: dateTime, offsetNegative: sign == Utf8Constants.Minus, offsetHours: offsetHours, offsetMinutes: offsetMinutes, out value))
            {
                value = default;
                return false;
            }

            bytesConsumed = 26;
            return true;
        }
    }
}
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.G.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.G.cs
new file mode 100644
index 0000000000..6e8edbcbdf
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.G.cs
@@ -0,0 +1,177 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        //
        // 'G' format for DateTime.
        //
        //    0123456789012345678
        //    -------------------
        //    05/25/2017 10:30:15
        //
        // On failure every output is default/0; on success "bytesConsumed" is 19 and "value" is the
        // DateTime component of "valueAsOffset".
        //
        private static bool TryParseDateTimeG(ReadOnlySpan<byte> source, out DateTime value, out DateTimeOffset valueAsOffset, out int bytesConsumed)
        {
            // Start from the failure results so that each rejection below is a plain "return false".
            value = default;
            valueAsOffset = default;
            bytesConsumed = 0;

            if (source.Length < 19)
            {
                return false;
            }

            // Fixed punctuation: "MM/dd/yyyy HH:mm:ss".
            if (source[2] != Utf8Constants.Slash
                || source[5] != Utf8Constants.Slash
                || source[10] != Utf8Constants.Space
                || source[13] != Utf8Constants.Colon
                || source[16] != Utf8Constants.Colon)
            {
                return false;
            }

            // Fixed-width numeric fields. The four-digit year is read as two two-digit halves.
            if (!TryReadTwoDigits(source, 0, out int month)
                || !TryReadTwoDigits(source, 3, out int day)
                || !TryReadTwoDigits(source, 6, out int yearHigh)
                || !TryReadTwoDigits(source, 8, out int yearLow)
                || !TryReadTwoDigits(source, 11, out int hour)
                || !TryReadTwoDigits(source, 14, out int minute)
                || !TryReadTwoDigits(source, 17, out int second))
            {
                return false;
            }

            int year = (yearHigh * 100) + yearLow;

            if (!TryCreateDateTimeOffsetInterpretingDataAsLocalTime(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: 0, out valueAsOffset))
            {
                valueAsOffset = default;
                return false;
            }

            value = valueAsOffset.DateTime;
            bytesConsumed = 19;
            return true;

            // Reads the two ASCII decimal digits at text[index] and text[index + 1].
            bool TryReadTwoDigits(ReadOnlySpan<byte> text, int index, out int number)
            {
                uint tens = text[index] - 48u; // '0'
                uint ones = text[index + 1] - 48u; // '0'

                if (tens > 9 || ones > 9)
                {
                    number = 0;
                    return false;
                }

                number = (int)(tens * 10 + ones);
                return true;
            }
        }
    }
}
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.Helpers.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.Helpers.cs
new file mode 100644
index 0000000000..d2fb06829a
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.Helpers.cs
@@ -0,0 +1,162 @@
//
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        /// <summary>
        /// Builds a DateTimeOffset from a DateTime and a sign/hours/minutes UTC offset, returning false
        /// (with a default value) instead of throwing when the offset or the result is out of range.
        /// </summary>
        private static bool TryCreateDateTimeOffset(DateTime dateTime, bool offsetNegative, int offsetHours, int offsetMinutes, out DateTimeOffset value)
        {
            value = default;

            // The unsigned casts reject negative inputs together with too-large ones.
            if (((uint)offsetHours) > Utf8Constants.DateTimeMaxUtcOffsetHours || ((uint)offsetMinutes) > 59)
            {
                return false;
            }

            // At the maximum hour count no additional minutes are allowed.
            if (offsetHours == Utf8Constants.DateTimeMaxUtcOffsetHours && offsetMinutes != 0)
            {
                return false;
            }

            long offsetSeconds = ((long)offsetHours) * 3600 + ((long)offsetMinutes) * 60;
            long offsetTicks = offsetSeconds * TimeSpan.TicksPerSecond;
            if (offsetNegative)
            {
                offsetTicks = -offsetTicks;
            }

            try
            {
                value = new DateTimeOffset(ticks: dateTime.Ticks, offset: new TimeSpan(ticks: offsetTicks));
                return true;
            }
            catch (ArgumentOutOfRangeException)
            {
                // If we got here, the combination of the DateTime + UTC offset strayed outside the 1..9999 year range. This case seems rare enough
                // that it's better to catch the exception rather than replicate DateTime's range checking (which it's going to do anyway.)
                value = default;
                return false;
            }
        }

        /// <summary>
        /// Overflow-safe DateTimeOffset factory.
/// </summary>
        private static bool TryCreateDateTimeOffset(int year, int month, int day, int hour, int minute, int second, int fraction, bool offsetNegative, int offsetHours, int offsetMinutes, out DateTimeOffset value)
        {
            // Validate the date/time fields first, then combine the result with the UTC offset.
            if (TryCreateDateTime(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: fraction, kind: DateTimeKind.Unspecified, out DateTime dateTime)
                && TryCreateDateTimeOffset(dateTime: dateTime, offsetNegative: offsetNegative, offsetHours: offsetHours, offsetMinutes: offsetMinutes, out value))
            {
                return true;
            }

            value = default;
            return false;
        }

        /// <summary>
        /// Builds a DateTimeOffset from date/time fields that are treated as DateTimeKind.Local,
        /// returning false (with a default value) instead of throwing when the fields or the result are out of range.
        /// </summary>
        private static bool TryCreateDateTimeOffsetInterpretingDataAsLocalTime(int year, int month, int day, int hour, int minute, int second, int fraction, out DateTimeOffset value)
        {
            value = default;

            if (!TryCreateDateTime(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: fraction, kind: DateTimeKind.Local, out DateTime dateTime))
            {
                return false;
            }

            try
            {
                value = new DateTimeOffset(dateTime);
                return true;
            }
            catch (ArgumentOutOfRangeException)
            {
                // If we got here, the combination of the DateTime + UTC offset strayed outside the 1..9999 year range. This case seems rare enough
                // that it's better to catch the exception rather than replicate DateTime's range checking (which it's going to do anyway.)
                value = default;
                return false;
            }
        }

        /// <summary>
        /// Overflow-safe DateTime factory.
/// </summary>
        private static bool TryCreateDateTime(int year, int month, int day, int hour, int minute, int second, int fraction, DateTimeKind kind, out DateTime value)
        {
            value = default;

            if (year == 0)
            {
                return false;
            }

            Debug.Assert(year <= 9999); // All of our callers to date parse the year from fixed 4-digit fields so this value is trusted.

            // Unsigned arithmetic maps month 0 (and negative months) to a huge value, so one compare checks 1..12.
            uint monthMinusOne = ((uint)month) - 1;
            if (monthMinusOne >= 12)
            {
                return false;
            }

            // Days 1..28 are valid in every month; only the larger values need the calendar lookup.
            uint dayMinusOne = ((uint)day) - 1;
            if (dayMinusOne >= 28 && dayMinusOne >= DateTime.DaysInMonth(year, month))
            {
                return false;
            }

            if (((uint)hour) > 23 || ((uint)minute) > 59 || ((uint)second) > 59)
            {
                return false;
            }

            Debug.Assert(fraction >= 0 && fraction <= Utf8Constants.MaxDateTimeFraction); // All of our callers to date parse the fraction from fixed 7-digit fields so this value is trusted.

            // Whole days elapsed before the requested date (leap-year rule: every 4th year, except centuries not divisible by 400).
            int[] daysBeforeMonth = DateTime.IsLeapYear(year) ? s_daysToMonth366 : s_daysToMonth365;
            int priorYears = year - 1;
            int totalDays = (priorYears * 365) + (priorYears / 4) - (priorYears / 100) + (priorYears / 400) + daysBeforeMonth[month - 1] + day - 1;

            int secondsIntoDay = (hour * 3600) + (minute * 60) + second;

            long ticks = totalDays * TimeSpan.TicksPerDay;
            ticks += secondsIntoDay * TimeSpan.TicksPerSecond;
            ticks += fraction;

            value = new DateTime(ticks: ticks, kind: kind);
            return true;
        }

        // Cumulative day counts at the start of each month (index 0 = January) for common and leap years.
        private static readonly int[] s_daysToMonth365 = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 };
        private static readonly int[] s_daysToMonth366 = { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 };
    }
}
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.O.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.O.cs
new file mode 100644
index 0000000000..8d2c681f68
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.O.cs
@@ -0,0 +1,290 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        //
        // Roundtrippable format.
One of + // + // 012345678901234567890123456789012 + // --------------------------------- + // 2017-06-12T05:30:45.7680000-07:00 + // 2017-06-12T05:30:45.7680000Z (Z is short for "+00:00" but also distinguishes DateTimeKind.Utc from DateTimeKind.Local) + // 2017-06-12T05:30:45.7680000 (interpreted as local time wrt to current time zone) + // + private static bool TryParseDateTimeOffsetO(ReadOnlySpan<byte> source, out DateTimeOffset value, out int bytesConsumed, out DateTimeKind kind) + { + if (source.Length < 27) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int year; + { + uint digit1 = source[0] - 48u; // '0' + uint digit2 = source[1] - 48u; // '0' + uint digit3 = source[2] - 48u; // '0' + uint digit4 = source[3] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9 || digit3 > 9 || digit4 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + year = (int)(digit1 * 1000 + digit2 * 100 + digit3 * 10 + digit4); + } + + if (source[4] != Utf8Constants.Hyphen) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int month; + { + uint digit1 = source[5] - 48u; // '0' + uint digit2 = source[6] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + month = (int)(digit1 * 10 + digit2); + } + + if (source[7] != Utf8Constants.Hyphen) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int day; + { + uint digit1 = source[8] - 48u; // '0' + uint digit2 = source[9] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + day = (int)(digit1 * 10 + digit2); + } + + if (source[10] != 'T') + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int hour; + { + uint digit1 = source[11] - 48u; // '0' + uint digit2 = source[12] - 48u; // '0' + + if (digit1 > 9 || 
digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + hour = (int)(digit1 * 10 + digit2); + } + + if (source[13] != Utf8Constants.Colon) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int minute; + { + uint digit1 = source[14] - 48u; // '0' + uint digit2 = source[15] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + minute = (int)(digit1 * 10 + digit2); + } + + if (source[16] != Utf8Constants.Colon) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int second; + { + uint digit1 = source[17] - 48u; // '0' + uint digit2 = source[18] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + second = (int)(digit1 * 10 + digit2); + } + + if (source[19] != Utf8Constants.Period) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int fraction; + { + uint digit1 = source[20] - 48u; // '0' + uint digit2 = source[21] - 48u; // '0' + uint digit3 = source[22] - 48u; // '0' + uint digit4 = source[23] - 48u; // '0' + uint digit5 = source[24] - 48u; // '0' + uint digit6 = source[25] - 48u; // '0' + uint digit7 = source[26] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9 || digit3 > 9 || digit4 > 9 || digit5 > 9 || digit6 > 9 || digit7 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + fraction = (int)(digit1 * 1000000 + digit2 * 100000 + digit3 * 10000 + digit4 * 1000 + digit5 * 100 + digit6 * 10 + digit7); + } + + byte offsetChar = (source.Length <= 27) ? 
default : source[27]; + if (offsetChar != 'Z' && offsetChar != Utf8Constants.Plus && offsetChar != Utf8Constants.Minus) + { + if (!TryCreateDateTimeOffsetInterpretingDataAsLocalTime(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: fraction, out value)) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + bytesConsumed = 27; + kind = DateTimeKind.Unspecified; + return true; + } + + if (offsetChar == 'Z') + { + // Same as specifying an offset of "+00:00", except that DateTime's Kind gets set to UTC rather than Local + if (!TryCreateDateTimeOffset(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: fraction, offsetNegative: false, offsetHours: 0, offsetMinutes: 0, out value)) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + bytesConsumed = 28; + kind = DateTimeKind.Utc; + return true; + } + + Debug.Assert(offsetChar == Utf8Constants.Plus || offsetChar == Utf8Constants.Minus); + if (source.Length < 33) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int offsetHours; + { + uint digit1 = source[28] - 48u; // '0' + uint digit2 = source[29] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + offsetHours = (int)(digit1 * 10 + digit2); + } + + if (source[30] != Utf8Constants.Colon) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + int offsetMinutes; + { + uint digit1 = source[31] - 48u; // '0' + uint digit2 = source[32] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + offsetMinutes = (int)(digit1 * 10 + digit2); + } + + if (!TryCreateDateTimeOffset(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: fraction, offsetNegative: offsetChar == 
Utf8Constants.Minus, offsetHours: offsetHours, offsetMinutes: offsetMinutes, out value)) + { + value = default; + bytesConsumed = 0; + kind = default; + return false; + } + + bytesConsumed = 33; + kind = DateTimeKind.Local; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.R.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.R.cs new file mode 100644 index 0000000000..316bee01b4 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.R.cs @@ -0,0 +1,221 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + // + // Parse an RFC1123 date string. + // + // 01234567890123456789012345678 + // ----------------------------- + // Tue, 03 Jan 2017 08:08:05 GMT + // + private static bool TryParseDateTimeOffsetR(ReadOnlySpan<byte> source, uint caseFlipXorMask, out DateTimeOffset dateTimeOffset, out int bytesConsumed) + { + if (source.Length < 29) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + DayOfWeek dayOfWeek; + { + uint dow0 = source[0] ^ caseFlipXorMask; + uint dow1 = source[1]; + uint dow2 = source[2]; + uint comma = source[3]; + uint dowString = (dow0 << 24) | (dow1 << 16) | (dow2 << 8) | comma; + switch (dowString) + { + case 0x53756E2c /* 'Sun,' */: dayOfWeek = DayOfWeek.Sunday; break; + case 0x4d6f6e2c /* 'Mon,' */: dayOfWeek = DayOfWeek.Monday; break; + case 0x5475652c /* 'Tue,' */: dayOfWeek = DayOfWeek.Tuesday; break; + case 0x5765642c /* 'Wed,' */: dayOfWeek = DayOfWeek.Wednesday; break; + case 0x5468752c /* 'Thu,' */: dayOfWeek = DayOfWeek.Thursday; break; + case 0x4672692c /* 'Fri,' */: dayOfWeek = DayOfWeek.Friday; break; + case 0x5361742c /* 'Sat,' 
*/: dayOfWeek = DayOfWeek.Saturday; break; + default: + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + } + + if (source[4] != Utf8Constants.Space) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + int day; + { + uint digit1 = source[5] - 48u; // '0' + uint digit2 = source[6] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + day = (int)(digit1 * 10 + digit2); + } + + if (source[7] != Utf8Constants.Space) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + int month; + { + uint mon0 = source[8] ^ caseFlipXorMask; + uint mon1 = source[9]; + uint mon2 = source[10]; + uint space = source[11]; + uint monthString = (mon0 << 24) | (mon1 << 16) | (mon2 << 8) | space; + switch (monthString) + { + case 0x4a616e20 /* 'Jan ' */ : month = 1; break; + case 0x46656220 /* 'Feb ' */ : month = 2; break; + case 0x4d617220 /* 'Mar ' */ : month = 3; break; + case 0x41707220 /* 'Apr ' */ : month = 4; break; + case 0x4d617920 /* 'May ' */ : month = 5; break; + case 0x4a756e20 /* 'Jun ' */ : month = 6; break; + case 0x4a756c20 /* 'Jul ' */ : month = 7; break; + case 0x41756720 /* 'Aug ' */ : month = 8; break; + case 0x53657020 /* 'Sep ' */ : month = 9; break; + case 0x4f637420 /* 'Oct ' */ : month = 10; break; + case 0x4e6f7620 /* 'Nov ' */ : month = 11; break; + case 0x44656320 /* 'Dec ' */ : month = 12; break; + default: + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + } + + int year; + { + uint digit1 = source[12] - 48u; // '0' + uint digit2 = source[13] - 48u; // '0' + uint digit3 = source[14] - 48u; // '0' + uint digit4 = source[15] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9 || digit3 > 9 || digit4 > 9) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + year = (int)(digit1 * 1000 + digit2 * 100 + digit3 * 10 + digit4); + } + + if (source[16] != Utf8Constants.Space) + { + bytesConsumed = 
0; + dateTimeOffset = default; + return false; + } + + int hour; + { + uint digit1 = source[17] - 48u; // '0' + uint digit2 = source[18] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + hour = (int)(digit1 * 10 + digit2); + } + + if (source[19] != Utf8Constants.Colon) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + int minute; + { + uint digit1 = source[20] - 48u; // '0' + uint digit2 = source[21] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + minute = (int)(digit1 * 10 + digit2); + } + + if (source[22] != Utf8Constants.Colon) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + int second; + { + uint digit1 = source[23] - 48u; // '0' + uint digit2 = source[24] - 48u; // '0' + + if (digit1 > 9 || digit2 > 9) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + second = (int)(digit1 * 10 + digit2); + } + + { + uint space = source[25]; + uint g = source[26] ^ caseFlipXorMask; + uint m = source[27] ^ caseFlipXorMask; + uint t = source[28] ^ caseFlipXorMask; + uint gmtString = (space << 24) | (g << 16) | (m << 8) | t; + if (gmtString != 0x20474d54 /* ' GMT' */) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + } + + if (!TryCreateDateTimeOffset(year: year, month: month, day: day, hour: hour, minute: minute, second: second, fraction: 0, offsetNegative: false, offsetHours: 0, offsetMinutes: 0, out dateTimeOffset)) + { + bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + if (dayOfWeek != dateTimeOffset.DayOfWeek) + { + // If we got here, the day of week did not match the actual date. 
+ bytesConsumed = 0; + dateTimeOffset = default; + return false; + } + + bytesConsumed = 29; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.cs new file mode 100644 index 0000000000..f103492461 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Date.cs @@ -0,0 +1,149 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + /// <summary> + /// Parses a DateTime at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// default 05/25/2017 10:30:15 (an unspecified format is parsed exactly like G) + /// G 05/25/2017 10:30:15 + /// R Tue, 03 Jan 2017 08:08:05 GMT (RFC 1123) + /// l tue, 03 jan 2017 08:08:05 gmt (Lowercase RFC 1123) + /// O 2017-06-12T05:30:45.7680000-07:00 (Round-trippable) + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out DateTime value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case 'R': + { + if (!TryParseDateTimeOffsetR(source, NoFlipCase, out DateTimeOffset dateTimeOffset, out bytesConsumed)) + { + value = default; + return false; + } + value = dateTimeOffset.DateTime; // (returns a DateTimeKind.Unspecified to match DateTime.ParseExact(). Maybe better to return UtcDateTime instead?) + return true; + } + + case 'l': + { + if (!TryParseDateTimeOffsetR(source, FlipCase, out DateTimeOffset dateTimeOffset, out bytesConsumed)) + { + value = default; + return false; + } + value = dateTimeOffset.DateTime; // (returns a DateTimeKind.Unspecified to match DateTime.ParseExact(). Maybe better to return UtcDateTime instead?) + return true; + } + + case 'O': + { + // Emulates DateTime.ParseExact(text, "O", CultureInfo.InvariantCulture, DateTimeStyles.RoundtripKind) + // In particular, the formatted string "encodes" the DateTimeKind according to the following table: + // + // 2017-06-12T05:30:45.7680000 - Unspecified + // 2017-06-12T05:30:45.7680000+00:00 - Local + // 2017-06-12T05:30:45.7680000Z - Utc + + if (!TryParseDateTimeOffsetO(source, out DateTimeOffset dateTimeOffset, out bytesConsumed, out DateTimeKind kind)) + { + value = default; + bytesConsumed = 0; + return false; + } + + switch (kind) + { + case DateTimeKind.Local: + value = dateTimeOffset.LocalDateTime; + break; + case DateTimeKind.Utc: + value = dateTimeOffset.UtcDateTime; + break; + default: + Debug.Assert(kind == DateTimeKind.Unspecified); + value = dateTimeOffset.DateTime; + break; + } + + return true; + } + + case default(char): + case 'G': + return TryParseDateTimeG(source, out value, out _, out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses a DateTimeOffset at the start of a Utf8 string. 
+ /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G 05/25/2017 10:30:15 (not the default: an unspecified format is parsed separately by TryParseDateTimeOffsetDefault) + /// R Tue, 03 Jan 2017 08:08:05 GMT (RFC 1123) + /// l tue, 03 jan 2017 08:08:05 gmt (Lowercase RFC 1123) + /// O 2017-06-12T05:30:45.7680000-07:00 (Round-trippable) + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out DateTimeOffset value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case 'R': + return TryParseDateTimeOffsetR(source, NoFlipCase, out value, out bytesConsumed); + + case 'l': + return TryParseDateTimeOffsetR(source, FlipCase, out value, out bytesConsumed); + + case 'O': + return TryParseDateTimeOffsetO(source, out value, out bytesConsumed, out _); + + case default(char): + return TryParseDateTimeOffsetDefault(source, out value, out bytesConsumed); + + case 'G': + return TryParseDateTimeG(source, out DateTime _, out value, out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + private const uint FlipCase = 0x00000020u; // XOR mask to flip the case of a letter. 
+ private const uint NoFlipCase = 0x00000000u; + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs new file mode 100644 index 0000000000..c0f1e0c040 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Decimal.cs @@ -0,0 +1,82 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + /// <summary> + /// Parses a Decimal at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// F/f 12.45 Fixed point + /// E/e 1.245000e1 Exponential + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out decimal value, out int bytesConsumed, char standardFormat = default) + { + ParseNumberOptions options; + switch (standardFormat) + { + case default(char): + case 'G': + case 'g': + case 'E': + case 'e': + options = ParseNumberOptions.AllowExponent; + break; + + case 'F': + case 'f': + options = default; + break; + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + + NumberBuffer number = default; + if (!TryParseNumber(source, ref number, out bytesConsumed, options, out bool textUsedExponentNotation)) + { + value = default; + return false; + } + + if ((!textUsedExponentNotation) && (standardFormat == 'E' || standardFormat == 'e')) + { + value = default; + bytesConsumed = 0; + return false; + } + + // More compat with .NET behavior - whether or not a 0 keeps the negative sign depends on whether it is an "integer" 0 or a "fractional" 0 + if (number.Digits[0] == 0 && number.Scale == 0) + { + number.IsNegative = false; + } + + value = default; + if (!Number.NumberBufferToDecimal(ref number, ref value)) + { + value = default; + bytesConsumed = 0; + return false; + } + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs new file mode 100644 index 0000000000..1bdc59d237 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Float.cs @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + /// <summary> + /// Parses a Single at the start of a Utf8 string. 
+ /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// F/f 12.45 Fixed point + /// E/e 1.245000e1 Exponential + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out float value, out int bytesConsumed, char standardFormat = default) + { + if (TryParseNormalAsFloatingPoint(source, out double d, out bytesConsumed, standardFormat)) + { + value = (float)(d); + return true; + } + + return TryParseAsSpecialFloatingPoint(source, float.PositiveInfinity, float.NegativeInfinity, float.NaN, out value, out bytesConsumed); + } + + /// <summary> + /// Parses a Double at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. 
+ /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// F/f 12.45 Fixed point + /// E/e 1.245000e1 Exponential + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out double value, out int bytesConsumed, char standardFormat = default) + { + if (TryParseNormalAsFloatingPoint(source, out value, out bytesConsumed, standardFormat)) + return true; + + return TryParseAsSpecialFloatingPoint(source, double.PositiveInfinity, double.NegativeInfinity, double.NaN, out value, out bytesConsumed); + } + + // + // Attempt to parse the regular floating points (the ones without names like "Infinity" and "NaN") + // + private static bool TryParseNormalAsFloatingPoint(ReadOnlySpan<byte> source, out double value, out int bytesConsumed, char standardFormat) + { + ParseNumberOptions options; + switch (standardFormat) + { + case default(char): + case 'G': + case 'g': + case 'E': + case 'e': + options = ParseNumberOptions.AllowExponent; + break; + + case 'F': + case 'f': + options = default; + break; + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + + NumberBuffer number = default; + if (!TryParseNumber(source, ref number, out bytesConsumed, options, out bool textUsedExponentNotation)) + { + value = default; + return false; + } + + if ((!textUsedExponentNotation) && (standardFormat == 'E' || standardFormat == 'e')) + { + value = default; + bytesConsumed = 0; + return false; + } + + value = Number.NumberBufferToDouble(ref number); + return true; + } + + // + // Assuming the text doesn't look like a normal floating point, we attempt to parse it as one of the special floating point values. 
+ // + private static bool TryParseAsSpecialFloatingPoint<T>(ReadOnlySpan<byte> source, T positiveInfinity, T negativeInfinity, T nan, out T value, out int bytesConsumed) + { + if (source.Length >= 8 && + source[0] == 'I' && source[1] == 'n' && source[2] == 'f' && source[3] == 'i' && + source[4] == 'n' && source[5] == 'i' && source[6] == 't' && source[7] == 'y') + { + value = positiveInfinity; + bytesConsumed = 8; + return true; + } + + if (source.Length >= 9 && + source[0] == Utf8Constants.Minus && + source[1] == 'I' && source[2] == 'n' && source[3] == 'f' && source[4] == 'i' && + source[5] == 'n' && source[6] == 'i' && source[7] == 't' && source[8] == 'y') + { + value = negativeInfinity; + bytesConsumed = 9; + return true; + } + + if (source.Length >= 3 && + source[0] == 'N' && source[1] == 'a' && source[2] == 'N') + { + value = nan; + bytesConsumed = 3; + return true; + } + + value = default; + bytesConsumed = 0; + return false; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Guid.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Guid.cs new file mode 100644 index 0000000000..17dec828bc --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Guid.cs @@ -0,0 +1,243 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + /// <summary> + /// Parses a Guid at the start of a Utf8 string. 
+ /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// D (default) nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn + /// B {nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn} + /// P (nnnnnnnn-nnnn-nnnn-nnnn-nnnnnnnnnnnn) + /// N nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out Guid value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'D': + return TryParseGuidCore(source, false, ' ', ' ', out value, out bytesConsumed); + case 'B': + return TryParseGuidCore(source, true, '{', '}', out value, out bytesConsumed); + case 'P': + return TryParseGuidCore(source, true, '(', ')', out value, out bytesConsumed); + case 'N': + return TryParseGuidN(source, out value, out bytesConsumed); + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + // nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn (not very Guid-like, but the format is what it is...) 
+ private static bool TryParseGuidN(ReadOnlySpan<byte> text, out Guid value, out int bytesConsumed) + { + if (text.Length < 32) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (!TryParseUInt32X(text.Slice(0, 8), out uint i1, out int justConsumed) || justConsumed != 8) + { + value = default; + bytesConsumed = 0; + return false; // 8 digits + } + + if (!TryParseUInt16X(text.Slice(8, 4), out ushort i2, out justConsumed) || justConsumed != 4) + { + value = default; + bytesConsumed = 0; + return false; // next 4 digits + } + + if (!TryParseUInt16X(text.Slice(12, 4), out ushort i3, out justConsumed) || justConsumed != 4) + { + value = default; + bytesConsumed = 0; + return false; // next 4 digits + } + + if (!TryParseUInt16X(text.Slice(16, 4), out ushort i4, out justConsumed) || justConsumed != 4) + { + value = default; + bytesConsumed = 0; + return false; // next 4 digits + } + + if (!TryParseUInt64X(text.Slice(20), out ulong i5, out justConsumed) || justConsumed != 12) + { + value = default; + bytesConsumed = 0; + return false; // last 12 digits + } + + bytesConsumed = 32; + value = new Guid((int)i1, (short)i2, (short)i3, (byte)(i4 >> 8), (byte)i4, + (byte)(i5 >> 40), (byte)(i5 >> 32), (byte)(i5 >> 24), (byte)(i5 >> 16), (byte)(i5 >> 8), (byte)i5); + return true; + } + + // {8-4-4-4-12}, where number is the number of hex digits, and {/} are ends. + private static bool TryParseGuidCore(ReadOnlySpan<byte> source, bool ends, char begin, char end, out Guid value, out int bytesConsumed) + { + int expectedCodingUnits = 36 + (ends ? 
2 : 0); // 32 hex digits + 4 delimiters + 2 optional ends + + if (source.Length < expectedCodingUnits) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (ends) + { + if (source[0] != begin) + { + value = default; + bytesConsumed = 0; + return false; + } + + source = source.Slice(1); // skip beginning + } + + if (!TryParseUInt32X(source, out uint i1, out int justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (justConsumed != 8) + { + value = default; + bytesConsumed = 0; + return false; // 8 digits + } + + if (source[justConsumed] != '-') + { + value = default; + bytesConsumed = 0; + return false; + } + + source = source.Slice(9); // justConsumed + 1 for delimiter + + if (!TryParseUInt16X(source, out ushort i2, out justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (justConsumed != 4) + { + value = default; + bytesConsumed = 0; + return false; // 4 digits + } + + if (source[justConsumed] != '-') + { + value = default; + bytesConsumed = 0; + return false; + } + + source = source.Slice(5); // justConsumed + 1 for delimiter + + if (!TryParseUInt16X(source, out ushort i3, out justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (justConsumed != 4) + { + value = default; + bytesConsumed = 0; + return false; // 4 digits + } + + if (source[justConsumed] != '-') + { + value = default; + bytesConsumed = 0; + return false; + } + + source = source.Slice(5); // justConsumed + 1 for delimiter + + if (!TryParseUInt16X(source, out ushort i4, out justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (justConsumed != 4) + { + value = default; + bytesConsumed = 0; + return false; // 4 digits + } + + if (source[justConsumed] != '-') + { + value = default; + bytesConsumed = 0; + return false; + } + + source = source.Slice(5);// justConsumed + 1 for delimiter + + if (!TryParseUInt64X(source, out ulong i5, out justConsumed)) + { + 
value = default; + bytesConsumed = 0; + return false; + } + + if (justConsumed != 12) + { + value = default; + bytesConsumed = 0; + return false; // 12 digits + } + + if (ends && source[justConsumed] != end) + { + value = default; + bytesConsumed = 0; + return false; + } + + bytesConsumed = expectedCodingUnits; + value = new Guid((int)i1, (short)i2, (short)i3, (byte)(i4 >> 8), (byte)i4, + (byte)(i5 >> 40), (byte)(i5 >> 32), (byte)(i5 >> 24), (byte)(i5 >> 16), (byte)(i5 >> 8), (byte)i5); + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs new file mode 100644 index 0000000000..bf1871a1c9 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs @@ -0,0 +1,443 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseSByteD(ReadOnlySpan<byte> source, out sbyte value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int num = source[index]; + if (num == '-') + { + sign = -1; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + num = source[index]; + } + else if (num == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + num = source[index]; + } + + int answer = 0; + + if (ParserHelpers.IsDigit(num)) + { + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + } while (num == '0'); + if (!ParserHelpers.IsDigit(num)) + goto Done; + } + + answer = num - '0'; + index++; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + // Potential overflow + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = answer * 10 + num - '0'; + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if ((uint)answer > (uint)sbyte.MaxValue + (-1 * sign + 1) / 2) + goto FalseExit; // Overflow + + if ((uint)index >= (uint)source.Length) + goto Done; + if (!ParserHelpers.IsDigit(source[index])) + goto Done; + + // Guaranteed overflow + goto FalseExit; + } + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (sbyte)(answer * sign); + return true; + } + + private static bool TryParseInt16D(ReadOnlySpan<byte> source, out short value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int num = source[index]; + if (num == '-') + { + sign = -1; + index++; + if ((uint)index >= 
(uint)source.Length) + goto FalseExit; + num = source[index]; + } + else if (num == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + num = source[index]; + } + + int answer = 0; + + if (ParserHelpers.IsDigit(num)) + { + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + } while (num == '0'); + if (!ParserHelpers.IsDigit(num)) + goto Done; + } + + answer = num - '0'; + index++; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + // Potential overflow + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = answer * 10 + num - '0'; + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if ((uint)answer > (uint)short.MaxValue + (-1 * sign + 1) / 2) + goto FalseExit; // Overflow + + if ((uint)index >= (uint)source.Length) + goto Done; + if (!ParserHelpers.IsDigit(source[index])) + goto Done; + + // Guaranteed overflow + goto FalseExit; + } + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (short)(answer * sign); + return true; + } + + private static bool TryParseInt32D(ReadOnlySpan<byte> source, out int value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int num = source[index]; + if (num == '-') + { + sign = -1; + index++; + if ((uint)index >= 
(uint)source.Length) + goto FalseExit; + num = source[index]; + } + else if (num == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + num = source[index]; + } + + int answer = 0; + + if (ParserHelpers.IsDigit(num)) + { + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + } while (num == '0'); + if (!ParserHelpers.IsDigit(num)) + goto Done; + } + + answer = num - '0'; + index++; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + // Potential overflow + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if 
(!ParserHelpers.IsDigit(num)) + goto Done; + index++; + if (answer > int.MaxValue / 10) + goto FalseExit; // Overflow + answer = answer * 10 + num - '0'; + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if ((uint)answer > (uint)int.MaxValue + (-1 * sign + 1) / 2) + goto FalseExit; // Overflow + + if ((uint)index >= (uint)source.Length) + goto Done; + if (!ParserHelpers.IsDigit(source[index])) + goto Done; + + // Guaranteed overflow + goto FalseExit; + } + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = answer * sign; + return true; + } + + private static bool TryParseInt64D(ReadOnlySpan<byte> source, out long value, out int bytesConsumed) + { + if (source.Length < 1) + { + bytesConsumed = 0; + value = default; + return false; + } + + int indexOfFirstDigit = 0; + int sign = 1; + if (source[0] == '-') + { + indexOfFirstDigit = 1; + sign = -1; + + if (source.Length <= indexOfFirstDigit) + { + bytesConsumed = 0; + value = default; + return false; + } + } + else if (source[0] == '+') + { + indexOfFirstDigit = 1; + + if (source.Length <= indexOfFirstDigit) + { + bytesConsumed = 0; + value = default; + return false; + } + } + + int overflowLength = ParserHelpers.Int64OverflowLength + indexOfFirstDigit; + + // Parse the first digit separately. If invalid here, we need to return false. 
+ long firstDigit = source[indexOfFirstDigit] - 48; // '0' + if (firstDigit < 0 || firstDigit > 9) + { + bytesConsumed = 0; + value = default; + return false; + } + ulong parsedValue = (ulong)firstDigit; + + if (source.Length < overflowLength) + { + // Length is less than Parsers.Int64OverflowLength; overflow is not possible + for (int index = indexOfFirstDigit + 1; index < source.Length; index++) + { + long nextDigit = source[index] - 48; // '0' + if (nextDigit < 0 || nextDigit > 9) + { + bytesConsumed = index; + value = ((long)parsedValue) * sign; + return true; + } + parsedValue = parsedValue * 10 + (ulong)nextDigit; + } + } + else + { + // Length is greater than Parsers.Int64OverflowLength; overflow is only possible after Parsers.Int64OverflowLength + // digits. There may be no overflow after Parsers.Int64OverflowLength if there are leading zeroes. + for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) + { + long nextDigit = source[index] - 48; // '0' + if (nextDigit < 0 || nextDigit > 9) + { + bytesConsumed = index; + value = ((long)parsedValue) * sign; + return true; + } + parsedValue = parsedValue * 10 + (ulong)nextDigit; + } + for (int index = overflowLength - 1; index < source.Length; index++) + { + long nextDigit = source[index] - 48; // '0' + if (nextDigit < 0 || nextDigit > 9) + { + bytesConsumed = index; + value = ((long)parsedValue) * sign; + return true; + } + // If parsedValue > (long.MaxValue / 10), any more appended digits will cause overflow. + // if parsedValue == (long.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
+ bool positive = sign > 0; + bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); + if (parsedValue > long.MaxValue / 10 || parsedValue == long.MaxValue / 10 && nextDigitTooLarge) + { + bytesConsumed = 0; + value = default; + return false; + } + parsedValue = parsedValue * 10 + (ulong)nextDigit; + } + } + + bytesConsumed = source.Length; + value = ((long)parsedValue) * sign; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.N.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.N.cs new file mode 100644 index 0000000000..fd8ce572f2 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.N.cs @@ -0,0 +1,383 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseSByteN(ReadOnlySpan<byte> source, out sbyte value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int c = source[index]; + if (c == '-') + { + sign = -1; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + else if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + int answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) 
+ if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + answer = answer * 10 + c - '0'; + + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if (answer > sbyte.MaxValue + (-1 * sign + 1) / 2) + goto FalseExit; // Overflow + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. + answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. 
+ goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (sbyte)(answer * sign); + return true; + } + + private static bool TryParseInt16N(ReadOnlySpan<byte> source, out short value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int c = source[index]; + if (c == '-') + { + sign = -1; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + else if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + int answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) + if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + answer = answer * 10 + c - '0'; + + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if (answer > short.MaxValue + (-1 * sign + 1) / 2) + goto FalseExit; // Overflow + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. 
+ answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. + goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (short)(answer * sign); + return true; + } + + private static bool TryParseInt32N(ReadOnlySpan<byte> source, out int value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int c = source[index]; + if (c == '-') + { + sign = -1; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + else if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + int answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) 
+ if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + if (((uint)answer) > int.MaxValue / 10) + goto FalseExit; + + answer = answer * 10 + c - '0'; + + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if ((uint)answer > (uint)int.MaxValue + (-1 * sign + 1) / 2) + goto FalseExit; // Overflow + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. + answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. 
+ goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = answer * sign; + return true; + } + + private static bool TryParseInt64N(ReadOnlySpan<byte> source, out long value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int sign = 1; + int index = 0; + int c = source[index]; + if (c == '-') + { + sign = -1; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + else if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + long answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) + if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + if (((ulong)answer) > long.MaxValue / 10) + goto FalseExit; + + answer = answer * 10 + c - '0'; + + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if ((ulong)answer > (ulong)(long.MaxValue + (-1 * sign + 1) / 2)) + goto FalseExit; // Overflow + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. 
+ answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. + goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = answer * sign; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.cs new file mode 100644 index 0000000000..b30291c6f2 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.cs @@ -0,0 +1,199 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Internal.Runtime.CompilerServices; + +namespace System.Buffers.Text +{ + /// <summary> + /// Methods to parse common data types from Utf8 strings. + /// </summary> + public static partial class Utf8Parser + { + /// <summary> + /// Parses a SByte at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success.
"bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 127 + /// N/n 127 + /// X/x 7f + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + [CLSCompliant(false)] + public static bool TryParse(ReadOnlySpan<byte> source, out sbyte value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseSByteD(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseSByteN(source, out value, out bytesConsumed); + + case 'x': + case 'X': + value = default; + return TryParseByteX(source, out Unsafe.As<sbyte, byte>(ref value), out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses an Int16 at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0.
+ /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out short value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseInt16D(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseInt16N(source, out value, out bytesConsumed); + + case 'x': + case 'X': + value = default; + return TryParseUInt16X(source, out Unsafe.As<short, ushort>(ref value), out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses an Int32 at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out int value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseInt32D(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseInt32N(source, out value, out bytesConsumed); + + case 'x': + case 'X': + value = default; + return TryParseUInt32X(source, out Unsafe.As<int, uint>(ref value), out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses an Int64 at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out long value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseInt64D(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseInt64N(source, out value, out bytesConsumed); + + case 'x': + case 'X': + value = default; + return TryParseUInt64X(source, out Unsafe.As<long, ulong>(ref value), out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs new file mode 100644 index 0000000000..46753f5c57 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs @@ -0,0 +1,354 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseByteD(ReadOnlySpan<byte> source, out byte value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int num = source[index]; + int answer = 0; + + if (ParserHelpers.IsDigit(num)) + { + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + } while (num == '0'); + if (!ParserHelpers.IsDigit(num)) + goto Done; + } + + answer = num - '0'; + index++; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + // Potential overflow + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = answer * 10 + num - '0'; + if ((uint)answer > byte.MaxValue) + goto FalseExit; // Overflow + + if ((uint)index >= (uint)source.Length) + goto Done; + if (!ParserHelpers.IsDigit(source[index])) + goto Done; + + // Guaranteed overflow + goto FalseExit; + } + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (byte)answer; + return true; + } + + private static bool TryParseUInt16D(ReadOnlySpan<byte> source, out ushort value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int num = source[index]; + int answer = 0; + + if (ParserHelpers.IsDigit(num)) + { + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + } while (num == '0'); + if (!ParserHelpers.IsDigit(num)) + goto Done; + } + + answer = num - '0'; + index++; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if 
((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + // Potential overflow + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = answer * 10 + num - '0'; + if ((uint)answer > ushort.MaxValue) + goto FalseExit; // Overflow + + if ((uint)index >= (uint)source.Length) + goto Done; + if (!ParserHelpers.IsDigit(source[index])) + goto Done; + + // Guaranteed overflow + goto FalseExit; + } + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (ushort)answer; + return true; + } + + private static bool TryParseUInt32D(ReadOnlySpan<byte> source, out uint value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int num = source[index]; + int answer = 0; + + if (ParserHelpers.IsDigit(num)) + { + if (num == '0') + { + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + } while (num == '0'); + if (!ParserHelpers.IsDigit(num)) + goto Done; + } + + answer = num - '0'; + index++; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto 
Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + answer = 10 * answer + num - '0'; + + // Potential overflow + if ((uint)index >= (uint)source.Length) + goto Done; + num = source[index]; + if (!ParserHelpers.IsDigit(num)) + goto Done; + index++; + if (((uint)answer) > uint.MaxValue / 10 || (((uint)answer) == uint.MaxValue / 10 && num > '5')) + goto FalseExit; // Overflow + answer = answer * 10 + num - '0'; + + if ((uint)index >= (uint)source.Length) + goto Done; + if (!ParserHelpers.IsDigit(source[index])) + goto Done; + + // Guaranteed overflow + goto FalseExit; + } + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (uint)answer; + return true; + } + + private static bool TryParseUInt64D(ReadOnlySpan<byte> source, out ulong value, out int bytesConsumed) + { + if (source.Length < 1) + { + bytesConsumed = 0; + value = default; + return false; + } + + // Parse the first digit separately. If invalid here, we need to return false. 
+ ulong firstDigit = source[0] - 48u; // '0' + if (firstDigit > 9) + { + bytesConsumed = 0; + value = default; + return false; + } + ulong parsedValue = firstDigit; + + if (source.Length < ParserHelpers.Int64OverflowLength) + { + // Length is less than Parsers.Int64OverflowLength; overflow is not possible + for (int index = 1; index < source.Length; index++) + { + ulong nextDigit = source[index] - 48u; // '0' + if (nextDigit > 9) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + parsedValue = parsedValue * 10 + nextDigit; + } + } + else + { + // Length is greater than Parsers.Int64OverflowLength; overflow is only possible after Parsers.Int64OverflowLength + // digits. There may be no overflow after Parsers.Int64OverflowLength if there are leading zeroes. + for (int index = 1; index < ParserHelpers.Int64OverflowLength - 1; index++) + { + ulong nextDigit = source[index] - 48u; // '0' + if (nextDigit > 9) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + parsedValue = parsedValue * 10 + nextDigit; + } + for (int index = ParserHelpers.Int64OverflowLength - 1; index < source.Length; index++) + { + ulong nextDigit = source[index] - 48u; // '0' + if (nextDigit > 9) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + // If parsedValue > (ulong.MaxValue / 10), any more appended digits will cause overflow. + // if parsedValue == (ulong.MaxValue / 10), any nextDigit greater than 5 implies overflow. 
+ if (parsedValue > ulong.MaxValue / 10 || (parsedValue == ulong.MaxValue / 10 && nextDigit > 5)) + { + bytesConsumed = 0; + value = default; + return false; + } + parsedValue = parsedValue * 10 + nextDigit; + } + } + + bytesConsumed = source.Length; + value = parsedValue; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.N.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.N.cs new file mode 100644 index 0000000000..2db20c1270 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.N.cs @@ -0,0 +1,336 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + // + // Parsing unsigned integers for the 'N' format. Emulating int.TryParse(NumberStyles.AllowThousands | NumberStyles.Integer | NumberStyles.AllowDecimalPoint) + // + public static partial class Utf8Parser + { + private static bool TryParseByteN(ReadOnlySpan<byte> source, out byte value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int c = source[index]; + if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + int answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) 
+ if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + answer = answer * 10 + c - '0'; + + if (answer > byte.MaxValue) + goto FalseExit; // Overflow + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. + answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. + goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (byte)answer; + return true; + } + + private static bool TryParseUInt16N(ReadOnlySpan<byte> source, out ushort value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int c = source[index]; + if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + int answer; + + // Handle the first digit (or period) as a special case. 
This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) + if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + answer = answer * 10 + c - '0'; + + if (answer > ushort.MaxValue) + goto FalseExit; // Overflow + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. + answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. 
+ goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (ushort)answer; + return true; + } + + private static bool TryParseUInt32N(ReadOnlySpan<byte> source, out uint value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int c = source[index]; + if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + int answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) + if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + if (((uint)answer) > uint.MaxValue / 10 || (((uint)answer) == uint.MaxValue / 10 && c > '5')) + goto FalseExit; // Overflow + + answer = answer * 10 + c - '0'; + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. + answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. 
+ do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. + goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (uint)answer; + return true; + } + + private static bool TryParseUInt64N(ReadOnlySpan<byte> source, out ulong value, out int bytesConsumed) + { + if (source.Length < 1) + goto FalseExit; + + int index = 0; + int c = source[index]; + if (c == '+') + { + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + c = source[index]; + } + + long answer; + + // Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines + // (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least + // one digit after the decimal point.) + if (c == Utf8Constants.Period) + goto FractionalPartWithoutLeadingDigits; + if (!ParserHelpers.IsDigit(c)) + goto FalseExit; + answer = c - '0'; + + for (; ; ) + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + + c = source[index]; + if (c == Utf8Constants.Comma) + continue; + + if (c == Utf8Constants.Period) + goto FractionalDigits; + + if (!ParserHelpers.IsDigit(c)) + goto Done; + + if (((ulong)answer) > ulong.MaxValue / 10 || (((ulong)answer) == ulong.MaxValue / 10 && c > '5')) + goto FalseExit; // Overflow + + answer = answer * 10 + c - '0'; + } + + FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point. 
+ answer = 0; + index++; + if ((uint)index >= (uint)source.Length) + goto FalseExit; + if (source[index] != '0') + goto FalseExit; + + FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0. + do + { + index++; + if ((uint)index >= (uint)source.Length) + goto Done; + c = source[index]; + } + while (c == '0'); + + if (ParserHelpers.IsDigit(c)) + goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination. + goto Done; + + FalseExit: + bytesConsumed = default; + value = default; + return false; + + Done: + bytesConsumed = index; + value = (ulong)answer; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.X.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.X.cs new file mode 100644 index 0000000000..7e7867a56f --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.X.cs @@ -0,0 +1,341 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseByteX(ReadOnlySpan<byte> source, out byte value, out int bytesConsumed) + { + if (source.Length < 1) + { + bytesConsumed = 0; + value = default; + return false; + } + byte nextCharacter; + byte nextDigit; + + // Cache Parsers.s_HexLookup in order to avoid static constructor checks + byte[] hexLookup = ParserHelpers.s_hexLookup; + + // Parse the first digit separately. If invalid here, we need to return false. 
+ nextCharacter = source[0]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = 0; + value = default; + return false; + } + uint parsedValue = nextDigit; + + if (source.Length <= ParserHelpers.ByteOverflowLengthHex) + { + // Length is less than or equal to Parsers.ByteOverflowLengthHex; overflow is not possible + for (int index = 1; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = (byte)(parsedValue); + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + else + { + // Length is greater than Parsers.ByteOverflowLengthHex; overflow is only possible after Parsers.ByteOverflowLengthHex + // digits. There may be no overflow after Parsers.ByteOverflowLengthHex if there are leading zeroes. + for (int index = 1; index < ParserHelpers.ByteOverflowLengthHex; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = (byte)(parsedValue); + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + for (int index = ParserHelpers.ByteOverflowLengthHex; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = (byte)(parsedValue); + return true; + } + // If we try to append a digit to anything larger than byte.MaxValue / 0x10, there will be overflow + if (parsedValue > byte.MaxValue / 0x10) + { + bytesConsumed = 0; + value = default; + return false; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + + bytesConsumed = source.Length; + value = (byte)(parsedValue); + return true; + } + + private static bool TryParseUInt16X(ReadOnlySpan<byte> source, out ushort value, out int bytesConsumed) + { + if (source.Length < 1) + { + bytesConsumed = 0; + value = default; + return false; 
+ } + byte nextCharacter; + byte nextDigit; + + // Cache Parsers.s_HexLookup in order to avoid static constructor checks + byte[] hexLookup = ParserHelpers.s_hexLookup; + + // Parse the first digit separately. If invalid here, we need to return false. + nextCharacter = source[0]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = 0; + value = default; + return false; + } + uint parsedValue = nextDigit; + + if (source.Length <= ParserHelpers.Int16OverflowLengthHex) + { + // Length is less than or equal to Parsers.Int16OverflowLengthHex; overflow is not possible + for (int index = 1; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = (ushort)(parsedValue); + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + else + { + // Length is greater than Parsers.Int16OverflowLengthHex; overflow is only possible after Parsers.Int16OverflowLengthHex + // digits. There may be no overflow after Parsers.Int16OverflowLengthHex if there are leading zeroes. 
+ for (int index = 1; index < ParserHelpers.Int16OverflowLengthHex; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = (ushort)(parsedValue); + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + for (int index = ParserHelpers.Int16OverflowLengthHex; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = (ushort)(parsedValue); + return true; + } + // If we try to append a digit to anything larger than ushort.MaxValue / 0x10, there will be overflow + if (parsedValue > ushort.MaxValue / 0x10) + { + bytesConsumed = 0; + value = default; + return false; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + + bytesConsumed = source.Length; + value = (ushort)(parsedValue); + return true; + } + + private static bool TryParseUInt32X(ReadOnlySpan<byte> source, out uint value, out int bytesConsumed) + { + if (source.Length < 1) + { + bytesConsumed = 0; + value = default; + return false; + } + byte nextCharacter; + byte nextDigit; + + // Cache Parsers.s_HexLookup in order to avoid static constructor checks + byte[] hexLookup = ParserHelpers.s_hexLookup; + + // Parse the first digit separately. If invalid here, we need to return false. 
+ nextCharacter = source[0]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = 0; + value = default; + return false; + } + uint parsedValue = nextDigit; + + if (source.Length <= ParserHelpers.Int32OverflowLengthHex) + { + // Length is less than or equal to Parsers.Int32OverflowLengthHex; overflow is not possible + for (int index = 1; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + else + { + // Length is greater than Parsers.Int32OverflowLengthHex; overflow is only possible after Parsers.Int32OverflowLengthHex + // digits. There may be no overflow after Parsers.Int32OverflowLengthHex if there are leading zeroes. + for (int index = 1; index < ParserHelpers.Int32OverflowLengthHex; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + for (int index = ParserHelpers.Int32OverflowLengthHex; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + // If we try to append a digit to anything larger than uint.MaxValue / 0x10, there will be overflow + if (parsedValue > uint.MaxValue / 0x10) + { + bytesConsumed = 0; + value = default; + return false; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + + bytesConsumed = source.Length; + value = parsedValue; + return true; + } + + private static bool TryParseUInt64X(ReadOnlySpan<byte> source, out ulong value, out int bytesConsumed) + { + if (source.Length < 1) + { + bytesConsumed = 0; + value = default; + return false; + } + byte nextCharacter; 
+ byte nextDigit; + + // Cache Parsers.s_HexLookup in order to avoid static constructor checks + byte[] hexLookup = ParserHelpers.s_hexLookup; + + // Parse the first digit separately. If invalid here, we need to return false. + nextCharacter = source[0]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = 0; + value = default; + return false; + } + ulong parsedValue = nextDigit; + + if (source.Length <= ParserHelpers.Int64OverflowLengthHex) + { + // Length is less than or equal to Parsers.Int64OverflowLengthHex; overflow is not possible + for (int index = 1; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + else + { + // Length is greater than Parsers.Int64OverflowLengthHex; overflow is only possible after Parsers.Int64OverflowLengthHex + // digits. There may be no overflow after Parsers.Int64OverflowLengthHex if there are leading zeroes. 
+ for (int index = 1; index < ParserHelpers.Int64OverflowLengthHex; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + for (int index = ParserHelpers.Int64OverflowLengthHex; index < source.Length; index++) + { + nextCharacter = source[index]; + nextDigit = hexLookup[nextCharacter]; + if (nextDigit == 0xFF) + { + bytesConsumed = index; + value = parsedValue; + return true; + } + // If we try to append a digit to anything larger than ulong.MaxValue / 0x10, there will be overflow + if (parsedValue > ulong.MaxValue / 0x10) + { + bytesConsumed = 0; + value = default; + return false; + } + parsedValue = (parsedValue << 4) + nextDigit; + } + } + + bytesConsumed = source.Length; + value = parsedValue; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.cs new file mode 100644 index 0000000000..ae23c29d04 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.cs @@ -0,0 +1,192 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + /// <summary> + /// Parses a Byte at the start of a Utf8 string. 
+ /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + public static bool TryParse(ReadOnlySpan<byte> source, out byte value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseByteD(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseByteN(source, out value, out bytesConsumed); + + case 'x': + case 'X': + return TryParseByteX(source, out value, out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses a UInt16 at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. 
+ /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. + /// </exceptions> + [CLSCompliant(false)] + public static bool TryParse(ReadOnlySpan<byte> source, out ushort value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseUInt16D(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseUInt16N(source, out value, out bytesConsumed); + + case 'x': + case 'X': + return TryParseUInt16X(source, out value, out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses a UInt32 at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + [CLSCompliant(false)] + public static bool TryParse(ReadOnlySpan<byte> source, out uint value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseUInt32D(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseUInt32N(source, out value, out bytesConsumed); + + case 'x': + case 'X': + return TryParseUInt32X(source, out value, out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + + /// <summary> + /// Parses a UInt64 at the start of a Utf8 string. + /// </summary> + /// <param name="source">The Utf8 string to parse</param> + /// <param name="value">Receives the parsed value</param> + /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed </param> + /// <param name="standardFormat">Expected format of the Utf8 string</param> + /// <returns> + /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed. + /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0. + /// </returns> + /// <remarks> + /// Formats supported: + /// G/g (default) + /// D/d 32767 + /// N/n 32,767 + /// X/x 7fff + /// </remarks> + /// <exceptions> + /// <cref>System.FormatException</cref> if the format is not valid for this data type. 
+ /// </exceptions> + [CLSCompliant(false)] + public static bool TryParse(ReadOnlySpan<byte> source, out ulong value, out int bytesConsumed, char standardFormat = default) + { + switch (standardFormat) + { + case default(char): + case 'g': + case 'G': + case 'd': + case 'D': + return TryParseUInt64D(source, out value, out bytesConsumed); + + case 'n': + case 'N': + return TryParseUInt64N(source, out value, out bytesConsumed); + + case 'x': + case 'X': + return TryParseUInt64X(source, out value, out bytesConsumed); + + default: + return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed); + } + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs new file mode 100644 index 0000000000..813a1f0a6e --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.Number.cs @@ -0,0 +1,246 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + [Flags] + private enum ParseNumberOptions + { + AllowExponent = 0x00000001, + } + + private static bool TryParseNumber(ReadOnlySpan<byte> source, ref NumberBuffer number, out int bytesConsumed, ParseNumberOptions options, out bool textUsedExponentNotation) + { + Debug.Assert(number.Digits[0] == 0 && number.Scale == 0 && !number.IsNegative, "Number not initialized to default(NumberBuffer)"); + + textUsedExponentNotation = false; + + if (source.Length == 0) + { + bytesConsumed = 0; + return false; + } + + Span<byte> digits = number.Digits; + + int srcIndex = 0; + int dstIndex = 0; + + // Consume the leading sign if any. 
+ byte c = source[srcIndex]; + switch (c) + { + case Utf8Constants.Minus: + number.IsNegative = true; + goto case Utf8Constants.Plus; + + case Utf8Constants.Plus: + srcIndex++; + if (srcIndex == source.Length) + { + bytesConsumed = 0; + return false; + } + c = source[srcIndex]; + break; + + default: + break; + } + + int startIndexDigitsBeforeDecimal = srcIndex; + + // Throw away any leading zeroes + while (srcIndex != source.Length) + { + c = source[srcIndex]; + if (c != '0') + break; + srcIndex++; + } + + if (srcIndex == source.Length) + { + digits[0] = 0; + number.Scale = 0; + number.IsNegative = false; + bytesConsumed = srcIndex; + number.CheckConsistency(); + return true; + } + + int startIndexNonLeadingDigitsBeforeDecimal = srcIndex; + while (srcIndex != source.Length) + { + c = source[srcIndex]; + if ((c - 48u) > 9) + break; + srcIndex++; + } + + int numDigitsBeforeDecimal = srcIndex - startIndexDigitsBeforeDecimal; + int numNonLeadingDigitsBeforeDecimal = srcIndex - startIndexNonLeadingDigitsBeforeDecimal; + + Debug.Assert(dstIndex == 0); + int numNonLeadingDigitsBeforeDecimalToCopy = Math.Min(numNonLeadingDigitsBeforeDecimal, NumberBuffer.BufferSize - 1); + source.Slice(startIndexNonLeadingDigitsBeforeDecimal, numNonLeadingDigitsBeforeDecimalToCopy).CopyTo(digits); + dstIndex = numNonLeadingDigitsBeforeDecimalToCopy; + number.Scale = numNonLeadingDigitsBeforeDecimal; + + if (srcIndex == source.Length) + { + bytesConsumed = srcIndex; + number.CheckConsistency(); + return true; + } + + int numDigitsAfterDecimal = 0; + if (c == Utf8Constants.Period) + { + // + // Parse the digits after the decimal point. 
+ // + + srcIndex++; + int startIndexDigitsAfterDecimal = srcIndex; + while (srcIndex != source.Length) + { + c = source[srcIndex]; + if ((c - 48u) > 9) + break; + srcIndex++; + } + numDigitsAfterDecimal = srcIndex - startIndexDigitsAfterDecimal; + + int startIndexOfDigitsAfterDecimalToCopy = startIndexDigitsAfterDecimal; + if (dstIndex == 0) + { + // Not copied any digits to the Number struct yet. This means we must continue discarding leading zeroes even though they appeared after the decimal point. + while (startIndexOfDigitsAfterDecimalToCopy < srcIndex && source[startIndexOfDigitsAfterDecimalToCopy] == '0') + { + number.Scale--; + startIndexOfDigitsAfterDecimalToCopy++; + } + } + + int numDigitsAfterDecimalToCopy = Math.Min(srcIndex - startIndexOfDigitsAfterDecimalToCopy, NumberBuffer.BufferSize - dstIndex - 1); + source.Slice(startIndexOfDigitsAfterDecimalToCopy, numDigitsAfterDecimalToCopy).CopyTo(digits.Slice(dstIndex)); + dstIndex += numDigitsAfterDecimalToCopy; + // We "should" really NUL terminate, but there are multiple places we'd have to do this and it is a precondition that the caller pass in a fully zero-initialized Number. + + if (srcIndex == source.Length) + { + if (numDigitsBeforeDecimal == 0 && numDigitsAfterDecimal == 0) + { + // For compatibility. You can say "5." and ".5" but you can't say "."
+ bytesConsumed = 0; + return false; + } + + bytesConsumed = srcIndex; + number.CheckConsistency(); + return true; + } + } + + if (numDigitsBeforeDecimal == 0 && numDigitsAfterDecimal == 0) + { + bytesConsumed = 0; + return false; + } + + if ((c & ~0x20u) != 'E') + { + if ((digits[0] == 0) && (numDigitsAfterDecimal == 0)) + { + number.IsNegative = false; + } + + bytesConsumed = srcIndex; + number.CheckConsistency(); + return true; + } + + // + // Parse the exponent after the "E" + // + textUsedExponentNotation = true; + srcIndex++; + + if ((options & ParseNumberOptions.AllowExponent) == 0) + { + bytesConsumed = 0; + return false; + } + + if (srcIndex == source.Length) + { + bytesConsumed = 0; + return false; + } + + bool exponentIsNegative = false; + c = source[srcIndex]; + switch (c) + { + case Utf8Constants.Minus: + exponentIsNegative = true; + goto case Utf8Constants.Plus; + + case Utf8Constants.Plus: + srcIndex++; + if (srcIndex == source.Length) + { + bytesConsumed = 0; + return false; + } + c = source[srcIndex]; + break; + + default: + break; + } + + if (!Utf8Parser.TryParseUInt32D(source.Slice(srcIndex), out uint absoluteExponent, out int bytesConsumedByExponent)) + { + bytesConsumed = 0; + return false; + } + + srcIndex += bytesConsumedByExponent; + + if (exponentIsNegative) + { + if (number.Scale < int.MinValue + (long)absoluteExponent) + { + // A scale underflow means all non-zero digits are so far to the right of the decimal point that no + // number format we have will be able to see them. Just pin the scale at the absolute minimum + // and let the converter produce a 0 with the max precision available for that type.
+ number.Scale = int.MinValue; + } + else + { + number.Scale -= (int)absoluteExponent; + } + } + else + { + if (number.Scale > int.MaxValue - (long)absoluteExponent) + { + bytesConsumed = 0; + return false; + } + number.Scale += (int)absoluteExponent; + } + + bytesConsumed = srcIndex; + number.CheckConsistency(); + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.BigG.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.BigG.cs new file mode 100644 index 0000000000..6bcb4d5277 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.BigG.cs @@ -0,0 +1,132 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseTimeSpanBigG(ReadOnlySpan<byte> source, out TimeSpan value, out int bytesConsumed) + { + int srcIndex = 0; + byte c = default; + while (srcIndex != source.Length) + { + c = source[srcIndex]; + if (!(c == ' ' || c == '\t')) + break; + srcIndex++; + } + + if (srcIndex == source.Length) + { + value = default; + bytesConsumed = 0; + return false; + } + + bool isNegative = false; + if (c == Utf8Constants.Minus) + { + isNegative = true; + srcIndex++; + if (srcIndex == source.Length) + { + value = default; + bytesConsumed = 0; + return false; + } + } + + if (!TryParseUInt32D(source.Slice(srcIndex), out uint days, out int justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + srcIndex += justConsumed; + + if (srcIndex == source.Length || source[srcIndex++] != Utf8Constants.Colon) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (!TryParseUInt32D(source.Slice(srcIndex), out uint hours, out justConsumed)) + 
{ + value = default; + bytesConsumed = 0; + return false; + } + srcIndex += justConsumed; + + if (srcIndex == source.Length || source[srcIndex++] != Utf8Constants.Colon) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (!TryParseUInt32D(source.Slice(srcIndex), out uint minutes, out justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + srcIndex += justConsumed; + + if (srcIndex == source.Length || source[srcIndex++] != Utf8Constants.Colon) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (!TryParseUInt32D(source.Slice(srcIndex), out uint seconds, out justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + srcIndex += justConsumed; + + if (srcIndex == source.Length || source[srcIndex++] != Utf8Constants.Period) + { + value = default; + bytesConsumed = 0; + return false; + } + + if (!TryParseTimeSpanFraction(source.Slice(srcIndex), out uint fraction, out justConsumed)) + { + value = default; + bytesConsumed = 0; + return false; + } + + srcIndex += justConsumed; + + if (!TryCreateTimeSpan(isNegative: isNegative, days: days, hours: hours, minutes: minutes, seconds: seconds, fraction: fraction, out value)) + { + value = default; + bytesConsumed = 0; + return false; + } + + // + // There cannot legally be a sixth number. If the next character is a period or colon, treat this as an error as it's likely + // to indicate the start of a sixth number. Otherwise, treat as end of parse with data left over.
+ // + if (srcIndex != source.Length && (source[srcIndex] == Utf8Constants.Period || source[srcIndex] == Utf8Constants.Colon)) + { + value = default; + bytesConsumed = 0; + return false; + } + + bytesConsumed = srcIndex; + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.C.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.C.cs new file mode 100644 index 0000000000..d0a28969be --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.C.cs @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseTimeSpanC(ReadOnlySpan<byte> source, out TimeSpan value, out int bytesConsumed) + { + TimeSpanSplitter s = default; + if (!s.TrySplitTimeSpan(source, periodUsedToSeparateDay: true, out bytesConsumed)) + { + value = default; + return false; + } + + bool isNegative = s.IsNegative; + + bool success; + switch (s.Separators) + { + case 0x00000000: // dd + success = TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: 0, minutes: 0, seconds: 0, fraction: 0, out value); + break; + + case 0x01000000: // hh:mm + success = TryCreateTimeSpan(isNegative: isNegative, days: 0, hours: s.V1, minutes: s.V2, seconds: 0, fraction: 0, out value); + break; + + case 0x02010000: // dd.hh:mm + success = TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: s.V2, minutes: s.V3, seconds: 0, fraction: 0, out value); + break; + + case 0x01010000: // hh:mm:ss + success = TryCreateTimeSpan(isNegative: isNegative, days: 0, hours: s.V1, minutes: s.V2, seconds: s.V3, fraction: 0, out value); + break; + + case 0x02010100: // dd.hh:mm:ss + success = 
TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: s.V2, minutes: s.V3, seconds: s.V4, fraction: 0, out value); + break; + + case 0x01010200: // hh:mm:ss.fffffff + success = TryCreateTimeSpan(isNegative: isNegative, days: 0, hours: s.V1, minutes: s.V2, seconds: s.V3, fraction: s.V4, out value); + break; + + case 0x02010102: // dd.hh:mm:ss.fffffff + success = TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: s.V2, minutes: s.V3, seconds: s.V4, fraction: s.V5, out value); + break; + + default: + value = default; + success = false; + break; + } + + if (!success) + { + bytesConsumed = 0; + return false; + } + + return true; + } + } +} + diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.LittleG.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.LittleG.cs new file mode 100644 index 0000000000..19208b9eac --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.LittleG.cs @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace System.Buffers.Text +{ + public static partial class Utf8Parser + { + private static bool TryParseTimeSpanLittleG(ReadOnlySpan<byte> source, out TimeSpan value, out int bytesConsumed) + { + TimeSpanSplitter s = default; + if (!s.TrySplitTimeSpan(source, periodUsedToSeparateDay: false, out bytesConsumed)) + { + value = default; + return false; + } + + bool isNegative = s.IsNegative; + + bool success; + switch (s.Separators) + { + case 0x00000000: // dd + success = TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: 0, minutes: 0, seconds: 0, fraction: 0, out value); + break; + + case 0x01000000: // hh:mm + success = TryCreateTimeSpan(isNegative: isNegative, days: 0, hours: s.V1, minutes: s.V2, seconds: 0, fraction: 0, out value); + break; + + case 0x01010000: // hh:mm:ss + success = TryCreateTimeSpan(isNegative: isNegative, days: 0, hours: s.V1, minutes: s.V2, seconds: s.V3, fraction: 0, out value); + break; + + case 0x01010100: // dd:hh:mm:ss + success = TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: s.V2, minutes: s.V3, seconds: s.V4, fraction: 0, out value); + break; + + case 0x01010200: // hh:mm:ss.fffffff + success = TryCreateTimeSpan(isNegative: isNegative, days: 0, hours: s.V1, minutes: s.V2, seconds: s.V3, fraction: s.V4, out value); + break; + + case 0x01010102: // dd:hh:mm:ss.fffffff + success = TryCreateTimeSpan(isNegative: isNegative, days: s.V1, hours: s.V2, minutes: s.V3, seconds: s.V4, fraction: s.V5, out value); + break; + + default: + value = default; + success = false; + break; + } + + if (!success) + { + bytesConsumed = 0; + return false; + } + + return true; + } + } +} diff --git a/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.cs b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.cs new file mode 100644 index 0000000000..0ce810b392 --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/Buffers/Text/Utf8Parser/Utf8Parser.TimeSpan.cs 
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        /// <summary>
        /// Parses a TimeSpan at the start of a Utf8 string.
        /// </summary>
        /// <param name="source">The Utf8 string to parse</param>
        /// <param name="value">Receives the parsed value</param>
        /// <param name="bytesConsumed">On a successful parse, receives the length in bytes of the substring that was parsed</param>
        /// <param name="standardFormat">Expected format of the Utf8 string</param>
        /// <returns>
        /// true for success. "bytesConsumed" contains the length in bytes of the substring that was parsed.
        /// false if the string was not syntactically valid or an overflow or underflow occurred. "bytesConsumed" is set to 0.
        /// </returns>
        /// <remarks>
        /// Formats supported:
        ///     c/t/T (default)     [-][d.]hh:mm:ss[.fffffff]              (constant format)
        ///     G                   [-]d:hh:mm:ss.fffffff                  (general long)
        ///     g                   [-][d:]h:mm:ss[.f[f[f[f[f[f[f]]]]]]]   (general short)
        /// </remarks>
        /// <exception cref="System.FormatException">The format is not valid for this data type.</exception>
        public static bool TryParse(ReadOnlySpan<byte> source, out TimeSpan value, out int bytesConsumed, char standardFormat = default)
        {
            switch (standardFormat)
            {
                case 'G':
                    return TryParseTimeSpanBigG(source, out value, out bytesConsumed);

                case 'g':
                    return TryParseTimeSpanLittleG(source, out value, out bytesConsumed);

                // No format specified means the constant format; 't' and 'T' are accepted as aliases of 'c'.
                case default(char):
                case 'c':
                case 't':
                case 'T':
                    return TryParseTimeSpanC(source, out value, out bytesConsumed);

                default:
                    return ThrowHelper.TryParseThrowFormatException(out value, out bytesConsumed);
            }
        }

        /// <summary>
        /// Parses the fractional-seconds digits of a TimeSpan. Between 1 and 7 digits are accepted; when fewer than
        /// 7 are present the value is scaled as if zeroes had been appended. An eighth digit makes the parse fail,
        /// even if that digit is a zero.
        /// </summary>
        /// <param name="source">Utf8 text positioned on the first fraction digit.</param>
        /// <param name="value">Receives the fraction scaled to seven digits, or 0 on failure.</param>
        /// <param name="bytesConsumed">Receives the number of digits read, or 0 on failure.</param>
        /// <returns>true if a valid fraction was read; otherwise false.</returns>
        private static bool TryParseTimeSpanFraction(ReadOnlySpan<byte> source, out uint value, out int bytesConsumed)
        {
            uint fraction = 0;
            int digitCount = 0;

            while (digitCount != source.Length)
            {
                // Subtracting '0' as unsigned maps every non-digit byte to a value above 9.
                uint digit = source[digitCount] - 48u; // '0'
                if (digit > 9)
                {
                    break;
                }

                if (digitCount == Utf8Constants.DateTimeNumFractionDigits)
                {
                    // Yes, TimeSpan fraction parsing is that picky: one digit too many rejects the whole string.
                    value = default;
                    bytesConsumed = 0;
                    return false;
                }

                fraction = 10 * fraction + digit;
                digitCount++;
            }

            if (digitCount == 0)
            {
                // Either the input was empty or it did not start with a digit.
                value = default;
                bytesConsumed = 0;
                return false;
            }

            // Pad on the right with implied zeroes up to seven digits.
            Debug.Assert(digitCount >= 1 && digitCount <= 7);
            for (int padded = digitCount; padded < 7; padded++)
            {
                fraction *= 10;
            }

            value = fraction;
            bytesConsumed = digitCount;
            return true;
        }

        /// <summary>
        /// Builds a TimeSpan from its individual components, returning false instead of overflowing when the
        /// components are out of range or the result cannot be represented.
        /// </summary>
        /// <param name="isNegative">true to produce the negated TimeSpan.</param>
        /// <param name="days">Day count.</param>
        /// <param name="hours">Hour count; values above 23 are rejected.</param>
        /// <param name="minutes">Minute count; values above 59 are rejected.</param>
        /// <param name="seconds">Second count; values above 59 are rejected.</param>
        /// <param name="fraction">Sub-second part, added to (or subtracted from) the tick count as-is.</param>
        /// <param name="timeSpan">Receives the resulting TimeSpan, or default on failure.</param>
        /// <returns>true if the TimeSpan was created; otherwise false.</returns>
        private static bool TryCreateTimeSpan(bool isNegative, uint days, uint hours, uint minutes, uint seconds, uint fraction, out TimeSpan timeSpan)
        {
            const long MaxMilliSeconds = long.MaxValue / TimeSpan.TicksPerMillisecond;
            const long MinMilliSeconds = long.MinValue / TimeSpan.TicksPerMillisecond;

            // Every failure path below reports the default TimeSpan.
            timeSpan = default;

            Debug.Assert(days >= 0 && hours >= 0 && minutes >= 0 && seconds >= 00 && fraction >= 0);
            if (hours > 23 || minutes > 59 || seconds > 59)
            {
                return false;
            }

            Debug.Assert(fraction <= Utf8Constants.MaxDateTimeFraction); // This value comes from TryParseTimeSpanFraction() which already rejects any fraction string longer than 7 digits.

            // Computed in 64 bits; the uint inputs are widened before any multiplication takes place.
            long wholeSeconds = (long)days * 3600 * 24 + (long)hours * 3600 + (long)minutes * 60 + seconds;
            long wholeMilliseconds = wholeSeconds * 1000;

            long ticks;
            if (isNegative)
            {
                wholeMilliseconds = -wholeMilliseconds;
                if (wholeMilliseconds < MinMilliSeconds)
                {
                    return false;
                }

                long wholeTicks = wholeMilliseconds * TimeSpan.TicksPerMillisecond;
                if (wholeTicks < long.MinValue + fraction)
                {
                    return false;
                }

                ticks = wholeTicks - fraction;
            }
            else
            {
                if (wholeMilliseconds > MaxMilliSeconds)
                {
                    return false;
                }

                long wholeTicks = wholeMilliseconds * TimeSpan.TicksPerMillisecond;
                if (wholeTicks > long.MaxValue - fraction)
                {
                    return false;
                }

                ticks = wholeTicks + fraction;
            }

            timeSpan = new TimeSpan(ticks);
            return true;
        }
    }
}

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Diagnostics;

namespace System.Buffers.Text
{
    public static partial class Utf8Parser
    {
        /// <summary>
        /// Result of looking for one "separator followed by a number" component of a TimeSpan string.
        /// </summary>
        /// <remarks>
        /// Do not change or add values in this enum unless you review every use of the TimeSpanSplitter.Separators field.
        /// That field is an "array of four ComponentParseResults" encoded as a 32-bit integer with each of its four bytes
        /// containing one of 0 (NoMoreData), 1 (Colon) or 2 (Period).
        /// (So a value of 0x01010200 means the string parsed as "nn:nn:nn.nnnnnnn")
        /// </remarks>
        private enum ComponentParseResult : byte
        {
            NoMoreData = 0,
            Colon = 1,
            Period = 2,
            ParseFailure = 3,
        }

        /// <summary>
        /// Splits a TimeSpan string into up to five unsigned numbers, an optional leading minus sign and a record
        /// of which separator preceded the second through fifth numbers.
        /// </summary>
        private struct TimeSpanSplitter
        {
            // The numbers in the order they appeared. Their meaning (days, hours, ...) depends on Separators.
            public uint V1;
            public uint V2;
            public uint V3;
            public uint V4;
            public uint V5;

            // Set when the string started with '-'.
            public bool IsNegative;

            // Encodes an "array of four ComponentParseResults" as a 32-bit integer with each of its four bytes
            // containing one of 0 (NoMoreData), 1 (Colon) or 2 (Period). The separator before V2 lives in the most
            // significant byte, the one before V5 in the least significant byte.
            // (So a value of 0x01010200 means the string parsed as "nn:nn:nn.nnnnnnn")
            public uint Separators;

            /// <summary>
            /// Splits <paramref name="source"/> into its numeric components, filling V1..V5, IsNegative and Separators.
            /// </summary>
            /// <param name="source">The Utf8 text to split.</param>
            /// <param name="periodUsedToSeparateDay">
            /// true if a period directly after the first number separates it from the next integer rather than
            /// introducing a fraction.
            /// </param>
            /// <param name="bytesConsumed">Receives the number of bytes consumed on success, or 0 on failure.</param>
            /// <returns>true if the text could be split; otherwise false.</returns>
            public bool TrySplitTimeSpan(ReadOnlySpan<byte> source, bool periodUsedToSeparateDay, out int bytesConsumed)
            {
                // Every failure path reports zero bytes consumed.
                bytesConsumed = 0;

                int srcIndex = 0;
                byte c = default;

                // Unlike many other data types, TimeSpan allows leading whitespace (spaces and tabs).
                for (; srcIndex != source.Length; srcIndex++)
                {
                    c = source[srcIndex];
                    if (c != ' ' && c != '\t')
                    {
                        break;
                    }
                }

                if (srcIndex == source.Length)
                {
                    return false;
                }

                // Check for an optional negative sign. ('+' is not allowed.)
                if (c == Utf8Constants.Minus)
                {
                    IsNegative = true;
                    srcIndex++;
                    if (srcIndex == source.Length)
                    {
                        return false;
                    }
                }

                // From here, we terminate on anything that's not a digit, ':' or '.'. The '.' is only allowed after at
                // least three components have been specified. If we see it earlier, we'll assume that's an error and
                // fail out rather than treating it as the end of data.

                // A TimeSpan has to start with a number - parse the first one.
                if (!TryParseUInt32D(source.Slice(srcIndex), out V1, out int firstNumberLength))
                {
                    return false;
                }
                srcIndex += firstNumberLength;

                // Second number (if any). For the 'c' format, a period might validly appear here as it's used both to
                // separate the day and the fraction - however, the fraction is always the fourth component at
                // earliest, so if we do see a period at this stage, always parse the integer as a regular integer,
                // not as a fraction.
                ComponentParseResult separator = ParseComponent(source, neverParseAsFraction: periodUsedToSeparateDay, ref srcIndex, out V2);
                if (separator == ComponentParseResult.ParseFailure)
                {
                    return false;
                }
                if (separator == ComponentParseResult.NoMoreData)
                {
                    bytesConsumed = srcIndex;
                    return true;
                }
                Debug.Assert(separator == ComponentParseResult.Colon || separator == ComponentParseResult.Period);
                Separators |= ((uint)separator) << 24;

                // Third number (if any).
                separator = ParseComponent(source, false, ref srcIndex, out V3);
                if (separator == ComponentParseResult.ParseFailure)
                {
                    return false;
                }
                if (separator == ComponentParseResult.NoMoreData)
                {
                    bytesConsumed = srcIndex;
                    return true;
                }
                Debug.Assert(separator == ComponentParseResult.Colon || separator == ComponentParseResult.Period);
                Separators |= ((uint)separator) << 16;

                // Fourth number (if any).
                separator = ParseComponent(source, false, ref srcIndex, out V4);
                if (separator == ComponentParseResult.ParseFailure)
                {
                    return false;
                }
                if (separator == ComponentParseResult.NoMoreData)
                {
                    bytesConsumed = srcIndex;
                    return true;
                }
                Debug.Assert(separator == ComponentParseResult.Colon || separator == ComponentParseResult.Period);
                Separators |= ((uint)separator) << 8;

                // Fifth number (if any).
                separator = ParseComponent(source, false, ref srcIndex, out V5);
                if (separator == ComponentParseResult.ParseFailure)
                {
                    return false;
                }
                if (separator == ComponentParseResult.NoMoreData)
                {
                    bytesConsumed = srcIndex;
                    return true;
                }
                Debug.Assert(separator == ComponentParseResult.Colon || separator == ComponentParseResult.Period);
                Separators |= (uint)separator;

                // There cannot legally be a sixth number. If the next character is a period or colon, treat this as
                // an error as it's likely to indicate the start of a sixth number. Otherwise, treat as end of parse
                // with data left over.
                if (srcIndex != source.Length)
                {
                    byte next = source[srcIndex];
                    if (next == Utf8Constants.Period || next == Utf8Constants.Colon)
                    {
                        return false;
                    }
                }

                bytesConsumed = srcIndex;
                return true;
            }

            /// <summary>
            /// Looks for a separator followed by an unsigned integer (or, after a period, a fraction).
            /// </summary>
            /// <param name="source">The full Utf8 text being split.</param>
            /// <param name="neverParseAsFraction">
            /// true to parse the number after a period as a plain integer instead of as a fraction.
            /// </param>
            /// <param name="srcIndex">
            /// Position of the candidate separator. Advanced past the separator and the number on success; left
            /// unchanged when there is no separator; advanced past the separator only when the number fails to parse.
            /// </param>
            /// <param name="value">Receives the parsed number, or 0 if none was parsed.</param>
            /// <returns>
            /// The separator that was found, NoMoreData if the text ended or the next byte is not a separator, or
            /// ParseFailure if the separator was not followed by a valid number.
            /// </returns>
            private static ComponentParseResult ParseComponent(ReadOnlySpan<byte> source, bool neverParseAsFraction, ref int srcIndex, out uint value)
            {
                if (srcIndex == source.Length)
                {
                    value = default;
                    return ComponentParseResult.NoMoreData;
                }

                byte c = source[srcIndex];
                bool isColon = c == Utf8Constants.Colon;
                if (!isColon && c != Utf8Constants.Period)
                {
                    // Not a separator: the TimeSpan ends here.
                    value = default;
                    return ComponentParseResult.NoMoreData;
                }

                srcIndex++;
                ReadOnlySpan<byte> remainder = source.Slice(srcIndex);

                // Only a period that is allowed to start a fraction gets the fraction parser.
                int bytesConsumed;
                bool parsed = (isColon || neverParseAsFraction)
                    ? TryParseUInt32D(remainder, out value, out bytesConsumed)
                    : TryParseTimeSpanFraction(remainder, out value, out bytesConsumed);

                if (!parsed)
                {
                    value = default;
                    return ComponentParseResult.ParseFailure;
                }

                srcIndex += bytesConsumed;
                return isColon ? ComponentParseResult.Colon : ComponentParseResult.Period;
            }
        }
    }
}