diff options
author | Tanner Gooding <tagoo@outlook.com> | 2019-02-21 03:09:40 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-21 03:09:40 -0800 |
commit | 4ba958c9fd412b217e21a1dd508ec466a21aa462 (patch) | |
tree | b405b229b15ba500dec8b72e61460e5a7752fc1d /src | |
parent | feed8ae9df458f336b2dd76cc9abe934bb2a5751 (diff) | |
download | coreclr-4ba958c9fd412b217e21a1dd508ec466a21aa462.tar.gz coreclr-4ba958c9fd412b217e21a1dd508ec466a21aa462.tar.bz2 coreclr-4ba958c9fd412b217e21a1dd508ec466a21aa462.zip |
Enable the HWIntrinsic extension methods and remove the instance implementations (#22705)
* Removing various S.R.I.Vector instance method APIs, since they should now be exposed as extension methods
* Updating the JIT to recognize the S.R.I.Vector extension methods.
* Updating various S.R.I.Vector test templates
* Regenerating the S.R.I tests that are created from a template.
* Fixing the numArgs for Base_Vector256_GetLower
* Fixing the handling for `Base_VectorXXX_As` to normalize the struct type.
* Adding the Base_Vector128_As intrinsics back for arm64
Diffstat (limited to 'src')
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs | 3 | ||||
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs | 217 | ||||
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs | 79 | ||||
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs | 245 | ||||
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector64_1.cs | 160 | ||||
-rw-r--r-- | src/jit/compiler.h | 8 | ||||
-rw-r--r-- | src/jit/hwintrinsicArm64.cpp | 50 | ||||
-rw-r--r-- | src/jit/hwintrinsiclistxarch.h | 70 | ||||
-rw-r--r-- | src/jit/hwintrinsicxarch.cpp | 638 | ||||
-rw-r--r-- | src/jit/importer.cpp | 758 |
10 files changed, 845 insertions, 1383 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs index c2e530ba1c..4c3232693d 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs @@ -1630,6 +1630,7 @@ namespace System.Runtime.Intrinsics /// <returns>The value of the element at <paramref name="index" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> + [Intrinsic] public static T GetElement<T>(this Vector128<T> vector, int index) where T : struct { @@ -1652,6 +1653,7 @@ namespace System.Runtime.Intrinsics /// <returns>A <see cref="Vector128{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in <paramref name="vector" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> + [Intrinsic] public static Vector128<T> WithElement<T>(this Vector128<T> vector, int index, T value) where T : struct { @@ -1677,7 +1679,6 @@ namespace System.Runtime.Intrinsics where T : struct { ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - return Unsafe.As<Vector128<T>, Vector64<T>>(ref vector); } diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs index 80da2c4f77..6598a573ce 100644 --- 
a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs @@ -91,83 +91,6 @@ namespace System.Runtime.Intrinsics } } - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{U}" />.</summary> - /// <typeparam name="U">The type of the vector the current instance should be reinterpreted as.</typeparam> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{U}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) or the type of the target (<typeparamref name="U" />) is not supported.</exception> - [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector128<U> As<U>() where U : struct - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - ThrowHelper.ThrowForUnsupportedVectorBaseType<U>(); - return Unsafe.As<Vector128<T>, Vector128<U>>(ref Unsafe.AsRef(in this)); - } - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{Byte}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{Byte}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<byte> AsByte() => As<byte>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{Double}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{Double}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<double> AsDouble() => As<double>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{Int16}" />.</summary> - /// <returns>The current instance reinterpreted as a new 
<see cref="Vector128{Int16}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<short> AsInt16() => As<short>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{Int32}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{Int32}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<int> AsInt32() => As<int>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{Int64}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{Int64}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<long> AsInt64() => As<long>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{SByte}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{SByte}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector128<sbyte> AsSByte() => As<sbyte>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{Single}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{Single}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<float> AsSingle() => As<float>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{UInt16}" 
/>.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{UInt16}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector128<ushort> AsUInt16() => As<ushort>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{UInt32}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{UInt32}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector128<uint> AsUInt32() => As<uint>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector128{UInt64}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector128{UInt64}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector128<ulong> AsUInt64() => As<ulong>(); - /// <summary>Determines whether the specified <see cref="Vector128{T}" /> is equal to the current instance.</summary> /// <param name="other">The <see cref="Vector128{T}" /> to compare with the current instance.</param> /// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns> @@ -179,7 +102,7 @@ namespace System.Runtime.Intrinsics if (Sse.IsSupported && (typeof(T) == typeof(float))) { - Vector128<float> result = Sse.CompareEqual(AsSingle(), other.AsSingle()); + Vector128<float> result = Sse.CompareEqual(this.AsSingle(), other.AsSingle()); return Sse.MoveMask(result) == 0b1111; // We have one bit per element } @@ -187,7 +110,7 @@ namespace System.Runtime.Intrinsics { if (typeof(T) == typeof(double)) { - 
Vector128<double> result = Sse2.CompareEqual(AsDouble(), other.AsDouble()); + Vector128<double> result = Sse2.CompareEqual(this.AsDouble(), other.AsDouble()); return Sse2.MoveMask(result) == 0b11; // We have one bit per element } else @@ -197,7 +120,7 @@ namespace System.Runtime.Intrinsics // bytes are exactly the same. Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); - Vector128<byte> result = Sse2.CompareEqual(AsByte(), other.AsByte()); + Vector128<byte> result = Sse2.CompareEqual(this.AsByte(), other.AsByte()); return Sse2.MoveMask(result) == 0b1111_1111_1111_1111; // We have one bit per element } } @@ -227,47 +150,6 @@ namespace System.Runtime.Intrinsics return (obj is Vector128<T>) && Equals((Vector128<T>)(obj)); } - /// <summary>Gets the element at the specified index.</summary> - /// <param name="index">The index of the element to get.</param> - /// <returns>The value of the element at <paramref name="index" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> - [Intrinsic] - public T GetElement(int index) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if ((uint)(index) >= (uint)(Count)) - { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); - } - - ref T e0 = ref Unsafe.As<Vector128<T>, T>(ref Unsafe.AsRef(in this)); - return Unsafe.Add(ref e0, index); - } - - /// <summary>Creates a new <see cref="Vector128{T}" /> with the element at the specified index set to the specified value and the remaining elements set to the same value as that in the current instance.</summary> - /// <param name="index">The index of the element to set.</param> - /// <param name="value">The value to set the value to.</param> - /// <returns>A <see cref="Vector128{T}" /> with 
the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> - [Intrinsic] - public Vector128<T> WithElement(int index, T value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if ((uint)(index) >= (uint)(Count)) - { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); - } - - Vector128<T> result = this; - ref T e0 = ref Unsafe.As<Vector128<T>, T>(ref result); - Unsafe.Add(ref e0, index) = value; - return result; - } - /// <summary>Gets the hash code for the instance.</summary> /// <returns>The hash code for the instance.</returns> /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> @@ -279,70 +161,12 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < Count; i++) { - hashCode = HashCode.Combine(hashCode, GetElement(i).GetHashCode()); + hashCode = HashCode.Combine(hashCode, this.GetElement(i).GetHashCode()); } return hashCode; } - /// <summary>Gets the value of the lower 64-bits as a new <see cref="Vector64{T}" />.</summary> - /// <returns>The value of the lower 64-bits as a new <see cref="Vector64{T}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public Vector64<T> GetLower() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - return Unsafe.As<Vector128<T>, Vector64<T>>(ref Unsafe.AsRef(in this)); - } - - /// <summary>Creates a new <see cref="Vector128{T}" /> with the lower 64-bits set to the specified value and the upper 64-bits 
set to the same value as that in the current instance.</summary> - /// <param name="value">The value of the lower 64-bits as a <see cref="Vector64{T}" />.</param> - /// <returns>A new <see cref="Vector128{T}" /> with the lower 64-bits set to the specified value and the upper 64-bits set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public Vector128<T> WithLower(Vector64<T> value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - Vector128<T> result = this; - Unsafe.As<Vector128<T>, Vector64<T>>(ref result) = value; - return result; - } - - /// <summary>Gets the value of the upper 64-bits as a new <see cref="Vector64{T}" />.</summary> - /// <returns>The value of the upper 64-bits as a new <see cref="Vector64{T}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public Vector64<T> GetUpper() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - ref Vector64<T> lower = ref Unsafe.As<Vector128<T>, Vector64<T>>(ref Unsafe.AsRef(in this)); - return Unsafe.Add(ref lower, 1); - } - - /// <summary>Creates a new <see cref="Vector128{T}" /> with the upper 64-bits set to the specified value and the upper 64-bits set to the same value as that in the current instance.</summary> - /// <param name="value">The value of the upper 64-bits as a <see cref="Vector64{T}" />.</param> - /// <returns>A new <see cref="Vector128{T}" /> with the upper 64-bits set to the specified value and the upper 64-bits set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public Vector128<T> WithUpper(Vector64<T> value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - 
Vector128<T> result = this; - ref Vector64<T> lower = ref Unsafe.As<Vector128<T>, Vector64<T>>(ref result); - Unsafe.Add(ref lower, 1) = value; - return result; - } - - /// <summary>Converts the current instance to a scalar containing the value of the first element.</summary> - /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public T ToScalar() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - return Unsafe.As<Vector128<T>, T>(ref Unsafe.AsRef(in this)); - } - /// <summary>Converts the current instance to an equivalent string representation.</summary> /// <returns>An equivalent string representation of the current instance.</returns> /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> @@ -377,43 +201,14 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < lastElement; i++) { - sb.Append(((IFormattable)(GetElement(i))).ToString(format, formatProvider)); + sb.Append(((IFormattable)(this.GetElement(i))).ToString(format, formatProvider)); sb.Append(separator); sb.Append(' '); } - sb.Append(((IFormattable)(GetElement(lastElement))).ToString(format, formatProvider)); + sb.Append(((IFormattable)(this.GetElement(lastElement))).ToString(format, formatProvider)); sb.Append('>'); return StringBuilderCache.GetStringAndRelease(sb); } - - /// <summary>Converts the current instance to a new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits initialized to zero.</summary> - /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits initialized to zero.</returns> - /// <exception cref="NotSupportedException">The type of the current instance 
(<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<T> ToVector256() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - Vector256<T> result = Vector256<T>.Zero; - Unsafe.As<Vector256<T>, Vector128<T>>(ref result) = this; - return result; - } - - /// <summary>Converts the current instance to a new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits left uninitialized.</summary> - /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the value of the current instance and the upper 128-bits left uninitialized.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public unsafe Vector256<T> ToVector256Unsafe() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - // This relies on us stripping the "init" flag from the ".locals" - // declaration to let the upper bits be uninitialized. 
- - var pResult = stackalloc byte[Vector256.Size]; - Unsafe.AsRef<Vector128<T>>(pResult) = this; - return Unsafe.AsRef<Vector256<T>>(pResult); - } } } diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs index 1af31dc61a..adf8207bab 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs @@ -1758,6 +1758,7 @@ namespace System.Runtime.Intrinsics /// <returns>The value of the element at <paramref name="index" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> + [Intrinsic] public static T GetElement<T>(this Vector256<T> vector, int index) where T : struct { @@ -1780,6 +1781,7 @@ namespace System.Runtime.Intrinsics /// <returns>A <see cref="Vector256{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in <paramref name="vector" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> + [Intrinsic] public static Vector256<T> WithElement<T>(this Vector256<T> vector, int index, T value) where T : struct { @@ -1801,11 +1803,11 @@ namespace System.Runtime.Intrinsics /// <param name="vector">The vector to get the lower 128-bits from.</param> /// <returns>The value of the lower 128-bits as a new <see cref="Vector128{T}" />.</returns> /// <exception 
cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [Intrinsic] public static Vector128<T> GetLower<T>(this Vector256<T> vector) where T : struct { ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - return Unsafe.As<Vector256<T>, Vector128<T>>(ref vector); } @@ -1815,14 +1817,33 @@ namespace System.Runtime.Intrinsics /// <param name="value">The value of the lower 128-bits as a <see cref="Vector128{T}" />.</param> /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the specified value and the upper 128-bits set to the same value as that in <paramref name="vector" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256<T> WithLower<T>(this Vector256<T> vector, Vector128<T> value) where T : struct { ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - Vector256<T> result = vector; - Unsafe.As<Vector256<T>, Vector128<T>>(ref result) = value; - return result; + if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) + { + // All integral types generate the same instruction, so just pick one rather than handling each T separately + return Avx2.InsertVector128(vector.AsByte(), value.AsByte(), 0).As<byte, T>(); + } + + if (Avx.IsSupported) + { + // All floating-point types generate the same instruction, so just pick one rather than handling each T separately + // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software + return Avx.InsertVector128(vector.AsSingle(), value.AsSingle(), 0).As<float, T>(); + } + + return SoftwareFallback(vector, value); + + static Vector256<T> SoftwareFallback(Vector256<T> vector, Vector128<T> value) + { + Vector256<T> result = vector; + Unsafe.As<Vector256<T>, 
Vector128<T>>(ref result) = value; + return result; + } } /// <summary>Gets the value of the upper 128-bits as a new <see cref="Vector128{T}" />.</summary> @@ -1830,13 +1851,32 @@ namespace System.Runtime.Intrinsics /// <param name="vector">The vector to get the upper 128-bits from.</param> /// <returns>The value of the upper 128-bits as a new <see cref="Vector128{T}" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<T> GetUpper<T>(this Vector256<T> vector) where T : struct { ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref vector); - return Unsafe.Add(ref lower, 1); + if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) + { + // All integral types generate the same instruction, so just pick one rather than handling each T separately + return Avx2.ExtractVector128(vector.AsByte(), 1).As<byte, T>(); + } + + if (Avx.IsSupported) + { + // All floating-point types generate the same instruction, so just pick one rather than handling each T separately + // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software + return Avx.ExtractVector128(vector.AsSingle(), 1).As<float, T>(); + } + + return SoftwareFallback(vector); + + static Vector128<T> SoftwareFallback(Vector256<T> vector) + { + ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref vector); + return Unsafe.Add(ref lower, 1); + } } /// <summary>Creates a new <see cref="Vector256{T}" /> with the upper 128-bits set to the specified value and the lower 128-bits set to the same value as that in the given vector.</summary> @@ -1845,15 +1885,34 @@ namespace System.Runtime.Intrinsics /// <param name="value">The value of the upper 128-bits as a <see 
cref="Vector128{T}" />.</param> /// <returns>A new <see cref="Vector256{T}" /> with the upper 128-bits set to the specified value and the lower 128-bits set to the same value as that in <paramref name="vector" />.</returns> /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception> + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256<T> WithUpper<T>(this Vector256<T> vector, Vector128<T> value) where T : struct { ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - Vector256<T> result = vector; - ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref result); - Unsafe.Add(ref lower, 1) = value; - return result; + if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) + { + // All integral types generate the same instruction, so just pick one rather than handling each T separately + return Avx2.InsertVector128(vector.AsByte(), value.AsByte(), 1).As<byte, T>(); + } + + if (Avx.IsSupported) + { + // All floating-point types generate the same instruction, so just pick one rather than handling each T separately + // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software + return Avx.InsertVector128(vector.AsSingle(), value.AsSingle(), 1).As<float, T>(); + } + + return SoftwareFallback(vector, value); + + static Vector256<T> SoftwareFallback(Vector256<T> vector, Vector128<T> value) + { + Vector256<T> result = vector; + ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref result); + Unsafe.Add(ref lower, 1) = value; + return result; + } } /// <summary>Converts the given vector to a scalar containing the value of the first element.</summary> diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs index 968faf1a5b..07ba97163e 100644 --- 
a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs @@ -93,83 +93,6 @@ namespace System.Runtime.Intrinsics } } - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{U}" />.</summary> - /// <typeparam name="U">The type of the vector the current instance should be reinterpreted as.</typeparam> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{U}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) or the type of the target (<typeparamref name="U" />) is not supported.</exception> - [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector256<U> As<U>() where U : struct - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - ThrowHelper.ThrowForUnsupportedVectorBaseType<U>(); - return Unsafe.As<Vector256<T>, Vector256<U>>(ref Unsafe.AsRef(in this)); - } - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{Byte}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{Byte}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<byte> AsByte() => As<byte>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{Double}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{Double}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<double> AsDouble() => As<double>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{Int16}" />.</summary> - /// <returns>The current instance reinterpreted as a new 
<see cref="Vector256{Int16}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<short> AsInt16() => As<short>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{Int32}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{Int32}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<int> AsInt32() => As<int>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{Int64}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{Int64}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<long> AsInt64() => As<long>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{SByte}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{SByte}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector256<sbyte> AsSByte() => As<sbyte>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{Single}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{Single}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector256<float> AsSingle() => As<float>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{UInt16}" 
/>.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{UInt16}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector256<ushort> AsUInt16() => As<ushort>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{UInt32}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{UInt32}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector256<uint> AsUInt32() => As<uint>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector256{UInt64}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector256{UInt64}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector256<ulong> AsUInt64() => As<ulong>(); - /// <summary>Determines whether the specified <see cref="Vector256{T}" /> is equal to the current instance.</summary> /// <param name="other">The <see cref="Vector256{T}" /> to compare with the current instance.</param> /// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns> @@ -181,13 +104,13 @@ namespace System.Runtime.Intrinsics { if (typeof(T) == typeof(float)) { - Vector256<float> result = Avx.Compare(AsSingle(), other.AsSingle(), FloatComparisonMode.EqualOrderedNonSignaling); + Vector256<float> result = Avx.Compare(this.AsSingle(), other.AsSingle(), FloatComparisonMode.EqualOrderedNonSignaling); return Avx.MoveMask(result) == 0b1111_1111; // We have one bit per element } if (typeof(T) == 
typeof(double)) { - Vector256<double> result = Avx.Compare(AsDouble(), other.AsDouble(), FloatComparisonMode.EqualOrderedNonSignaling); + Vector256<double> result = Avx.Compare(this.AsDouble(), other.AsDouble(), FloatComparisonMode.EqualOrderedNonSignaling); return Avx.MoveMask(result) == 0b1111; // We have one bit per element } } @@ -199,7 +122,7 @@ namespace System.Runtime.Intrinsics // bytes are exactly the same. Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); - Vector256<byte> result = Avx2.CompareEqual(AsByte(), other.AsByte()); + Vector256<byte> result = Avx2.CompareEqual(this.AsByte(), other.AsByte()); return Avx2.MoveMask(result) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111)); // We have one bit per element } @@ -228,47 +151,6 @@ namespace System.Runtime.Intrinsics return (obj is Vector256<T>) && Equals((Vector256<T>)(obj)); } - /// <summary>Gets the element at the specified index.</summary> - /// <param name="index">The index of the element to get.</param> - /// <returns>The value of the element at <paramref name="index" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> - [Intrinsic] - public T GetElement(int index) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if ((uint)(index) >= (uint)(Count)) - { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); - } - - ref T e0 = ref Unsafe.As<Vector256<T>, T>(ref Unsafe.AsRef(in this)); - return Unsafe.Add(ref e0, index); - } - - /// <summary>Creates a new <see cref="Vector256{T}" /> with the element at the specified index set to the specified value and the remaining elements set to the same value as that in the current instance.</summary> - /// <param name="index">The index of the element 
to set.</param> - /// <param name="value">The value to set the value to.</param> - /// <returns>A <see cref="Vector256{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> - [Intrinsic] - public Vector256<T> WithElement(int index, T value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if ((uint)(index) >= (uint)(Count)) - { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); - } - - Vector256<T> result = this; - ref T e0 = ref Unsafe.As<Vector256<T>, T>(ref result); - Unsafe.Add(ref e0, index) = value; - return result; - } - /// <summary>Gets the hash code for the instance.</summary> /// <returns>The hash code for the instance.</returns> /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> @@ -280,127 +162,12 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < Count; i++) { - hashCode = HashCode.Combine(hashCode, GetElement(i).GetHashCode()); + hashCode = HashCode.Combine(hashCode, this.GetElement(i).GetHashCode()); } return hashCode; } - /// <summary>Gets the value of the lower 128-bits as a new <see cref="Vector128{T}" />.</summary> - /// <returns>The value of the lower 128-bits as a new <see cref="Vector128{T}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector128<T> GetLower() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - return Unsafe.As<Vector256<T>, Vector128<T>>(ref 
Unsafe.AsRef(in this)); - } - - /// <summary>Creates a new <see cref="Vector256{T}" /> with the lower 128-bits set to the specified value and the lower 128-bits set to the same value as that in the current instance.</summary> - /// <param name="value">The value of the lower 128-bits as a <see cref="Vector128{T}" />.</param> - /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the specified value and the lower 128-bits set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector256<T> WithLower(Vector128<T> value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) - { - // All integral types generate the same instruction, so just pick one rather than handling each T separately - return Avx2.InsertVector128(AsByte(), value.AsByte(), 0).As<T>(); - } - - if (Avx.IsSupported) - { - // All floating-point types generate the same instruction, so just pick one rather than handling each T separately - // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software - return Avx.InsertVector128(AsSingle(), value.AsSingle(), 0).As<T>(); - } - - return SoftwareFallback(in this, value); - - static Vector256<T> SoftwareFallback(in Vector256<T> vector, Vector128<T> value) - { - Vector256<T> result = vector; - Unsafe.As<Vector256<T>, Vector128<T>>(ref result) = value; - return result; - } - } - - /// <summary>Gets the value of the upper 128-bits as a new <see cref="Vector128{T}" />.</summary> - /// <returns>The value of the upper 128-bits as a new <see cref="Vector128{T}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not 
supported.</exception> - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector128<T> GetUpper() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) - { - // All integral types generate the same instruction, so just pick one rather than handling each T separately - return Avx2.ExtractVector128(AsByte(), 1).As<T>(); - } - - if (Avx.IsSupported) - { - // All floating-point types generate the same instruction, so just pick one rather than handling each T separately - // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software - return Avx.ExtractVector128(AsSingle(), 1).As<T>(); - } - - return SoftwareFallback(in this); - - static Vector128<T> SoftwareFallback(in Vector256<T> vector) - { - ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref Unsafe.AsRef(in vector)); - return Unsafe.Add(ref lower, 1); - } - } - - /// <summary>Creates a new <see cref="Vector256{T}" /> with the upper 128-bits set to the specified value and the upper 128-bits set to the same value as that in the current instance.</summary> - /// <param name="value">The value of the upper 128-bits as a <see cref="Vector128{T}" />.</param> - /// <returns>A new <see cref="Vector256{T}" /> with the upper 128-bits set to the specified value and the upper 128-bits set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector256<T> WithUpper(Vector128<T> value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) - { - // All integral types generate the same instruction, so just pick one rather than handling each T separately 
- return Avx2.InsertVector128(AsByte(), value.AsByte(), 1).As<T>(); - } - - if (Avx.IsSupported) - { - // All floating-point types generate the same instruction, so just pick one rather than handling each T separately - // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software - return Avx.InsertVector128(AsSingle(), value.AsSingle(), 1).As<T>(); - } - - return SoftwareFallback(in this, value); - - static Vector256<T> SoftwareFallback(in Vector256<T> vector, Vector128<T> value) - { - Vector256<T> result = vector; - ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref result); - Unsafe.Add(ref lower, 1) = value; - return result; - } - } - - /// <summary>Converts the current instance to a scalar containing the value of the first element.</summary> - /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public T ToScalar() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - return Unsafe.As<Vector256<T>, T>(ref Unsafe.AsRef(in this)); - } - /// <summary>Converts the current instance to an equivalent string representation.</summary> /// <returns>An equivalent string representation of the current instance.</returns> /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> @@ -435,11 +202,11 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < lastElement; i++) { - sb.Append(((IFormattable)(GetElement(i))).ToString(format, formatProvider)); + sb.Append(((IFormattable)(this.GetElement(i))).ToString(format, formatProvider)); sb.Append(separator); sb.Append(' '); } - sb.Append(((IFormattable)(GetElement(lastElement))).ToString(format, formatProvider)); + 
sb.Append(((IFormattable)(this.GetElement(lastElement))).ToString(format, formatProvider)); sb.Append('>'); return StringBuilderCache.GetStringAndRelease(sb); diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector64_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector64_1.cs index d6b1883a94..99db8c0955 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector64_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector64_1.cs @@ -77,83 +77,6 @@ namespace System.Runtime.Intrinsics } } - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{U}" />.</summary> - /// <typeparam name="U">The type of the vector the current instance should be reinterpreted as.</typeparam> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{U}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) or the type of the target (<typeparamref name="U" />) is not supported.</exception> - [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public Vector64<U> As<U>() where U : struct - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - ThrowHelper.ThrowForUnsupportedVectorBaseType<U>(); - return Unsafe.As<Vector64<T>, Vector64<U>>(ref Unsafe.AsRef(in this)); - } - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Byte}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{Byte}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector64<byte> AsByte() => As<byte>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Double}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{Double}" />.</returns> - /// <exception 
cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector64<double> AsDouble() => As<double>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Int16}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{Int16}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector64<short> AsInt16() => As<short>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Int32}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{Int32}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector64<int> AsInt32() => As<int>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Int64}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{Int64}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector64<long> AsInt64() => As<long>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{SByte}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{SByte}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector64<sbyte> AsSByte() => As<sbyte>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Single}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see 
cref="Vector64{Single}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - public Vector64<float> AsSingle() => As<float>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{Int16}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{Int16}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector64<ushort> AsUInt16() => As<ushort>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{UInt32}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{UInt32}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector64<uint> AsUInt32() => As<uint>(); - - /// <summary>Reinterprets the current instance as a new <see cref="Vector64{UInt64}" />.</summary> - /// <returns>The current instance reinterpreted as a new <see cref="Vector64{UInt64}" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - [Intrinsic] - [CLSCompliant(false)] - public Vector64<ulong> AsUInt64() => As<ulong>(); - /// <summary>Determines whether the specified <see cref="Vector64{T}" /> is equal to the current instance.</summary> /// <param name="other">The <see cref="Vector64{T}" /> to compare with the current instance.</param> /// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns> @@ -164,7 +87,7 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < Count; i++) { - if 
(!((IEquatable<T>)(GetElement(i))).Equals(other.GetElement(i))) + if (!((IEquatable<T>)(this.GetElement(i))).Equals(other.GetElement(i))) { return false; } @@ -182,45 +105,6 @@ namespace System.Runtime.Intrinsics return (obj is Vector64<T>) && Equals((Vector64<T>)(obj)); } - /// <summary>Gets the element at the specified index.</summary> - /// <param name="index">The index of the element to get.</param> - /// <returns>The value of the element at <paramref name="index" />.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> - public T GetElement(int index) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if ((uint)(index) >= (uint)(Count)) - { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); - } - - ref T e0 = ref Unsafe.As<Vector64<T>, T>(ref Unsafe.AsRef(in this)); - return Unsafe.Add(ref e0, index); - } - - /// <summary>Creates a new <see cref="Vector64{T}" /> with the element at the specified index set to the specified value and the remaining elements set to the same value as that in the current instance.</summary> - /// <param name="index">The index of the element to set.</param> - /// <param name="value">The value to set the value to.</param> - /// <returns>A <see cref="Vector64{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in the current instance.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception> - public Vector64<T> WithElement(int 
index, T value) - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - if ((uint)(index) >= (uint)(Count)) - { - throw new ArgumentOutOfRangeException(nameof(index)); - } - - Vector64<T> result = this; - ref T e0 = ref Unsafe.As<Vector64<T>, T>(ref result); - Unsafe.Add(ref e0, index) = value; - return result; - } - /// <summary>Gets the hash code for the instance.</summary> /// <returns>The hash code for the instance.</returns> /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> @@ -232,21 +116,12 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < Count; i++) { - hashCode = HashCode.Combine(hashCode, GetElement(i).GetHashCode()); + hashCode = HashCode.Combine(hashCode, this.GetElement(i).GetHashCode()); } return hashCode; } - /// <summary>Converts the current instance to a scalar containing the value of the first element.</summary> - /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public T ToScalar() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - return Unsafe.As<Vector64<T>, T>(ref Unsafe.AsRef(in this)); - } - /// <summary>Converts the current instance to an equivalent string representation.</summary> /// <returns>An equivalent string representation of the current instance.</returns> /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> @@ -281,41 +156,14 @@ namespace System.Runtime.Intrinsics for (int i = 0; i < lastElement; i++) { - sb.Append(((IFormattable)(GetElement(i))).ToString(format, formatProvider)); + sb.Append(((IFormattable)(this.GetElement(i))).ToString(format, formatProvider)); sb.Append(separator); sb.Append(' '); } - 
sb.Append(((IFormattable)(GetElement(lastElement))).ToString(format, formatProvider)); + sb.Append(((IFormattable)(this.GetElement(lastElement))).ToString(format, formatProvider)); sb.Append('>'); return StringBuilderCache.GetStringAndRelease(sb); } - - /// <summary>Converts the current instance to a new <see cref="Vector128{T}" /> with the lower 64-bits set to the value of the current instance and the upper 64-bits initialized to zero.</summary> - /// <returns>A new <see cref="Vector128{T}" /> with the lower 64-bits set to the value of the current instance and the upper 64-bits initialized to zero.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public Vector128<T> ToVector128() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - Vector128<T> result = Vector128<T>.Zero; - Unsafe.As<Vector128<T>, Vector64<T>>(ref result) = this; - return result; - } - - /// <summary>Converts the current instance to a new <see cref="Vector128{T}" /> with the lower 64-bits set to the value of the current instance and the upper 64-bits left uninitialized.</summary> - /// <returns>A new <see cref="Vector128{T}" /> with the lower 64-bits set to the value of the current instance and the upper 64-bits initialized to zero.</returns> - /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception> - public unsafe Vector128<T> ToVector128Unsafe() - { - ThrowHelper.ThrowForUnsupportedVectorBaseType<T>(); - - // This relies on us stripping the "init" flag from the ".locals" - // declaration to let the upper bits be uninitialized. 
- - var pResult = stackalloc byte[Vector128.Size]; - Unsafe.AsRef<Vector64<T>>(pResult) = this; - return Unsafe.AsRef<Vector128<T>>(pResult); - } } } diff --git a/src/jit/compiler.h b/src/jit/compiler.h index ca47df7013..ab98059ad9 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3444,10 +3444,6 @@ protected: NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method); #ifdef FEATURE_HW_INTRINSICS - GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig); GenTree* impHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, @@ -3461,6 +3457,10 @@ protected: bool compSupportsHWIntrinsic(InstructionSet isa); #ifdef _TARGET_XARCH_ + GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand); GenTree* impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp index d82eef59ac..2309f7569b 100644 --- a/src/jit/hwintrinsicArm64.cpp +++ b/src/jit/hwintrinsicArm64.cpp @@ -291,6 +291,56 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, var_types simdBaseType = TYP_UNKNOWN; unsigned simdSizeBytes = 0; + switch (intrinsic) + { + case NI_Base_Vector64_AsByte: + case NI_Base_Vector64_AsInt16: + case NI_Base_Vector64_AsInt32: + case NI_Base_Vector64_AsSByte: + case NI_Base_Vector64_AsSingle: + case NI_Base_Vector64_AsUInt16: + case NI_Base_Vector64_AsUInt32: + case NI_Base_Vector128_As: + case NI_Base_Vector128_AsByte: + case NI_Base_Vector128_AsDouble: + case NI_Base_Vector128_AsInt16: + case NI_Base_Vector128_AsInt32: + case NI_Base_Vector128_AsInt64: + case NI_Base_Vector128_AsSByte: + case NI_Base_Vector128_AsSingle: + case NI_Base_Vector128_AsUInt16: + case NI_Base_Vector128_AsUInt32: + case NI_Base_Vector128_AsUInt64: + { + // We fold away the cast here, 
as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both the same TYP_SIMD. + + var_types op1SimdBaseType = TYP_UNKNOWN; + + assert(!sig->hasThis()); + assert(sig->numArgs == 1); + assert(JITtype2varType(sig->retType) == TYP_STRUCT); + + simdBaseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSizeBytes); + op1SimdBaseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)); + + if (!varTypeIsArithmetic(simdBaseType) || !varTypeIsArithmetic(op1SimdBaseType)) + { + return nullptr; + } + + retNode = impSIMDPopStack(getSIMDTypeForSize(simdSizeBytes), /* expectAddr: */ false, sig->retTypeClass); + SetOpLclRelatedToSIMDIntrinsic(retNode); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + + return retNode; + } + + default: + break; + } + switch (HWIntrinsicInfo::lookup(intrinsic).form) { case HWIntrinsicInfo::SimdBinaryOp: diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 8e02aabf6c..69fb795c71 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -27,41 +27,41 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Base Intrinsics -HARDWARE_INTRINSIC(Base_Vector128_As, "As", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsByte, "AsByte", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsDouble, "AsDouble", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsInt16, "AsInt16", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsInt32, "AsInt32", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsInt64, "AsInt64", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsSByte, "AsSByte", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsSingle, "AsSingle", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsUInt16, "AsUInt16", 
Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsUInt32, "AsUInt32", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_AsUInt64, "AsUInt64", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_GetElement, "GetElement", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Base_Vector128_WithElement, "WithElement", Base, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Base_Vector128_ToScalar, "ToScalar", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_ToVector256, "ToVector256", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, 
INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_ToVector256Unsafe, "ToVector256Unsafe", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector128_Zero, "get_Zero", Base, -1, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_As, "As", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsByte, "AsByte", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsDouble, "AsDouble", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsInt16, "AsInt16", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsInt32, "AsInt32", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsInt64, "AsInt64", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsSByte, "AsSByte", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsSingle, "AsSingle", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsUInt16, "AsUInt16", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsUInt32, "AsUInt32", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_AsUInt64, "AsUInt64", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) 
-HARDWARE_INTRINSIC(Base_Vector256_GetElement, "GetElement", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Base_Vector256_WithElement, "WithElement", Base, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Base_Vector256_GetLower, "GetLower", Base, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_ToScalar, "ToScalar", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Base_Vector256_Zero, "get_Zero", Base, -1, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_As, "As", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsByte, "AsByte", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsDouble, "AsDouble", Base, -1, 
16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsInt16, "AsInt16", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsInt32, "AsInt32", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsInt64, "AsInt64", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsSByte, "AsSByte", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsSingle, "AsSingle", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsUInt16, "AsUInt16", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsUInt32, "AsUInt32", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_AsUInt64, "AsUInt64", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_GetElement, "GetElement", Base, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Base_Vector128_WithElement, "WithElement", Base, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Base_Vector128_ToScalar, "ToScalar", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_ToVector256, "ToVector256", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_ToVector256Unsafe, "ToVector256Unsafe", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_Zero, "get_Zero", Base, -1, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_As, "As", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsByte, "AsByte", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsDouble, "AsDouble", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) 
+HARDWARE_INTRINSIC(Base_Vector256_AsInt16, "AsInt16", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsInt32, "AsInt32", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsInt64, "AsInt64", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsSByte, "AsSByte", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsSingle, "AsSingle", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsUInt16, "AsUInt16", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsUInt32, "AsUInt32", Base, -1, 
32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_AsUInt64, "AsUInt64", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_GetElement, "GetElement", Base, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Base_Vector256_WithElement, "WithElement", Base, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Base_Vector256_GetLower, "GetLower", Base, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_ToScalar, "ToScalar", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, 
INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_Zero, "get_Zero", Base, -1, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index c2602bd0bd..14a76dea5f 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -504,6 +504,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa) case InstructionSet_AES: case InstructionSet_AVX: case InstructionSet_AVX2: + case InstructionSet_Base: case InstructionSet_BMI1: case InstructionSet_BMI2: case InstructionSet_BMI1_X64: @@ -918,6 +919,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // other intrinsics need special importation switch (isa) { + case InstructionSet_Base: + return impBaseIntrinsic(intrinsic, method, sig, mustExpand); case InstructionSet_SSE: return impSSEIntrinsic(intrinsic, method, sig, mustExpand); case InstructionSet_SSE2: @@ -952,6 +955,641 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } +//------------------------------------------------------------------------ +// impBaseIntrinsic: dispatch intrinsics to their own implementation +// +// Arguments: +// intrinsic -- id of the intrinsic 
function. +// method -- method handle of the intrinsic function. +// sig -- signature of the intrinsic call +// mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsics +// +// Return Value: +// the expanded intrinsic. +// +GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand) +{ + GenTree* retNode = nullptr; + GenTree* op1 = nullptr; + + if (!featureSIMD) + { + return nullptr; + } + + unsigned simdSize = 0; + var_types baseType = TYP_UNKNOWN; + var_types retType = JITtype2varType(sig->retType); + + assert(!sig->hasThis()); + + if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic)) + { + baseType = getBaseTypeAndSizeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args), &simdSize); + + if (retType == TYP_STRUCT) + { + unsigned retSimdSize = 0; + var_types retBasetype = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &retSimdSize); + if (!varTypeIsArithmetic(retBasetype)) + { + return nullptr; + } + retType = getSIMDTypeForSize(retSimdSize); + } + } + else + { + assert(retType == TYP_STRUCT); + baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize); + retType = getSIMDTypeForSize(simdSize); + } + + if (!varTypeIsArithmetic(baseType)) + { + return nullptr; + } + + switch (intrinsic) + { + case NI_Base_Vector256_As: + case NI_Base_Vector256_AsByte: + case NI_Base_Vector256_AsDouble: + case NI_Base_Vector256_AsInt16: + case NI_Base_Vector256_AsInt32: + case NI_Base_Vector256_AsInt64: + case NI_Base_Vector256_AsSByte: + case NI_Base_Vector256_AsSingle: + case NI_Base_Vector256_AsUInt16: + case NI_Base_Vector256_AsUInt32: + case NI_Base_Vector256_AsUInt64: + { + if (!compSupports(InstructionSet_AVX)) + { + // We don't want to deal with TYP_SIMD32 if the compiler doesn't otherwise support the type. 
+ break; + } + + __fallthrough; + } + + case NI_Base_Vector128_As: + case NI_Base_Vector128_AsByte: + case NI_Base_Vector128_AsDouble: + case NI_Base_Vector128_AsInt16: + case NI_Base_Vector128_AsInt32: + case NI_Base_Vector128_AsInt64: + case NI_Base_Vector128_AsSByte: + case NI_Base_Vector128_AsSingle: + case NI_Base_Vector128_AsUInt16: + case NI_Base_Vector128_AsUInt32: + case NI_Base_Vector128_AsUInt64: + { + // We fold away the cast here, as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both the same TYP_SIMD. + + assert(sig->numArgs == 1); + + retNode = impSIMDPopStack(retType, /* expectAddr: */ false, sig->retTypeClass); + SetOpLclRelatedToSIMDIntrinsic(retNode); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + break; + } + + case NI_Base_Vector128_CreateScalarUnsafe: + { + assert(sig->numArgs == 1); + +#ifdef _TARGET_X86_ + if (varTypeIsLong(baseType)) + { + // TODO-XARCH-CQ: It may be beneficial to emit the movq + // instruction, which takes a 64-bit memory address and + // works on 32-bit x86 systems. 
+ break; + } +#endif // _TARGET_X86_ + + if (compSupports(InstructionSet_SSE2) || (compSupports(InstructionSet_SSE) && (baseType == TYP_FLOAT))) + { + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); + } + break; + } + + case NI_Base_Vector128_ToScalar: + { + assert(sig->numArgs == 1); + + if (compSupports(InstructionSet_SSE) && varTypeIsFloating(baseType)) + { + op1 = impSIMDPopStack(getSIMDTypeForSize(simdSize)); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 16); + } + break; + } + + case NI_Base_Vector128_ToVector256: + case NI_Base_Vector128_ToVector256Unsafe: + case NI_Base_Vector256_GetLower: + { + assert(sig->numArgs == 1); + + if (compSupports(InstructionSet_AVX)) + { + op1 = impSIMDPopStack(getSIMDTypeForSize(simdSize)); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); + } + break; + } + + case NI_Base_Vector128_Zero: + { + assert(sig->numArgs == 0); + + if (compSupports(InstructionSet_SSE)) + { + retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize); + } + break; + } + + case NI_Base_Vector256_CreateScalarUnsafe: + { + assert(sig->numArgs == 1); + +#ifdef _TARGET_X86_ + if (varTypeIsLong(baseType)) + { + // TODO-XARCH-CQ: It may be beneficial to emit the movq + // instruction, which takes a 64-bit memory address and + // works on 32-bit x86 systems. 
+ break; + } +#endif // _TARGET_X86_ + + if (compSupports(InstructionSet_AVX)) + { + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); + } + break; + } + + case NI_Base_Vector256_ToScalar: + { + assert(sig->numArgs == 1); + + if (compSupports(InstructionSet_AVX) && varTypeIsFloating(baseType)) + { + op1 = impSIMDPopStack(getSIMDTypeForSize(simdSize)); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 32); + } + break; + } + + case NI_Base_Vector256_Zero: + { + assert(sig->numArgs == 0); + + if (compSupports(InstructionSet_AVX)) + { + retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize); + } + break; + } + + case NI_Base_Vector256_WithElement: + { + if (!compSupports(InstructionSet_AVX)) + { + // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers + return nullptr; + } + __fallthrough; + } + + case NI_Base_Vector128_WithElement: + { + assert(sig->numArgs == 3); + GenTree* indexOp = impStackTop(1).val; + if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst()) + { + // Using software fallback if + // 1. JIT/hardware don't support SSE2 instructions + // 2. baseType is not a numeric type (throw execptions) + // 3. 
index is not a constant + return nullptr; + } + + switch (baseType) + { + // Using software fallback if baseType is not supported by hardware + case TYP_BYTE: + case TYP_UBYTE: + case TYP_INT: + case TYP_UINT: + if (!compSupports(InstructionSet_SSE41)) + { + return nullptr; + } + break; + + case TYP_LONG: + case TYP_ULONG: + if (!compSupports(InstructionSet_SSE41_X64)) + { + return nullptr; + } + break; + + case TYP_DOUBLE: + case TYP_FLOAT: + case TYP_SHORT: + case TYP_USHORT: + // short/ushort/float/double is supported by SSE2 + break; + + default: + unreached(); + break; + } + + ssize_t imm8 = indexOp->AsIntCon()->IconValue(); + ssize_t cachedImm8 = imm8; + ssize_t count = simdSize / genTypeSize(baseType); + + if (imm8 >= count || imm8 < 0) + { + // Using software fallback if index is out of range (throw exeception) + return nullptr; + } + + GenTree* valueOp = impPopStack().val; + impPopStack(); + GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize)); + + GenTree* clonedVectorOp = nullptr; + + if (simdSize == 32) + { + // Extract the half vector that will be modified + assert(compSupports(InstructionSet_AVX)); + + // copy `vectorOp` to accept the modified half vector + vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone Vector for Vector256<T>.WithElement")); + + if (imm8 >= count / 2) + { + imm8 -= count / 2; + vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128, + baseType, simdSize); + } + else + { + vectorOp = + gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize); + } + } + + GenTree* immNode = gtNewIconNode(imm8); + + switch (baseType) + { + case TYP_LONG: + case TYP_ULONG: + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_X64_Insert, + baseType, 16); + break; + + case TYP_FLOAT: + { + if (!compSupports(InstructionSet_SSE41)) + { + // Emulate 
Vector128<float>.WithElement by SSE instructions + if (imm8 == 0) + { + // vector.WithElement(0, value) + // => + // movss xmm0, xmm1 (xmm0 = vector, xmm1 = value) + valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, + NI_Base_Vector128_CreateScalarUnsafe, TYP_FLOAT, 16); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, NI_SSE_MoveScalar, + TYP_FLOAT, 16); + } + else if (imm8 == 1) + { + // vector.WithElement(1, value) + // => + // shufps xmm1, xmm0, 0 (xmm0 = vector, xmm1 = value) + // shufps xmm1, xmm0, 226 + GenTree* tmpOp = + gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, + TYP_FLOAT, 16); + GenTree* dupVectorOp = nullptr; + vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement")); + tmpOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(0), + NI_SSE_Shuffle, TYP_FLOAT, 16); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, dupVectorOp, gtNewIconNode(226), + NI_SSE_Shuffle, TYP_FLOAT, 16); + } + else + { + ssize_t controlBits1 = 0; + ssize_t controlBits2 = 0; + if (imm8 == 2) + { + controlBits1 = 48; + controlBits2 = 132; + } + else + { + controlBits1 = 32; + controlBits2 = 36; + } + // vector.WithElement(2, value) + // => + // shufps xmm1, xmm0, 48 (xmm0 = vector, xmm1 = value) + // shufps xmm0, xmm1, 132 + // + // vector.WithElement(3, value) + // => + // shufps xmm1, xmm0, 32 (xmm0 = vector, xmm1 = value) + // shufps xmm0, xmm1, 36 + GenTree* tmpOp = + gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, + TYP_FLOAT, 16); + GenTree* dupVectorOp = nullptr; + vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement")); + valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, tmpOp, gtNewIconNode(controlBits1), + NI_SSE_Shuffle, TYP_FLOAT, 
16); + retNode = + gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, dupVectorOp, gtNewIconNode(controlBits2), + NI_SSE_Shuffle, TYP_FLOAT, 16); + } + break; + } + else + { + valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, + TYP_FLOAT, 16); + immNode->AsIntCon()->SetIconValue(imm8 * 16); + __fallthrough; + } + } + + case TYP_BYTE: + case TYP_UBYTE: + case TYP_INT: + case TYP_UINT: + retNode = + gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_Insert, baseType, 16); + break; + + case TYP_SHORT: + case TYP_USHORT: + retNode = + gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE2_Insert, baseType, 16); + break; + + case TYP_DOUBLE: + { + // vector.WithElement(0, value) + // => + // movsd xmm0, xmm1 (xmm0 = vector, xmm1 = value) + // + // vector.WithElement(1, value) + // => + // unpcklpd xmm0, xmm1 (xmm0 = vector, xmm1 = value) + valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, + TYP_DOUBLE, 16); + NamedIntrinsic in = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow; + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, in, TYP_DOUBLE, 16); + break; + } + + default: + unreached(); + break; + } + + if (simdSize == 32) + { + assert(clonedVectorOp); + int upperOrLower = (cachedImm8 >= count / 2) ? 
1 : 0; + retNode = gtNewSimdHWIntrinsicNode(retType, clonedVectorOp, retNode, gtNewIconNode(upperOrLower), + NI_AVX_InsertVector128, baseType, simdSize); + } + + break; + } + + case NI_Base_Vector256_GetElement: + { + if (!compSupports(InstructionSet_AVX)) + { + // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers + return nullptr; + } + __fallthrough; + } + + case NI_Base_Vector128_GetElement: + { + assert(sig->numArgs == 2); + GenTree* indexOp = impStackTop().val; + if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst()) + { + // Using software fallback if + // 1. JIT/hardware don't support SSE2 instructions + // 2. baseType is not a numeric type (throw execptions) + // 3. index is not a constant + return nullptr; + } + + switch (baseType) + { + // Using software fallback if baseType is not supported by hardware + case TYP_BYTE: + case TYP_UBYTE: + case TYP_INT: + case TYP_UINT: + if (!compSupports(InstructionSet_SSE41)) + { + return nullptr; + } + break; + + case TYP_LONG: + case TYP_ULONG: + if (!compSupports(InstructionSet_SSE41_X64)) + { + return nullptr; + } + break; + + case TYP_DOUBLE: + case TYP_FLOAT: + case TYP_SHORT: + case TYP_USHORT: + // short/ushort/float/double is supported by SSE2 + break; + + default: + break; + } + + ssize_t imm8 = indexOp->AsIntCon()->IconValue(); + ssize_t count = simdSize / genTypeSize(baseType); + + if (imm8 >= count || imm8 < 0) + { + // Using software fallback if index is out of range (throw exeception) + return nullptr; + } + + impPopStack(); + GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize)); + NamedIntrinsic resIntrinsic = NI_Illegal; + + if (simdSize == 32) + { + assert(compSupports(InstructionSet_AVX)); + + if (imm8 >= count / 2) + { + imm8 -= count / 2; + vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128, + baseType, simdSize); + } + else + { + vectorOp = + 
gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize); + } + } + + if (imm8 == 0 && (genTypeSize(baseType) >= 4)) + { + switch (baseType) + { + case TYP_LONG: + resIntrinsic = NI_SSE2_X64_ConvertToInt64; + break; + + case TYP_ULONG: + resIntrinsic = NI_SSE2_X64_ConvertToUInt64; + break; + + case TYP_INT: + resIntrinsic = NI_SSE2_ConvertToInt32; + break; + + case TYP_UINT: + resIntrinsic = NI_SSE2_ConvertToUInt32; + break; + + case TYP_FLOAT: + case TYP_DOUBLE: + resIntrinsic = NI_Base_Vector128_ToScalar; + break; + + default: + unreached(); + } + + return gtNewSimdHWIntrinsicNode(retType, vectorOp, resIntrinsic, baseType, 16); + } + + GenTree* immNode = gtNewIconNode(imm8); + + switch (baseType) + { + case TYP_LONG: + case TYP_ULONG: + retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_X64_Extract, baseType, 16); + break; + + case TYP_FLOAT: + { + if (!compSupports(InstructionSet_SSE41)) + { + assert(imm8 >= 1); + assert(imm8 <= 3); + // Emulate Vector128<float>.GetElement(i) by SSE instructions + // vector.GetElement(i) + // => + // shufps xmm0, xmm0, control + // (xmm0 = vector, control = i + 228) + immNode->AsIntCon()->SetIconValue(228 + imm8); + GenTree* clonedVectorOp = nullptr; + vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone Vector for Vector128<float>.GetElement")); + vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, clonedVectorOp, immNode, + NI_SSE_Shuffle, TYP_FLOAT, 16); + return gtNewSimdHWIntrinsicNode(retType, vectorOp, NI_Base_Vector128_ToScalar, TYP_FLOAT, 16); + } + __fallthrough; + } + + case TYP_UBYTE: + case TYP_INT: + case TYP_UINT: + retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_Extract, baseType, 16); + break; + + case TYP_BYTE: + // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result + retNode = gtNewSimdHWIntrinsicNode(TYP_UBYTE, 
vectorOp, immNode, NI_SSE41_Extract, TYP_UBYTE, 16); + retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_BYTE); + break; + + case TYP_SHORT: + case TYP_USHORT: + // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result + retNode = gtNewSimdHWIntrinsicNode(TYP_USHORT, vectorOp, immNode, NI_SSE2_Extract, TYP_USHORT, 16); + if (baseType == TYP_SHORT) + { + retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_SHORT); + } + break; + + case TYP_DOUBLE: + assert(imm8 == 1); + // vector.GetElement(1) + // => + // pshufd xmm1, xmm0, 0xEE (xmm0 = vector) + vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(0xEE), NI_SSE2_Shuffle, + TYP_INT, 16); + retNode = + gtNewSimdHWIntrinsicNode(TYP_DOUBLE, vectorOp, NI_Base_Vector128_ToScalar, TYP_DOUBLE, 16); + break; + + default: + unreached(); + } + + break; + } + + default: + { + unreached(); + break; + } + } + + return retNode; +} + GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index e677d37e0b..5d3a82e4af 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -3458,64 +3458,6 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, ni = lookupNamedIntrinsic(method); #ifdef FEATURE_HW_INTRINSICS - switch (ni) - { -#if defined(_TARGET_ARM64_) - case NI_Base_Vector64_AsByte: - case NI_Base_Vector64_AsInt16: - case NI_Base_Vector64_AsInt32: - case NI_Base_Vector64_AsSByte: - case NI_Base_Vector64_AsSingle: - case NI_Base_Vector64_AsUInt16: - case NI_Base_Vector64_AsUInt32: -#endif // _TARGET_ARM64_ - case NI_Base_Vector128_As: - case NI_Base_Vector128_AsByte: - case NI_Base_Vector128_AsDouble: - case NI_Base_Vector128_AsInt16: - case NI_Base_Vector128_AsInt32: - case NI_Base_Vector128_AsInt64: - case NI_Base_Vector128_AsSByte: - case NI_Base_Vector128_AsSingle: - case NI_Base_Vector128_AsUInt16: - case NI_Base_Vector128_AsUInt32: - case 
NI_Base_Vector128_AsUInt64: -#if defined(_TARGET_XARCH_) - case NI_Base_Vector128_CreateScalarUnsafe: - case NI_Base_Vector128_GetElement: - case NI_Base_Vector128_WithElement: - case NI_Base_Vector128_ToScalar: - case NI_Base_Vector128_ToVector256: - case NI_Base_Vector128_ToVector256Unsafe: - case NI_Base_Vector128_Zero: - case NI_Base_Vector256_As: - case NI_Base_Vector256_AsByte: - case NI_Base_Vector256_AsDouble: - case NI_Base_Vector256_AsInt16: - case NI_Base_Vector256_AsInt32: - case NI_Base_Vector256_AsInt64: - case NI_Base_Vector256_AsSByte: - case NI_Base_Vector256_AsSingle: - case NI_Base_Vector256_AsUInt16: - case NI_Base_Vector256_AsUInt32: - case NI_Base_Vector256_AsUInt64: - case NI_Base_Vector256_CreateScalarUnsafe: - case NI_Base_Vector256_GetElement: - case NI_Base_Vector256_WithElement: - case NI_Base_Vector256_GetLower: - case NI_Base_Vector256_ToScalar: - case NI_Base_Vector256_Zero: -#endif // _TARGET_XARCH_ - { - return impBaseIntrinsic(ni, clsHnd, method, sig); - } - - default: - { - break; - } - } - if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_HW_INTRINSIC_END)) { GenTree* hwintrinsic = impHWIntrinsic(ni, method, sig, mustExpand); @@ -4171,643 +4113,6 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, return retNode; } -#ifdef FEATURE_HW_INTRINSICS -//------------------------------------------------------------------------ -// impBaseIntrinsic: dispatch intrinsics to their own implementation -// -// Arguments: -// intrinsic -- id of the intrinsic function. -// clsHnd -- handle for the intrinsic method's class -// method -- method handle of the intrinsic function. -// sig -- signature of the intrinsic call -// -// Return Value: -// the expanded intrinsic. 
-// -GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig) -{ - GenTree* retNode = nullptr; - GenTree* op1 = nullptr; - - if (!featureSIMD) - { - return nullptr; - } - - unsigned simdSize = 0; - var_types baseType = TYP_UNKNOWN; - var_types retType = JITtype2varType(sig->retType); - - if (sig->hasThis()) - { - baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); - - if (retType == TYP_STRUCT) - { - unsigned retSimdSize = 0; - var_types retBasetype = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &retSimdSize); - if (!varTypeIsArithmetic(retBasetype)) - { - return nullptr; - } - retType = getSIMDTypeForSize(retSimdSize); - } - } - else - { - assert(retType == TYP_STRUCT); - baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize); - retType = getSIMDTypeForSize(simdSize); - } - - if (!varTypeIsArithmetic(baseType)) - { - return nullptr; - } - - switch (intrinsic) - { -#if defined(_TARGET_XARCH_) - case NI_Base_Vector256_As: - case NI_Base_Vector256_AsByte: - case NI_Base_Vector256_AsDouble: - case NI_Base_Vector256_AsInt16: - case NI_Base_Vector256_AsInt32: - case NI_Base_Vector256_AsInt64: - case NI_Base_Vector256_AsSByte: - case NI_Base_Vector256_AsSingle: - case NI_Base_Vector256_AsUInt16: - case NI_Base_Vector256_AsUInt32: - case NI_Base_Vector256_AsUInt64: - { - if (!compSupports(InstructionSet_AVX)) - { - // We don't want to deal with TYP_SIMD32 if the compiler doesn't otherwise support the type. 
- break; - } - - __fallthrough; - } -#endif // _TARGET_XARCH_ - -#if defined(_TARGET_ARM64_) - case NI_Base_Vector64_AsByte: - case NI_Base_Vector64_AsInt16: - case NI_Base_Vector64_AsInt32: - case NI_Base_Vector64_AsSByte: - case NI_Base_Vector64_AsSingle: - case NI_Base_Vector64_AsUInt16: - case NI_Base_Vector64_AsUInt32: -#endif // _TARGET_ARM64_ - case NI_Base_Vector128_As: - case NI_Base_Vector128_AsByte: - case NI_Base_Vector128_AsDouble: - case NI_Base_Vector128_AsInt16: - case NI_Base_Vector128_AsInt32: - case NI_Base_Vector128_AsInt64: - case NI_Base_Vector128_AsSByte: - case NI_Base_Vector128_AsSingle: - case NI_Base_Vector128_AsUInt16: - case NI_Base_Vector128_AsUInt32: - case NI_Base_Vector128_AsUInt64: - { - // We fold away the cast here, as it only exists to satisfy - // the type system. It is safe to do this here since the retNode type - // and the signature return type are both the same TYP_SIMD. - - assert(sig->numArgs == 0); - assert(sig->hasThis()); - - retNode = impSIMDPopStack(retType, true, sig->retTypeClass); - SetOpLclRelatedToSIMDIntrinsic(retNode); - assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); - break; - } - -#ifdef _TARGET_XARCH_ - case NI_Base_Vector128_CreateScalarUnsafe: - { - assert(sig->numArgs == 1); - -#ifdef _TARGET_X86_ - if (varTypeIsLong(baseType)) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. 
- break; - } -#endif // _TARGET_X86_ - - if (compSupports(InstructionSet_SSE2) || (compSupports(InstructionSet_SSE) && (baseType == TYP_FLOAT))) - { - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); - } - break; - } - - case NI_Base_Vector128_ToScalar: - { - assert(sig->numArgs == 0); - assert(sig->hasThis()); - - if (compSupports(InstructionSet_SSE) && varTypeIsFloating(baseType)) - { - op1 = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 16); - } - break; - } - - case NI_Base_Vector128_ToVector256: - case NI_Base_Vector128_ToVector256Unsafe: - case NI_Base_Vector256_GetLower: - { - assert(sig->numArgs == 0); - assert(sig->hasThis()); - - if (compSupports(InstructionSet_AVX)) - { - op1 = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); - } - break; - } - - case NI_Base_Vector128_Zero: - { - assert(sig->numArgs == 0); - - if (compSupports(InstructionSet_SSE)) - { - retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize); - } - break; - } - - case NI_Base_Vector256_CreateScalarUnsafe: - { - assert(sig->numArgs == 1); - -#ifdef _TARGET_X86_ - if (varTypeIsLong(baseType)) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. 
- break; - } -#endif // _TARGET_X86_ - - if (compSupports(InstructionSet_AVX)) - { - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize); - } - break; - } - - case NI_Base_Vector256_ToScalar: - { - assert(sig->numArgs == 0); - assert(sig->hasThis()); - - if (compSupports(InstructionSet_AVX) && varTypeIsFloating(baseType)) - { - op1 = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 32); - } - break; - } - - case NI_Base_Vector256_Zero: - { - assert(sig->numArgs == 0); - - if (compSupports(InstructionSet_AVX)) - { - retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize); - } - break; - } - - case NI_Base_Vector256_WithElement: - { - if (!compSupports(InstructionSet_AVX)) - { - // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers - return nullptr; - } - __fallthrough; - } - case NI_Base_Vector128_WithElement: - { - assert(sig->numArgs == 2); - GenTree* indexOp = impStackTop(1).val; - if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst()) - { - // Using software fallback if - // 1. JIT/hardware don't support SSE2 instructions - // 2. baseType is not a numeric type (throw execptions) - // 3. 
index is not a constant - return nullptr; - } - - switch (baseType) - { - // Using software fallback if baseType is not supported by hardware - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - if (!compSupports(InstructionSet_SSE41)) - { - return nullptr; - } - break; - case TYP_LONG: - case TYP_ULONG: - if (!compSupports(InstructionSet_SSE41_X64)) - { - return nullptr; - } - break; - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - // short/ushort/float/double is supported by SSE2 - break; - default: - unreached(); - break; - } - - ssize_t imm8 = indexOp->AsIntCon()->IconValue(); - ssize_t cachedImm8 = imm8; - ssize_t count = simdSize / genTypeSize(baseType); - - if (imm8 >= count || imm8 < 0) - { - // Using software fallback if index is out of range (throw exeception) - return nullptr; - } - - GenTree* valueOp = impPopStack().val; - impPopStack(); - GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd); - - GenTree* clonedVectorOp = nullptr; - - if (simdSize == 32) - { - // Extract the half vector that will be modified - assert(compSupports(InstructionSet_AVX)); - - // copy `vectorOp` to accept the modified half vector - vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone Vector for Vector256<T>.WithElement")); - - if (imm8 >= count / 2) - { - imm8 -= count / 2; - vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128, - baseType, simdSize); - } - else - { - vectorOp = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize); - } - } - - GenTree* immNode = gtNewIconNode(imm8); - - switch (baseType) - { - case TYP_LONG: - case TYP_ULONG: - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_X64_Insert, - baseType, 16); - break; - - case TYP_FLOAT: - { - if (!compSupports(InstructionSet_SSE41)) - { - // Emulate 
Vector128<float>.WithElement by SSE instructions - if (imm8 == 0) - { - // vector.WithElement(0, value) - // => - // movss xmm0, xmm1 (xmm0 = vector, xmm1 = value) - valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, - NI_Base_Vector128_CreateScalarUnsafe, TYP_FLOAT, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, NI_SSE_MoveScalar, - TYP_FLOAT, 16); - } - else if (imm8 == 1) - { - // vector.WithElement(1, value) - // => - // shufps xmm1, xmm0, 0 (xmm0 = vector, xmm1 = value) - // shufps xmm1, xmm0, 226 - GenTree* tmpOp = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, - TYP_FLOAT, 16); - GenTree* dupVectorOp = nullptr; - vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement")); - tmpOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(0), - NI_SSE_Shuffle, TYP_FLOAT, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, dupVectorOp, gtNewIconNode(226), - NI_SSE_Shuffle, TYP_FLOAT, 16); - } - else - { - ssize_t controlBits1 = 0; - ssize_t controlBits2 = 0; - if (imm8 == 2) - { - controlBits1 = 48; - controlBits2 = 132; - } - else - { - controlBits1 = 32; - controlBits2 = 36; - } - // vector.WithElement(2, value) - // => - // shufps xmm1, xmm0, 48 (xmm0 = vector, xmm1 = value) - // shufps xmm0, xmm1, 132 - // - // vector.WithElement(3, value) - // => - // shufps xmm1, xmm0, 32 (xmm0 = vector, xmm1 = value) - // shufps xmm0, xmm1, 36 - GenTree* tmpOp = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, - TYP_FLOAT, 16); - GenTree* dupVectorOp = nullptr; - vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement")); - valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, tmpOp, gtNewIconNode(controlBits1), - NI_SSE_Shuffle, TYP_FLOAT, 
16); - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, dupVectorOp, gtNewIconNode(controlBits2), - NI_SSE_Shuffle, TYP_FLOAT, 16); - } - break; - } - else - { - valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, - TYP_FLOAT, 16); - immNode->AsIntCon()->SetIconValue(imm8 * 16); - __fallthrough; - } - } - - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_Insert, baseType, 16); - break; - - case TYP_SHORT: - case TYP_USHORT: - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE2_Insert, baseType, 16); - break; - - case TYP_DOUBLE: - { - // vector.WithElement(0, value) - // => - // movsd xmm0, xmm1 (xmm0 = vector, xmm1 = value) - // - // vector.WithElement(1, value) - // => - // unpcklpd xmm0, xmm1 (xmm0 = vector, xmm1 = value) - valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe, - TYP_DOUBLE, 16); - NamedIntrinsic in = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow; - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, in, TYP_DOUBLE, 16); - break; - } - - default: - unreached(); - break; - } - - if (simdSize == 32) - { - assert(clonedVectorOp); - int upperOrLower = (cachedImm8 >= count / 2) ? 
1 : 0; - retNode = gtNewSimdHWIntrinsicNode(retType, clonedVectorOp, retNode, gtNewIconNode(upperOrLower), - NI_AVX_InsertVector128, baseType, simdSize); - } - - break; - } - - case NI_Base_Vector256_GetElement: - { - if (!compSupports(InstructionSet_AVX)) - { - // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers - return nullptr; - } - __fallthrough; - } - case NI_Base_Vector128_GetElement: - { - assert(sig->numArgs == 1); - GenTree* indexOp = impStackTop().val; - if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst()) - { - // Using software fallback if - // 1. JIT/hardware don't support SSE2 instructions - // 2. baseType is not a numeric type (throw execptions) - // 3. index is not a constant - return nullptr; - } - - switch (baseType) - { - // Using software fallback if baseType is not supported by hardware - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - if (!compSupports(InstructionSet_SSE41)) - { - return nullptr; - } - break; - case TYP_LONG: - case TYP_ULONG: - if (!compSupports(InstructionSet_SSE41_X64)) - { - return nullptr; - } - break; - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - // short/ushort/float/double is supported by SSE2 - break; - default: - break; - } - - ssize_t imm8 = indexOp->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(baseType); - - if (imm8 >= count || imm8 < 0) - { - // Using software fallback if index is out of range (throw exeception) - return nullptr; - } - - impPopStack(); - GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd); - NamedIntrinsic resIntrinsic = NI_Illegal; - - if (simdSize == 32) - { - assert(compSupports(InstructionSet_AVX)); - if (imm8 >= count / 2) - { - imm8 -= count / 2; - vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128, - baseType, simdSize); - } - else - { - vectorOp = - 
gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize); - } - } - - if (imm8 == 0 && (genTypeSize(baseType) >= 4)) - { - switch (baseType) - { - case TYP_LONG: - resIntrinsic = NI_SSE2_X64_ConvertToInt64; - break; - case TYP_ULONG: - resIntrinsic = NI_SSE2_X64_ConvertToUInt64; - break; - case TYP_INT: - resIntrinsic = NI_SSE2_ConvertToInt32; - break; - case TYP_UINT: - resIntrinsic = NI_SSE2_ConvertToUInt32; - break; - case TYP_FLOAT: - case TYP_DOUBLE: - resIntrinsic = NI_Base_Vector128_ToScalar; - break; - default: - unreached(); - } - return gtNewSimdHWIntrinsicNode(retType, vectorOp, resIntrinsic, baseType, 16); - } - - GenTree* immNode = gtNewIconNode(imm8); - - switch (baseType) - { - case TYP_LONG: - case TYP_ULONG: - retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_X64_Extract, baseType, 16); - break; - - case TYP_FLOAT: - { - if (!compSupports(InstructionSet_SSE41)) - { - assert(imm8 >= 1); - assert(imm8 <= 3); - // Emulate Vector128<float>.GetElement(i) by SSE instructions - // vector.GetElement(i) - // => - // shufps xmm0, xmm0, control - // (xmm0 = vector, control = i + 228) - immNode->AsIntCon()->SetIconValue(228 + imm8); - GenTree* clonedVectorOp = nullptr; - vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone Vector for Vector128<float>.GetElement")); - vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, clonedVectorOp, immNode, - NI_SSE_Shuffle, TYP_FLOAT, 16); - return gtNewSimdHWIntrinsicNode(retType, vectorOp, NI_Base_Vector128_ToScalar, TYP_FLOAT, 16); - } - __fallthrough; - } - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_Extract, baseType, 16); - break; - - case TYP_BYTE: - // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result - retNode = gtNewSimdHWIntrinsicNode(TYP_UBYTE, vectorOp, immNode, 
NI_SSE41_Extract, TYP_UBYTE, 16); - retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_BYTE); - break; - - case TYP_SHORT: - case TYP_USHORT: - // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result - retNode = gtNewSimdHWIntrinsicNode(TYP_USHORT, vectorOp, immNode, NI_SSE2_Extract, TYP_USHORT, 16); - if (baseType == TYP_SHORT) - { - retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_SHORT); - } - break; - - case TYP_DOUBLE: - assert(imm8 == 1); - // vector.GetElement(1) - // => - // pshufd xmm1, xmm0, 0xEE (xmm0 = vector) - vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(0xEE), NI_SSE2_Shuffle, - TYP_INT, 16); - retNode = - gtNewSimdHWIntrinsicNode(TYP_DOUBLE, vectorOp, NI_Base_Vector128_ToScalar, TYP_DOUBLE, 16); - break; - - default: - unreached(); - } - - break; - } - -#endif // _TARGET_XARCH_ - - default: - { - unreached(); - break; - } - } - - return retNode; -} -#endif // FEATURE_HW_INTRINSICS - GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, var_types callType, @@ -4979,7 +4284,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { className += 2; - if (strcmp(className, "`1") == 0) + if (className[0] == '\0') { if (strncmp(methodName, "As", 2) == 0) { @@ -5026,18 +4331,8 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { className += 3; -#if defined(_TARGET_XARCH_) if (className[0] == '\0') { - if (strcmp(methodName, "CreateScalarUnsafe") == 0) - { - result = NI_Base_Vector128_CreateScalarUnsafe; - } - } - else -#endif // _TARGET_XARCH_ - if (strcmp(className, "`1") == 0) - { if (strncmp(methodName, "As", 2) == 0) { methodName += 2; @@ -5088,17 +4383,13 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } #if defined(_TARGET_XARCH_) - else if (strcmp(methodName, "GetElement") == 0) - { - result = NI_Base_Vector128_GetElement; - } - else if (strcmp(methodName, 
"WithElement") == 0) + else if (strcmp(methodName, "CreateScalarUnsafe") == 0) { - result = NI_Base_Vector128_WithElement; + result = NI_Base_Vector128_CreateScalarUnsafe; } - else if (strcmp(methodName, "get_Zero") == 0) + else if (strcmp(methodName, "GetElement") == 0) { - result = NI_Base_Vector128_Zero; + result = NI_Base_Vector128_GetElement; } else if (strncmp(methodName, "To", 2) == 0) { @@ -5122,8 +4413,21 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } } + else if (strcmp(methodName, "WithElement") == 0) + { + result = NI_Base_Vector128_WithElement; + } #endif // _TARGET_XARCH_ } +#if defined(_TARGET_XARCH_) + else if (strcmp(className, "`1") == 0) + { + if (strcmp(methodName, "get_Zero") == 0) + { + result = NI_Base_Vector128_Zero; + } + } +#endif // _TARGET_XARCH_ } #if defined(_TARGET_XARCH_) else if (strncmp(className, "256", 3) == 0) @@ -5132,13 +4436,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) if (className[0] == '\0') { - if (strcmp(methodName, "CreateScalarUnsafe") == 0) - { - result = NI_Base_Vector256_CreateScalarUnsafe; - } - } - else if (strcmp(className, "`1") == 0) - { if (strncmp(methodName, "As", 2) == 0) { methodName += 2; @@ -5188,25 +4485,32 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = NI_Base_Vector256_AsUInt64; } } - else if (strcmp(methodName, "get_Zero") == 0) + else if (strcmp(methodName, "CreateScalarUnsafe") == 0) { - result = NI_Base_Vector256_Zero; + result = NI_Base_Vector256_CreateScalarUnsafe; + } + else if (strcmp(methodName, "GetElement") == 0) + { + result = NI_Base_Vector256_GetElement; } else if (strcmp(methodName, "GetLower") == 0) { result = NI_Base_Vector256_GetLower; } - else if (strcmp(methodName, "GetElement") == 0) + else if (strcmp(methodName, "ToScalar") == 0) { - result = NI_Base_Vector256_GetElement; + result = NI_Base_Vector256_ToScalar; } else if (strcmp(methodName, "WithElement") == 0) { result 
= NI_Base_Vector256_WithElement; } - else if (strcmp(methodName, "ToScalar") == 0) + } + else if (strcmp(className, "`1") == 0) + { + if (strcmp(methodName, "get_Zero") == 0) { - result = NI_Base_Vector256_ToScalar; + result = NI_Base_Vector256_Zero; } } } |