summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tanner Gooding <tagoo@outlook.com> 2018-12-11 13:11:00 -0800
committer: GitHub <noreply@github.com> 2018-12-11 13:11:00 -0800
commit: aaa00748a9029dc23a3b5455e81a3133d91afe56 (patch)
tree: b914bcdcc31e3a25e25737e7ab746e61d9126365
parent: f4060cacc908d76eb2051b86a8e84a961228f949 (diff)
download: coreclr-aaa00748a9029dc23a3b5455e81a3133d91afe56.tar.gz
coreclr-aaa00748a9029dc23a3b5455e81a3133d91afe56.tar.bz2
coreclr-aaa00748a9029dc23a3b5455e81a3133d91afe56.zip
Moving the remaining Vector128/256 helper intrinsics to be implemented using other intrinsics (#21451)
* Updating Vector256.Create(V128, V128) to be implemented using other intrinsics
* Updating Vector128.Create(T, ...) and Vector256.Create(T, ...) to be implemented using other intrinsics
* Renaming CreateSoftware to SoftwareFallback and adding clarifying comments to the helper method code
* Fixing Vector128.Create(int, int, int, int) to properly consume the elements for the Sse2 codepath
-rw-r--r-- src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs 500
-rw-r--r-- src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs 688
-rw-r--r-- src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs 9
3 files changed, 876 insertions, 321 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs
index b877bbe2c1..a57f999112 100644
--- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs
@@ -43,6 +43,9 @@ namespace System.Runtime.Intrinsics
if (Sse2.IsSupported)
{
+ // We first unpack as bytes to duplicate value into the lower 2 bytes, then we treat it as a ushort and unpack again to duplicate those
+ // bits into the lower 2 words. Finally, we treat it as a uint and shuffle the lower dword to duplicate value across the entire result.
+
Vector128<byte> result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
result = Sse2.UnpackLow(result.AsUInt16(), result.AsUInt16()).AsByte(); // < v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
@@ -89,8 +92,11 @@ namespace System.Runtime.Intrinsics
return Sse3.MoveAndDuplicate(result); // < v, v >
}
- if (Sse.IsSupported)
+ if (Sse2.IsSupported)
{
+ // Treating the value as a set of singles and emitting MoveLowToHigh is more efficient than dealing with the elements directly as double.
+ // However, we still need to check if Sse2 is supported since CreateScalarUnsafe needs it for movsd when value is not already in a register.
+
Vector128<double> result = CreateScalarUnsafe(value); // < v, ? >
return Sse.MoveLowToHigh(result.AsSingle(), result.AsSingle()).AsDouble(); // < v, v >
}
@@ -123,6 +129,9 @@ namespace System.Runtime.Intrinsics
if (Sse2.IsSupported)
{
+ // We first unpack as short to duplicate value into the lower 2 words, then we treat it as an int and shuffle the lower dword to
+ // duplicate value across the entire result.
+
Vector128<short> result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? >
result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ? >
return Sse2.Shuffle(result.AsInt32(), 0x00).AsInt16(); // < v, v, v, v, v, v, v, v >
@@ -237,6 +246,9 @@ namespace System.Runtime.Intrinsics
if (Sse2.IsSupported)
{
+ // We first unpack as bytes to duplicate value into the lower 2 bytes, then we treat it as a short and unpack again to duplicate those
+ // bits into the lower 2 words. Finally, we treat it as a 32-bit integer and shuffle the lower dword to duplicate value across the entire result.
+
Vector128<sbyte> result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
result = Sse2.UnpackLow(result.AsInt16(), result.AsInt16()).AsSByte(); // < v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
@@ -326,6 +338,9 @@ namespace System.Runtime.Intrinsics
if (Sse2.IsSupported)
{
+ // We first unpack as ushort to duplicate value into the lower 2 words, then we can treat it as a uint and shuffle the lower dword to
+ // duplicate value across the entire result
+
Vector128<ushort> result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? >
result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ? >
return Sse2.Shuffle(result.AsUInt32(), 0x00).AsUInt16(); // < v, v, v, v, v, v, v, v >
@@ -439,44 +454,117 @@ namespace System.Runtime.Intrinsics
/// <param name="e14">The value that element 14 will be initialized to.</param>
/// <param name="e15">The value that element 15 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Byte}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15)
{
- var pResult = stackalloc byte[16]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- e8,
- e9,
- e10,
- e11,
- e12,
- e13,
- e14,
- e15,
- };
+ if (Sse41.IsSupported)
+ {
+ Vector128<byte> result = CreateScalarUnsafe(e0); // < 0, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e1, 1); // < 0, 1, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e2, 2); // < 0, 1, 2, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e3, 3); // < 0, 1, 2, 3, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e4, 4); // < 0, 1, 2, 3, 4, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e5, 5); // < 0, 1, 2, 3, 4, 5, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e6, 6); // < 0, 1, 2, 3, 4, 5, 6, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e7, 7); // < 0, 1, 2, 3, 4, 5, 6, 7, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e8, 8); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e9, 9); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e10, 10); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e11, 11); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e12, 12); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ??, ??, ?? >
+ result = Sse41.Insert(result, e13, 13); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ??, ?? >
+ result = Sse41.Insert(result, e14, 14); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, ?? >
+ return Sse41.Insert(result, e15, 15); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 >
+ }
- return Unsafe.AsRef<Vector128<byte>>(pResult);
+ if (Sse2.IsSupported)
+ {
+ // We deal with the elements in order, unpacking the ordered pairs of bytes into vectors. We then treat those vectors as ushort and
+ // unpack them again, treat those results as uint and unpack once more, and finally treat them as ulong for the last unpack. This
+ // efficiently gets all bytes ordered into the result.
+
+ Vector128<ushort> lo16, hi16;
+ Vector128<uint> lo32, hi32;
+ Vector128<ulong> lo64, hi64;
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)).AsUInt16(); // < 0, 1, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e2), CreateScalarUnsafe(e3)).AsUInt16(); // < 2, 3, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ lo32 = Sse2.UnpackLow(lo16, hi16).AsUInt32(); // < 0, 1, 2, 3, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e4), CreateScalarUnsafe(e5)).AsUInt16(); // < 4, 5, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e6), CreateScalarUnsafe(e7)).AsUInt16(); // < 6, 7, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi32 = Sse2.UnpackLow(lo16, hi16).AsUInt32(); // < 4, 5, 6, 7, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo64 = Sse2.UnpackLow(lo32, hi32).AsUInt64(); // < 0, 1, 2, 3, 4, 5, 6, 7, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e8), CreateScalarUnsafe(e9)).AsUInt16(); // < 8, 9, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e10), CreateScalarUnsafe(e11)).AsUInt16(); // < 10, 11, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ lo32 = Sse2.UnpackLow(lo16, hi16).AsUInt32(); // < 8, 9, 10, 11, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e12), CreateScalarUnsafe(e13)).AsUInt16(); // < 12, 13, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e14), CreateScalarUnsafe(e15)).AsUInt16(); // < 14, 15, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi32 = Sse2.UnpackLow(lo16, hi16).AsUInt32(); // < 12, 13, 14, 15, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ hi64 = Sse2.UnpackLow(lo32, hi32).AsUInt64(); // < 8, 9, 10, 11, 12, 13, 14, 15, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ return Sse2.UnpackLow(lo64, hi64).AsByte(); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 >
+ }
+
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
+
+ Vector128<byte> SoftwareFallback(byte i0, byte i1, byte i2, byte i3, byte i4, byte i5, byte i6, byte i7, byte i8, byte i9, byte i10, byte i11, byte i12, byte i13, byte i14, byte i15)
+ {
+ var pResult = stackalloc byte[16]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ i8,
+ i9,
+ i10,
+ i11,
+ i12,
+ i13,
+ i14,
+ i15,
+ };
+
+ return Unsafe.AsRef<Vector128<byte>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{Double}" /> instance with each element initialized to the corresponding specified value.</summary>
/// <param name="e0">The value that element 0 will be initialized to.</param>
/// <param name="e1">The value that element 1 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Double}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<double> Create(double e0, double e1)
{
- var pResult = stackalloc double[2]
+ if (Sse2.IsSupported)
{
- e0,
- e1,
- };
+ // Treating the value as a set of singles and emitting MoveLowToHigh is more efficient than dealing with the elements directly as double.
+ // However, we still need to check if Sse2 is supported since CreateScalarUnsafe needs it for movsd when value is not already in a register.
- return Unsafe.AsRef<Vector128<double>>(pResult);
+ return Sse.MoveLowToHigh(CreateScalarUnsafe(e0).AsSingle(), CreateScalarUnsafe(e1).AsSingle()).AsDouble();
+ }
+
+ return SoftwareFallback(e0, e1);
+
+ Vector128<double> SoftwareFallback(double i0, double i1)
+ {
+ var pResult = stackalloc double[2]
+ {
+ i0,
+ i1,
+ };
+
+ return Unsafe.AsRef<Vector128<double>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{Int16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -489,21 +577,39 @@ namespace System.Runtime.Intrinsics
/// <param name="e6">The value that element 6 will be initialized to.</param>
/// <param name="e7">The value that element 7 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Int16}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<short> Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7)
{
- var pResult = stackalloc short[8]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- };
+ if (Sse2.IsSupported)
+ {
+ Vector128<short> result = CreateScalarUnsafe(e0); // < 0, ?, ?, ?, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e1, 1); // < 0, 1, ?, ?, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e2, 2); // < 0, 1, 2, ?, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e3, 3); // < 0, 1, 2, 3, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e4, 4); // < 0, 1, 2, 3, 4, ?, ?, ? >
+ result = Sse2.Insert(result, e5, 5); // < 0, 1, 2, 3, 4, 5, ?, ? >
+ result = Sse2.Insert(result, e6, 6); // < 0, 1, 2, 3, 4, 5, 6, ? >
+ return Sse2.Insert(result, e7, 7); // < 0, 1, 2, 3, 4, 5, 6, 7 >
+ }
- return Unsafe.AsRef<Vector128<short>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
+
+ Vector128<short> SoftwareFallback(short i0, short i1, short i2, short i3, short i4, short i5, short i6, short i7)
+ {
+ var pResult = stackalloc short[8]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ };
+
+ return Unsafe.AsRef<Vector128<short>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{Int32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -512,32 +618,74 @@ namespace System.Runtime.Intrinsics
/// <param name="e2">The value that element 2 will be initialized to.</param>
/// <param name="e3">The value that element 3 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Int32}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<int> Create(int e0, int e1, int e2, int e3)
{
- var pResult = stackalloc int[4]
+ if (Sse41.IsSupported)
{
- e0,
- e1,
- e2,
- e3,
- };
+ Vector128<int> result = CreateScalarUnsafe(e0); // < 0, ?, ?, ? >
+ result = Sse41.Insert(result, e1, 1); // < 0, 1, ?, ? >
+ result = Sse41.Insert(result, e2, 2); // < 0, 1, 2, ? >
+ return Sse41.Insert(result, e3, 3); // < 0, 1, 2, 3 >
+ }
- return Unsafe.AsRef<Vector128<int>>(pResult);
+ if (Sse2.IsSupported)
+ {
+ // We deal with the elements in order, unpacking the ordered pairs of int into vectors. We then treat those vectors as long and
+ // unpack them again. This efficiently gets all ints ordered into the result.
+
+ Vector128<long> lo64, hi64;
+ lo64 = Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)).AsInt64(); // < 0, 1, ?, ? >
+ hi64 = Sse2.UnpackLow(CreateScalarUnsafe(e2), CreateScalarUnsafe(e3)).AsInt64(); // < 2, 3, ?, ? >
+ return Sse2.UnpackLow(lo64, hi64).AsInt32(); // < 0, 1, 2, 3 >
+ }
+
+ return SoftwareFallback(e0, e1, e2, e3);
+
+ Vector128<int> SoftwareFallback(int i0, int i1, int i2, int i3)
+ {
+ var pResult = stackalloc int[4]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ };
+
+ return Unsafe.AsRef<Vector128<int>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{Int64}" /> instance with each element initialized to the corresponding specified value.</summary>
/// <param name="e0">The value that element 0 will be initialized to.</param>
/// <param name="e1">The value that element 1 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Int64}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<long> Create(long e0, long e1)
{
- var pResult = stackalloc long[2]
+ if (Sse41.X64.IsSupported)
{
- e0,
- e1,
- };
+ Vector128<long> result = CreateScalarUnsafe(e0); // < 0, ? >
+ return Sse41.X64.Insert(result, e1, 1); // < 0, 1 >
+ }
- return Unsafe.AsRef<Vector128<long>>(pResult);
+ if (Sse2.X64.IsSupported)
+ {
+ return Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)); // < 0, 1 >
+ }
+
+ return SoftwareFallback(e0, e1);
+
+ Vector128<long> SoftwareFallback(long i0, long i1)
+ {
+ var pResult = stackalloc long[2]
+ {
+ i0,
+ i1,
+ };
+
+ return Unsafe.AsRef<Vector128<long>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{SByte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -558,30 +706,89 @@ namespace System.Runtime.Intrinsics
/// <param name="e14">The value that element 14 will be initialized to.</param>
/// <param name="e15">The value that element 15 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{SByte}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector128<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15)
{
- var pResult = stackalloc sbyte[16]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- e8,
- e9,
- e10,
- e11,
- e12,
- e13,
- e14,
- e15,
- };
+ if (Sse41.IsSupported)
+ {
+ Vector128<sbyte> result = CreateScalarUnsafe(e0); // < 0, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e1, 1); // < 0, 1, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e2, 2); // < 0, 1, 2, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e3, 3); // < 0, 1, 2, 3, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e4, 4); // < 0, 1, 2, 3, 4, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e5, 5); // < 0, 1, 2, 3, 4, 5, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e6, 6); // < 0, 1, 2, 3, 4, 5, 6, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e7, 7); // < 0, 1, 2, 3, 4, 5, 6, 7, ??, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e8, 8); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, ??, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e9, 9); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ??, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e10, 10); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ??, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e11, 11); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ??, ??, ??, ?? >
+ result = Sse41.Insert(result, e12, 12); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ??, ??, ?? >
+ result = Sse41.Insert(result, e13, 13); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ??, ?? >
+ result = Sse41.Insert(result, e14, 14); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, ?? >
+ return Sse41.Insert(result, e15, 15); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 >
+ }
- return Unsafe.AsRef<Vector128<sbyte>>(pResult);
+ if (Sse2.IsSupported)
+ {
+ // We deal with the elements in order, unpacking the ordered pairs of sbytes into vectors. We then treat those vectors as short and
+ // unpack them again, treat those results as int and unpack once more, and finally treat them as long for the last unpack. This
+ // efficiently gets all sbytes ordered into the result.
+
+ Vector128<short> lo16, hi16;
+ Vector128<int> lo32, hi32;
+ Vector128<long> lo64, hi64;
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)).AsInt16(); // < 0, 1, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e2), CreateScalarUnsafe(e3)).AsInt16(); // < 2, 3, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ lo32 = Sse2.UnpackLow(lo16, hi16).AsInt32(); // < 0, 1, 2, 3, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e4), CreateScalarUnsafe(e5)).AsInt16(); // < 4, 5, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e6), CreateScalarUnsafe(e7)).AsInt16(); // < 6, 7, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi32 = Sse2.UnpackLow(lo16, hi16).AsInt32(); // < 4, 5, 6, 7, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo64 = Sse2.UnpackLow(lo32, hi32).AsInt64(); // < 0, 1, 2, 3, 4, 5, 6, 7, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e8), CreateScalarUnsafe(e9)).AsInt16(); // < 8, 9, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e10), CreateScalarUnsafe(e11)).AsInt16(); // < 10, 11, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ lo32 = Sse2.UnpackLow(lo16, hi16).AsInt32(); // < 8, 9, 10, 11, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ lo16 = Sse2.UnpackLow(CreateScalarUnsafe(e12), CreateScalarUnsafe(e13)).AsInt16(); // < 12, 13, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi16 = Sse2.UnpackLow(CreateScalarUnsafe(e14), CreateScalarUnsafe(e15)).AsInt16(); // < 14, 15, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+ hi32 = Sse2.UnpackLow(lo16, hi16).AsInt32(); // < 12, 13, 14, 15, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ hi64 = Sse2.UnpackLow(lo32, hi32).AsInt64(); // < 8, 9, 10, 11, 12, 13, 14, 15, ??, ??, ??, ??, ??, ??, ??, ?? >
+
+ return Sse2.UnpackLow(lo64, hi64).AsSByte(); // < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 >
+ }
+
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
+
+ Vector128<sbyte> SoftwareFallback(sbyte i0, sbyte i1, sbyte i2, sbyte i3, sbyte i4, sbyte i5, sbyte i6, sbyte i7, sbyte i8, sbyte i9, sbyte i10, sbyte i11, sbyte i12, sbyte i13, sbyte i14, sbyte i15)
+ {
+ var pResult = stackalloc sbyte[16]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ i8,
+ i9,
+ i10,
+ i11,
+ i12,
+ i13,
+ i14,
+ i15,
+ };
+
+ return Unsafe.AsRef<Vector128<sbyte>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{Single}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -590,17 +797,39 @@ namespace System.Runtime.Intrinsics
/// <param name="e2">The value that element 2 will be initialized to.</param>
/// <param name="e3">The value that element 3 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Single}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<float> Create(float e0, float e1, float e2, float e3)
{
- var pResult = stackalloc float[4]
+ if (Sse41.IsSupported)
{
- e0,
- e1,
- e2,
- e3,
- };
+ Vector128<float> result = CreateScalarUnsafe(e0); // < 0, ?, ?, ? >
+ result = Sse41.Insert(result, CreateScalarUnsafe(e1), 0x10); // < 0, 1, ?, ? >
+ result = Sse41.Insert(result, CreateScalarUnsafe(e2), 0x20); // < 0, 1, 2, ? >
+ return Sse41.Insert(result, CreateScalarUnsafe(e3), 0x30); // < 0, 1, 2, 3 >
+ }
- return Unsafe.AsRef<Vector128<float>>(pResult);
+ if (Sse.IsSupported)
+ {
+ Vector128<float> lo64, hi64;
+ lo64 = Sse.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)); // < 0, 1, ?, ? >
+ hi64 = Sse.UnpackLow(CreateScalarUnsafe(e2), CreateScalarUnsafe(e3)); // < 2, 3, ?, ? >
+ return Sse.MoveLowToHigh(lo64, hi64); // < 0, 1, 2, 3 >
+ }
+
+ return SoftwareFallback(e0, e1, e2, e3);
+
+ Vector128<float> SoftwareFallback(float i0, float i1, float i2, float i3)
+ {
+ var pResult = stackalloc float[4]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ };
+
+ return Unsafe.AsRef<Vector128<float>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{UInt16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -613,22 +842,40 @@ namespace System.Runtime.Intrinsics
/// <param name="e6">The value that element 6 will be initialized to.</param>
/// <param name="e7">The value that element 7 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UInt16}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector128<ushort> Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7)
{
- var pResult = stackalloc ushort[8]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- };
+ if (Sse2.IsSupported)
+ {
+ Vector128<ushort> result = CreateScalarUnsafe(e0); // < 0, ?, ?, ?, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e1, 1); // < 0, 1, ?, ?, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e2, 2); // < 0, 1, 2, ?, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e3, 3); // < 0, 1, 2, 3, ?, ?, ?, ? >
+ result = Sse2.Insert(result, e4, 4); // < 0, 1, 2, 3, 4, ?, ?, ? >
+ result = Sse2.Insert(result, e5, 5); // < 0, 1, 2, 3, 4, 5, ?, ? >
+ result = Sse2.Insert(result, e6, 6); // < 0, 1, 2, 3, 4, 5, 6, ? >
+ return Sse2.Insert(result, e7, 7); // < 0, 1, 2, 3, 4, 5, 6, 7 >
+ }
- return Unsafe.AsRef<Vector128<ushort>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
+
+ Vector128<ushort> SoftwareFallback(ushort i0, ushort i1, ushort i2, ushort i3, ushort i4, ushort i5, ushort i6, ushort i7)
+ {
+ var pResult = stackalloc ushort[8]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ };
+
+ return Unsafe.AsRef<Vector128<ushort>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{UInt32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -637,34 +884,76 @@ namespace System.Runtime.Intrinsics
/// <param name="e2">The value that element 2 will be initialized to.</param>
/// <param name="e3">The value that element 3 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UInt32}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector128<uint> Create(uint e0, uint e1, uint e2, uint e3)
{
- var pResult = stackalloc uint[4]
+ if (Sse41.IsSupported)
{
- e0,
- e1,
- e2,
- e3,
- };
+ Vector128<uint> result = CreateScalarUnsafe(e0); // < 0, ?, ?, ? >
+ result = Sse41.Insert(result, e1, 1); // < 0, 1, ?, ? >
+ result = Sse41.Insert(result, e2, 2); // < 0, 1, 2, ? >
+ return Sse41.Insert(result, e3, 3); // < 0, 1, 2, 3 >
+ }
- return Unsafe.AsRef<Vector128<uint>>(pResult);
+ if (Sse2.IsSupported)
+ {
+ // We deal with the elements in order, unpacking the ordered pairs of uint into vectors. We then treat those vectors as ulong and
+ // unpack them again. This efficiently gets all uints ordered into the result.
+
+ Vector128<ulong> lo64, hi64;
+ lo64 = Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)).AsUInt64(); // < 0, 1, ?, ? >
+ hi64 = Sse2.UnpackLow(CreateScalarUnsafe(e2), CreateScalarUnsafe(e3)).AsUInt64(); // < 2, 3, ?, ? >
+ return Sse2.UnpackLow(lo64, hi64).AsUInt32(); // < 0, 1, 2, 3 >
+ }
+
+ return SoftwareFallback(e0, e1, e2, e3);
+
+ Vector128<uint> SoftwareFallback(uint i0, uint i1, uint i2, uint i3)
+ {
+ var pResult = stackalloc uint[4]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ };
+
+ return Unsafe.AsRef<Vector128<uint>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{UInt64}" /> instance with each element initialized to the corresponding specified value.</summary>
/// <param name="e0">The value that element 0 will be initialized to.</param>
/// <param name="e1">The value that element 1 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UInt64}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector128<ulong> Create(ulong e0, ulong e1)
{
- var pResult = stackalloc ulong[2]
+ if (Sse41.X64.IsSupported)
{
- e0,
- e1,
- };
+ Vector128<ulong> result = CreateScalarUnsafe(e0); // < 0, ? >
+ return Sse41.X64.Insert(result, e1, 1); // < 0, 1 >
+ }
- return Unsafe.AsRef<Vector128<ulong>>(pResult);
+ if (Sse2.X64.IsSupported)
+ {
+ return Sse2.UnpackLow(CreateScalarUnsafe(e0), CreateScalarUnsafe(e1)); // < 0, 1 >
+ }
+
+ return SoftwareFallback(e0, e1);
+
+ Vector128<ulong> SoftwareFallback(ulong i0, ulong i1)
+ {
+ var pResult = stackalloc ulong[2]
+ {
+ i0,
+ i1,
+ };
+
+ return Unsafe.AsRef<Vector128<ulong>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector128{Byte}" /> instance from two <see cref="Vector64{Byte}" /> instances.</summary>
@@ -829,6 +1118,8 @@ namespace System.Runtime.Intrinsics
{
if (Sse2.IsSupported)
{
+ // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we call
+ // the UInt32 overload to ensure zero extension. We can then just treat the result as byte and return.
return Sse2.ConvertScalarToVector128UInt32(value).AsByte();
}
@@ -871,6 +1162,8 @@ namespace System.Runtime.Intrinsics
{
if (Sse2.IsSupported)
{
+ // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we cast
+ // to ushort and call the UInt32 overload to ensure zero extension. We can then just treat the result as short and return.
return Sse2.ConvertScalarToVector128UInt32((ushort)(value)).AsInt16();
}
@@ -934,7 +1227,8 @@ namespace System.Runtime.Intrinsics
{
if (Sse2.IsSupported)
{
- // Convert to byte so that we zero-extend, rather than sign-extend
+ // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we cast
+ // to byte and call the UInt32 overload to ensure zero extension. We can then just treat the result as sbyte and return.
return Sse2.ConvertScalarToVector128UInt32((byte)(value)).AsSByte();
}
@@ -978,6 +1272,8 @@ namespace System.Runtime.Intrinsics
{
if (Sse2.IsSupported)
{
+ // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we call
+ // the UInt32 overload to ensure zero extension. We can then just treat the result as ushort and return.
return Sse2.ConvertScalarToVector128UInt32(value).AsUInt16();
}
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs
index 34066a54e4..f1f70786ca 100644
--- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs
@@ -497,45 +497,58 @@ namespace System.Runtime.Intrinsics
/// <param name="e30">The value that element 30 will be initialized to.</param>
/// <param name="e31">The value that element 31 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Byte}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15, byte e16, byte e17, byte e18, byte e19, byte e20, byte e21, byte e22, byte e23, byte e24, byte e25, byte e26, byte e27, byte e28, byte e29, byte e30, byte e31)
{
- var pResult = stackalloc byte[32]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- e8,
- e9,
- e10,
- e11,
- e12,
- e13,
- e14,
- e15,
- e16,
- e17,
- e18,
- e19,
- e20,
- e21,
- e22,
- e23,
- e24,
- e25,
- e26,
- e27,
- e28,
- e29,
- e30,
- e31,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<byte> lo128 = Vector128.Create(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
+ Vector128<byte> hi128 = Vector128.Create(e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<byte>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31);
+
+ Vector256<byte> SoftwareFallback(byte i0, byte i1, byte i2, byte i3, byte i4, byte i5, byte i6, byte i7, byte i8, byte i9, byte i10, byte i11, byte i12, byte i13, byte i14, byte i15, byte i16, byte i17, byte i18, byte i19, byte i20, byte i21, byte i22, byte i23, byte i24, byte i25, byte i26, byte i27, byte i28, byte i29, byte i30, byte i31)
+ {
+ var pResult = stackalloc byte[32]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ i8,
+ i9,
+ i10,
+ i11,
+ i12,
+ i13,
+ i14,
+ i15,
+ i16,
+ i17,
+ i18,
+ i19,
+ i20,
+ i21,
+ i22,
+ i23,
+ i24,
+ i25,
+ i26,
+ i27,
+ i28,
+ i29,
+ i30,
+ i31,
+ };
+
+ return Unsafe.AsRef<Vector256<byte>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{Double}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -544,17 +557,30 @@ namespace System.Runtime.Intrinsics
/// <param name="e2">The value that element 2 will be initialized to.</param>
/// <param name="e3">The value that element 3 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Double}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<double> Create(double e0, double e1, double e2, double e3)
{
- var pResult = stackalloc double[4]
+ if (Avx.IsSupported)
{
- e0,
- e1,
- e2,
- e3,
- };
+ Vector128<double> lo128 = Vector128.Create(e0, e1);
+ Vector128<double> hi128 = Vector128.Create(e2, e3);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<double>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3);
+
+ Vector256<double> SoftwareFallback(double i0, double i1, double i2, double i3)
+ {
+ var pResult = stackalloc double[4]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ };
+
+ return Unsafe.AsRef<Vector256<double>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -575,29 +601,42 @@ namespace System.Runtime.Intrinsics
/// <param name="e14">The value that element 14 will be initialized to.</param>
/// <param name="e15">The value that element 15 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int16}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<short> Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7, short e8, short e9, short e10, short e11, short e12, short e13, short e14, short e15)
{
- var pResult = stackalloc short[16]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- e8,
- e9,
- e10,
- e11,
- e12,
- e13,
- e14,
- e15,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<short> lo128 = Vector128.Create(e0, e1, e2, e3, e4, e5, e6, e7);
+ Vector128<short> hi128 = Vector128.Create(e8, e9, e10, e11, e12, e13, e14, e15);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<short>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
+
+ Vector256<short> SoftwareFallback(short i0, short i1, short i2, short i3, short i4, short i5, short i6, short i7, short i8, short i9, short i10, short i11, short i12, short i13, short i14, short i15)
+ {
+ var pResult = stackalloc short[16]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ i8,
+ i9,
+ i10,
+ i11,
+ i12,
+ i13,
+ i14,
+ i15,
+ };
+
+ return Unsafe.AsRef<Vector256<short>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -610,21 +649,34 @@ namespace System.Runtime.Intrinsics
/// <param name="e6">The value that element 6 will be initialized to.</param>
/// <param name="e7">The value that element 7 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int32}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<int> Create(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7)
{
- var pResult = stackalloc int[8]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<int> lo128 = Vector128.Create(e0, e1, e2, e3);
+ Vector128<int> hi128 = Vector128.Create(e4, e5, e6, e7);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<int>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
+
+ Vector256<int> SoftwareFallback(int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7)
+ {
+ var pResult = stackalloc int[8]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ };
+
+ return Unsafe.AsRef<Vector256<int>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -633,17 +685,30 @@ namespace System.Runtime.Intrinsics
/// <param name="e2">The value that element 2 will be initialized to.</param>
/// <param name="e3">The value that element 3 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int64}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<long> Create(long e0, long e1, long e2, long e3)
{
- var pResult = stackalloc long[4]
+ if (Sse2.X64.IsSupported && Avx.IsSupported)
{
- e0,
- e1,
- e2,
- e3,
- };
+ Vector128<long> lo128 = Vector128.Create(e0, e1);
+ Vector128<long> hi128 = Vector128.Create(e2, e3);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<long>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3);
+
+ Vector256<long> SoftwareFallback(long i0, long i1, long i2, long i3)
+ {
+ var pResult = stackalloc long[4]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ };
+
+ return Unsafe.AsRef<Vector256<long>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -680,46 +745,59 @@ namespace System.Runtime.Intrinsics
/// <param name="e30">The value that element 30 will be initialized to.</param>
/// <param name="e31">The value that element 31 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{SByte}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15, sbyte e16, sbyte e17, sbyte e18, sbyte e19, sbyte e20, sbyte e21, sbyte e22, sbyte e23, sbyte e24, sbyte e25, sbyte e26, sbyte e27, sbyte e28, sbyte e29, sbyte e30, sbyte e31)
{
- var pResult = stackalloc sbyte[32]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- e8,
- e9,
- e10,
- e11,
- e12,
- e13,
- e14,
- e15,
- e16,
- e17,
- e18,
- e19,
- e20,
- e21,
- e22,
- e23,
- e24,
- e25,
- e26,
- e27,
- e28,
- e29,
- e30,
- e31,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<sbyte> lo128 = Vector128.Create(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
+ Vector128<sbyte> hi128 = Vector128.Create(e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<sbyte>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31);
+
+ Vector256<sbyte> SoftwareFallback(sbyte i0, sbyte i1, sbyte i2, sbyte i3, sbyte i4, sbyte i5, sbyte i6, sbyte i7, sbyte i8, sbyte i9, sbyte i10, sbyte i11, sbyte i12, sbyte i13, sbyte i14, sbyte i15, sbyte i16, sbyte i17, sbyte i18, sbyte i19, sbyte i20, sbyte i21, sbyte i22, sbyte i23, sbyte i24, sbyte i25, sbyte i26, sbyte i27, sbyte i28, sbyte i29, sbyte i30, sbyte i31)
+ {
+ var pResult = stackalloc sbyte[32]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ i8,
+ i9,
+ i10,
+ i11,
+ i12,
+ i13,
+ i14,
+ i15,
+ i16,
+ i17,
+ i18,
+ i19,
+ i20,
+ i21,
+ i22,
+ i23,
+ i24,
+ i25,
+ i26,
+ i27,
+ i28,
+ i29,
+ i30,
+ i31,
+ };
+
+ return Unsafe.AsRef<Vector256<sbyte>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{Single}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -732,21 +810,34 @@ namespace System.Runtime.Intrinsics
/// <param name="e6">The value that element 6 will be initialized to.</param>
/// <param name="e7">The value that element 7 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Single}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<float> Create(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7)
{
- var pResult = stackalloc float[8]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<float> lo128 = Vector128.Create(e0, e1, e2, e3);
+ Vector128<float> hi128 = Vector128.Create(e4, e5, e6, e7);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<float>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
+
+ Vector256<float> SoftwareFallback(float i0, float i1, float i2, float i3, float i4, float i5, float i6, float i7)
+ {
+ var pResult = stackalloc float[8]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ };
+
+ return Unsafe.AsRef<Vector256<float>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -767,30 +858,43 @@ namespace System.Runtime.Intrinsics
/// <param name="e14">The value that element 14 will be initialized to.</param>
/// <param name="e15">The value that element 15 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt16}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<ushort> Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7, ushort e8, ushort e9, ushort e10, ushort e11, ushort e12, ushort e13, ushort e14, ushort e15)
{
- var pResult = stackalloc ushort[16]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- e8,
- e9,
- e10,
- e11,
- e12,
- e13,
- e14,
- e15,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<ushort> lo128 = Vector128.Create(e0, e1, e2, e3, e4, e5, e6, e7);
+ Vector128<ushort> hi128 = Vector128.Create(e8, e9, e10, e11, e12, e13, e14, e15);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<ushort>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15);
+
+ Vector256<ushort> SoftwareFallback(ushort i0, ushort i1, ushort i2, ushort i3, ushort i4, ushort i5, ushort i6, ushort i7, ushort i8, ushort i9, ushort i10, ushort i11, ushort i12, ushort i13, ushort i14, ushort i15)
+ {
+ var pResult = stackalloc ushort[16]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ i8,
+ i9,
+ i10,
+ i11,
+ i12,
+ i13,
+ i14,
+ i15,
+ };
+
+ return Unsafe.AsRef<Vector256<ushort>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -803,22 +907,35 @@ namespace System.Runtime.Intrinsics
/// <param name="e6">The value that element 6 will be initialized to.</param>
/// <param name="e7">The value that element 7 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt32}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<uint> Create(uint e0, uint e1, uint e2, uint e3, uint e4, uint e5, uint e6, uint e7)
{
- var pResult = stackalloc uint[8]
- {
- e0,
- e1,
- e2,
- e3,
- e4,
- e5,
- e6,
- e7,
- };
+ if (Avx.IsSupported)
+ {
+ Vector128<uint> lo128 = Vector128.Create(e0, e1, e2, e3);
+ Vector128<uint> hi128 = Vector128.Create(e4, e5, e6, e7);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<uint>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3, e4, e5, e6, e7);
+
+ Vector256<uint> SoftwareFallback(uint i0, uint i1, uint i2, uint i3, uint i4, uint i5, uint i6, uint i7)
+ {
+ var pResult = stackalloc uint[8]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ i4,
+ i5,
+ i6,
+ i7,
+ };
+
+ return Unsafe.AsRef<Vector256<uint>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -827,172 +944,305 @@ namespace System.Runtime.Intrinsics
/// <param name="e2">The value that element 2 will be initialized to.</param>
/// <param name="e3">The value that element 3 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt64}" /> with each element initialized to corresponding specified value.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<ulong> Create(ulong e0, ulong e1, ulong e2, ulong e3)
{
- var pResult = stackalloc ulong[4]
+ if (Sse2.X64.IsSupported && Avx.IsSupported)
{
- e0,
- e1,
- e2,
- e3,
- };
+ Vector128<ulong> lo128 = Vector128.Create(e0, e1);
+ Vector128<ulong> hi128 = Vector128.Create(e2, e3);
+ return Create(lo128, hi128);
+ }
- return Unsafe.AsRef<Vector256<ulong>>(pResult);
+ return SoftwareFallback(e0, e1, e2, e3);
+
+ Vector256<ulong> SoftwareFallback(ulong i0, ulong i1, ulong i2, ulong i3)
+ {
+ var pResult = stackalloc ulong[4]
+ {
+ i0,
+ i1,
+ i2,
+ i3,
+ };
+
+ return Unsafe.AsRef<Vector256<ulong>>(pResult);
+ }
}
/// <summary>Creates a new <see cref="Vector256{Byte}" /> instance from two <see cref="Vector128{Byte}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Byte}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<byte> Create(Vector128<byte> lower, Vector128<byte> upper)
{
- Vector256<byte> result256 = Vector256<byte>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<byte> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
+
+ return SoftwareFallback(lower, upper);
+
+ Vector256<byte> SoftwareFallback(Vector128<byte> x, Vector128<byte> y)
+ {
+ Vector256<byte> result256 = Vector256<byte>.Zero;
- ref Vector128<byte> result128 = ref Unsafe.As<Vector256<byte>, Vector128<byte>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ ref Vector128<byte> result128 = ref Unsafe.As<Vector256<byte>, Vector128<byte>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
- return result256;
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{Double}" /> instance from two <see cref="Vector128{Double}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Double}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<double> Create(Vector128<double> lower, Vector128<double> upper)
{
- Vector256<double> result256 = Vector256<double>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<double> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
+
+ return SoftwareFallback(lower, upper);
- ref Vector128<double> result128 = ref Unsafe.As<Vector256<double>, Vector128<double>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ Vector256<double> SoftwareFallback(Vector128<double> x, Vector128<double> y)
+ {
+ Vector256<double> result256 = Vector256<double>.Zero;
+
+ ref Vector128<double> result128 = ref Unsafe.As<Vector256<double>, Vector128<double>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
- return result256;
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{Int16}" /> instance from two <see cref="Vector128{Int16}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int16}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<short> Create(Vector128<short> lower, Vector128<short> upper)
{
- Vector256<short> result256 = Vector256<short>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<short> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
+
+ return SoftwareFallback(lower, upper);
+
+ Vector256<short> SoftwareFallback(Vector128<short> x, Vector128<short> y)
+ {
+ Vector256<short> result256 = Vector256<short>.Zero;
- ref Vector128<short> result128 = ref Unsafe.As<Vector256<short>, Vector128<short>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ ref Vector128<short> result128 = ref Unsafe.As<Vector256<short>, Vector128<short>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
- return result256;
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{Int32}" /> instance from two <see cref="Vector128{Int32}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int32}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<int> Create(Vector128<int> lower, Vector128<int> upper)
{
- Vector256<int> result256 = Vector256<int>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<int> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
- ref Vector128<int> result128 = ref Unsafe.As<Vector256<int>, Vector128<int>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ return SoftwareFallback(lower, upper);
- return result256;
+ Vector256<int> SoftwareFallback(Vector128<int> x, Vector128<int> y)
+ {
+ Vector256<int> result256 = Vector256<int>.Zero;
+
+ ref Vector128<int> result128 = ref Unsafe.As<Vector256<int>, Vector128<int>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
+
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{Int64}" /> instance from two <see cref="Vector128{Int64}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int64}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<long> Create(Vector128<long> lower, Vector128<long> upper)
{
- Vector256<long> result256 = Vector256<long>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<long> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
- ref Vector128<long> result128 = ref Unsafe.As<Vector256<long>, Vector128<long>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ return SoftwareFallback(lower, upper);
- return result256;
+ Vector256<long> SoftwareFallback(Vector128<long> x, Vector128<long> y)
+ {
+ Vector256<long> result256 = Vector256<long>.Zero;
+
+ ref Vector128<long> result128 = ref Unsafe.As<Vector256<long>, Vector128<long>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
+
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{SByte}" /> instance from two <see cref="Vector128{SByte}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{SByte}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<sbyte> Create(Vector128<sbyte> lower, Vector128<sbyte> upper)
{
- Vector256<sbyte> result256 = Vector256<sbyte>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<sbyte> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
- ref Vector128<sbyte> result128 = ref Unsafe.As<Vector256<sbyte>, Vector128<sbyte>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ return SoftwareFallback(lower, upper);
+
+ Vector256<sbyte> SoftwareFallback(Vector128<sbyte> x, Vector128<sbyte> y)
+ {
+ Vector256<sbyte> result256 = Vector256<sbyte>.Zero;
- return result256;
+ ref Vector128<sbyte> result128 = ref Unsafe.As<Vector256<sbyte>, Vector128<sbyte>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
+
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{Single}" /> instance from two <see cref="Vector128{Single}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Single}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector256<float> Create(Vector128<float> lower, Vector128<float> upper)
{
- Vector256<float> result256 = Vector256<float>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<float> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
+
+ return SoftwareFallback(lower, upper);
+
+ Vector256<float> SoftwareFallback(Vector128<float> x, Vector128<float> y)
+ {
+ Vector256<float> result256 = Vector256<float>.Zero;
- ref Vector128<float> result128 = ref Unsafe.As<Vector256<float>, Vector128<float>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ ref Vector128<float> result128 = ref Unsafe.As<Vector256<float>, Vector128<float>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
- return result256;
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance from two <see cref="Vector128{UInt16}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt16}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<ushort> Create(Vector128<ushort> lower, Vector128<ushort> upper)
{
- Vector256<ushort> result256 = Vector256<ushort>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<ushort> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
+
+ return SoftwareFallback(lower, upper);
- ref Vector128<ushort> result128 = ref Unsafe.As<Vector256<ushort>, Vector128<ushort>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ Vector256<ushort> SoftwareFallback(Vector128<ushort> x, Vector128<ushort> y)
+ {
+ Vector256<ushort> result256 = Vector256<ushort>.Zero;
- return result256;
+ ref Vector128<ushort> result128 = ref Unsafe.As<Vector256<ushort>, Vector128<ushort>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
+
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance from two <see cref="Vector128{UInt32}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt32}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<uint> Create(Vector128<uint> lower, Vector128<uint> upper)
{
- Vector256<uint> result256 = Vector256<uint>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<uint> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
- ref Vector128<uint> result128 = ref Unsafe.As<Vector256<uint>, Vector128<uint>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ return SoftwareFallback(lower, upper);
- return result256;
+ Vector256<uint> SoftwareFallback(Vector128<uint> x, Vector128<uint> y)
+ {
+ Vector256<uint> result256 = Vector256<uint>.Zero;
+
+ ref Vector128<uint> result128 = ref Unsafe.As<Vector256<uint>, Vector128<uint>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
+
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance from two <see cref="Vector128{UInt64}" /> instances.</summary>
/// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
/// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt64}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
[CLSCompliant(false)]
public static unsafe Vector256<ulong> Create(Vector128<ulong> lower, Vector128<ulong> upper)
{
- Vector256<ulong> result256 = Vector256<ulong>.Zero;
+ if (Avx.IsSupported)
+ {
+ Vector256<ulong> result = lower.ToVector256Unsafe();
+ return result.WithUpper(upper);
+ }
+
+ return SoftwareFallback(lower, upper);
- ref Vector128<ulong> result128 = ref Unsafe.As<Vector256<ulong>, Vector128<ulong>>(ref result256);
- result128 = lower;
- Unsafe.Add(ref result128, 1) = upper;
+ Vector256<ulong> SoftwareFallback(Vector128<ulong> x, Vector128<ulong> y)
+ {
+ Vector256<ulong> result256 = Vector256<ulong>.Zero;
+
+ ref Vector128<ulong> result128 = ref Unsafe.As<Vector256<ulong>, Vector128<ulong>>(ref result256);
+ result128 = x;
+ Unsafe.Add(ref result128, 1) = y;
- return result256;
+ return result256;
+ }
}
/// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs
index 6c7a108c42..44f2928c5f 100644
--- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs
+++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs
@@ -315,11 +315,14 @@ namespace System.Runtime.Intrinsics
if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))))
{
+ // All integral types generate the same instruction, so just pick one rather than handling each T separately
return Avx2.InsertVector128(AsByte(), value.AsByte(), 0).As<T>();
}
if (Avx.IsSupported)
{
+ // All floating-point types generate the same instruction, so just pick one rather than handling each T separately
+ // We also just fall back to this for integral types if AVX2 isn't supported, since that is still faster than software
return Avx.InsertVector128(AsSingle(), value.AsSingle(), 0).As<T>();
}
@@ -344,11 +347,14 @@ namespace System.Runtime.Intrinsics
if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))))
{
+ // All integral types generate the same instruction, so just pick one rather than handling each T separately
return Avx2.ExtractVector128(AsByte(), 1).As<T>();
}
if (Avx.IsSupported)
{
+ // All floating-point types generate the same instruction, so just pick one rather than handling each T separately
+ // We also just fall back to this for integral types if AVX2 isn't supported, since that is still faster than software
return Avx.ExtractVector128(AsSingle(), 1).As<T>();
}
@@ -373,11 +379,14 @@ namespace System.Runtime.Intrinsics
if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))))
{
+ // All integral types generate the same instruction, so just pick one rather than handling each T separately
return Avx2.InsertVector128(AsByte(), value.AsByte(), 1).As<T>();
}
if (Avx.IsSupported)
{
+ // All floating-point types generate the same instruction, so just pick one rather than handling each T separately
+ // We also just fall back to this for integral types if AVX2 isn't supported, since that is still faster than software
return Avx.InsertVector128(AsSingle(), value.AsSingle(), 1).As<T>();
}