diff options
author | Jacek Blaszczynski <biosciencenow@outlook.com> | 2018-03-07 15:48:24 +0100 |
---|---|---|
committer | Jacek Blaszczynski <biosciencenow@outlook.com> | 2018-03-14 03:31:50 +0100 |
commit | 82862b0e6800a5bb096ccdaea2dcc27ffc0300e6 (patch) | |
tree | 399715371276290bf2eae44f2b1bc4c42dc8ee12 | |
parent | 449a6d515a5ae7c870dc87ba5788067e42b875bc (diff) | |
download | coreclr-82862b0e6800a5bb096ccdaea2dcc27ffc0300e6.tar.gz coreclr-82862b0e6800a5bb096ccdaea2dcc27ffc0300e6.tar.bz2 coreclr-82862b0e6800a5bb096ccdaea2dcc27ffc0300e6.zip |
Managed implementation of SSE2 SetAllVector128 helper HW intrinsic
-rw-r--r-- | src/jit/hwintrinsicxarch.cpp | 3 | ||||
-rw-r--r-- | src/jit/namedintrinsiclist.h | 4 | ||||
-rw-r--r-- | src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse2.cs | 195 |
3 files changed, 147 insertions, 55 deletions
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 869361770c..f93141bdee 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -124,7 +124,7 @@ InstructionSet Compiler::lookupHWIntrinsicISA(const char* className) // isa -- instruction set of the intrinsic. // // Return Value: -// Id for the hardware intrinsic. +// Id for the hardware intrinsic // // TODO-Throughput: replace sequential search by binary search NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSet isa) @@ -137,6 +137,7 @@ NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSe if (isa == hwIntrinsicInfoArray[i].isa && strcmp(methodName, hwIntrinsicInfoArray[i].intrinsicName) == 0) { result = hwIntrinsicInfoArray[i].intrinsicID; + break; } } } diff --git a/src/jit/namedintrinsiclist.h b/src/jit/namedintrinsiclist.h index 91c9720580..f4d7cb2a88 100644 --- a/src/jit/namedintrinsiclist.h +++ b/src/jit/namedintrinsiclist.h @@ -27,8 +27,8 @@ enum NamedIntrinsic : unsigned int #define HARDWARE_INTRINSIC(id, isa, name, form, ins0, ins1, ins2, flags) id, #include "hwintrinsiclistArm64.h" #endif // !defined(_TARGET_XARCH_) && !defined(_TARGET_ARM64_) - NI_HW_INTRINSIC_END -#endif + NI_HW_INTRINSIC_END, +#endif // FEATURE_HW_INTRINSICS }; #if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_) diff --git a/src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse2.cs b/src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse2.cs index 23d8c93493..d0eb9ef4da 100644 --- a/src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse2.cs +++ b/src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse2.cs @@ -969,6 +969,149 @@ namespace System.Runtime.Intrinsics.X86 public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector128<short> right) => PackUnsignedSaturate(left, right); /// <summary> + /// __m128i _mm_set1_epi8 (char a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + 
public static Vector128<byte> SetAllVector128(byte value) + { + // Zero vector and load value at index 0 + Vector128<byte> vector1 = Sse.StaticCast<uint, byte>(ConvertScalarToVector128UInt32(value)); + // Create { -- -- -- -- -- -- -- -- -- -- -- -- -- -- vl vl } + Vector128<ushort> tmpVector1 = Sse.StaticCast<byte, ushort>(UnpackLow(vector1, vector1)); + // Create { -- -- -- -- -- -- -- -- -- -- -- -- vl vl vl vl } + Vector128<uint> tmpVector2 = Sse.StaticCast<ushort, uint>(UnpackLow(tmpVector1, tmpVector1)); + // Create { vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl } and return result + return Sse.StaticCast<uint, byte>(Shuffle(tmpVector2, 0)); + } + /// <summary> + /// __m128i _mm_set1_epi8 (char a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<sbyte> SetAllVector128(sbyte value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<sbyte> vector = Sse.StaticCast<int, sbyte>(ConvertScalarToVector128Int32(value)); + // Create { -- -- -- -- -- -- -- -- -- -- -- -- -- -- vl vl } + Vector128<short> tmpVector1 = Sse.StaticCast<sbyte, short>(UnpackLow(vector, vector)); + // Create { -- -- -- -- -- -- -- -- -- -- -- -- vl vl vl vl } + Vector128<int> tmpVector2 = Sse.StaticCast<short, int>(UnpackLow(tmpVector1, tmpVector1)); + // Create { vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl vl } and return result + return Sse.StaticCast<int, sbyte>(Shuffle(tmpVector2, 0)); + } + /// <summary> + /// __m128i _mm_set1_epi16 (short a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<short> SetAllVector128(short value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<short> vector = Sse.StaticCast<int, short>(ConvertScalarToVector128Int32(value)); + // Create { -- -- -- -- -- -- vl vl } + Vector128<int> tmpVector = 
Sse.StaticCast<short, int>(UnpackLow(vector, vector)); + // Create { vl vl vl vl vl vl vl vl } and return result + return Sse.StaticCast<int, short>(Shuffle(tmpVector, (byte)0)); + } + /// <summary> + /// __m128i _mm_set1_epi16 (short a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<ushort> SetAllVector128(ushort value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<ushort> vector = Sse.StaticCast<uint, ushort>(ConvertScalarToVector128UInt32(value)); + // Create { -- -- -- -- -- -- vl vl } + Vector128<uint> tmpVector = Sse.StaticCast<ushort, uint>(UnpackLow(vector, vector)); + // Create { vl vl vl vl vl vl vl vl } and return result + return Sse.StaticCast<uint, ushort>(Shuffle(tmpVector, (byte)0)); + } + /// <summary> + /// __m128i _mm_set1_epi32 (int a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<int> SetAllVector128(int value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<int> vector = ConvertScalarToVector128Int32(value); + // Create { vl vl vl vl } and return result + return Shuffle(vector, 0); + } + /// <summary> + /// __m128i _mm_set1_epi32 (int a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<uint> SetAllVector128(uint value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<uint> vector = ConvertScalarToVector128UInt32(value); + // Create { vl vl vl vl } and return result + return Shuffle(vector, 0); + } + /// <summary> + /// __m128i _mm_set1_epi64x (long long a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<long> SetAllVector128(long value) + { + // TODO-CQ Optimize algorithm choice based on 
benchmarks + + // Zero vector and load value at index 0 + Vector128<long> vector = ConvertScalarToVector128Int64(value); + // Create { vl vl } and return result + return UnpackLow(vector, vector); + } + /// <summary> + /// __m128i _mm_set1_epi64x (long long a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<ulong> SetAllVector128(ulong value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<ulong> vector = ConvertScalarToVector128UInt64(value); + // Create { vl vl } and return result + return UnpackLow(vector, vector); + } + /// <summary> + /// __m128d _mm_set1_pd (double a) + /// HELPER + /// </summary> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<double> SetAllVector128(double value) + { + // TODO-CQ Optimize algorithm choice based on benchmarks + + // Zero vector and load value at index 0 + Vector128<double> vector = SetScalarVector128(value); + // Create { vl vl } and return result + return UnpackLow(vector, vector); + } + + /// <summary> + /// __m128d _mm_set_sd (double a) + /// HELPER + /// </summary> + public static Vector128<double> SetScalarVector128(double value) => SetScalarVector128(value); + + /// <summary> /// ___m128i _mm_set_epi8 (char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) /// HELPER /// </summary> @@ -1159,58 +1302,6 @@ namespace System.Runtime.Intrinsics.X86 } /// <summary> - /// __m128d _mm_set_sd (double a) - /// HELPER - /// </summary> - public static Vector128<double> SetScalarVector128(double value) => SetScalarVector128(value); - - /// <summary> - /// __m128i _mm_set1_epi8 (char a) - /// HELPER - /// </summary> - public static Vector128<byte> SetAllVector128(byte value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi8 (char a) - /// HELPER - /// 
</summary> - public static Vector128<sbyte> SetAllVector128(sbyte value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi16 (short a) - /// HELPER - /// </summary> - public static Vector128<short> SetAllVector128(short value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi16 (short a) - /// HELPER - /// </summary> - public static Vector128<ushort> SetAllVector128(ushort value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi32 (int a) - /// HELPER - /// </summary> - public static Vector128<int> SetAllVector128(int value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi32 (int a) - /// HELPER - /// </summary> - public static Vector128<uint> SetAllVector128(uint value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi64x (long long a) - /// HELPER - /// </summary> - public static Vector128<long> SetAllVector128(long value) => SetAllVector128(value); - /// <summary> - /// __m128i _mm_set1_epi64x (long long a) - /// HELPER - /// </summary> - public static Vector128<ulong> SetAllVector128(ulong value) => SetAllVector128(value); - /// <summary> - /// __m128d _mm_set1_pd (double a) - /// HELPER - /// </summary> - public static Vector128<double> SetAllVector128(double value) => SetAllVector128(value); - - /// <summary> /// __m128i _mm_setzero_si128 () /// HELPER: PXOR /// __m128d _mm_setzero_pd (void) |