diff options
author | Carol Eidt <carol.eidt@microsoft.com> | 2018-03-14 14:56:50 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-14 14:56:50 -0700 |
commit | 81bffab55632d3404ab2e39775b09ad3ca191050 (patch) | |
tree | a9d067cb075908e76a28cbb4584ce6eeb7935209 | |
parent | 76be5e3928cbfa01e96086e532a6476f1acac293 (diff) | |
parent | bd55cc7f832b44500d9f8ee0ced8c205247436f9 (diff) | |
download | coreclr-81bffab55632d3404ab2e39775b09ad3ca191050.tar.gz coreclr-81bffab55632d3404ab2e39775b09ad3ca191050.tar.bz2 coreclr-81bffab55632d3404ab2e39775b09ad3ca191050.zip |
Merge pull request #16832 from dotnetrt/StoreNonTemporal
Implement SSE2 StoreNonTemporal HW intrinsic - complete SSE2 ISA
-rw-r--r-- | src/jit/emitxarch.cpp | 21 | ||||
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 12 | ||||
-rw-r--r-- | src/jit/hwintrinsiclistxarch.h | 1 | ||||
-rw-r--r-- | src/jit/hwintrinsicxarch.cpp | 12 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 1 | ||||
-rw-r--r-- | tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs | 171 | ||||
-rw-r--r-- | tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj | 34 | ||||
-rw-r--r-- | tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj | 34 |
8 files changed, 279 insertions, 7 deletions
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 3f0b23dd79..4e1bec97fb 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -312,11 +312,12 @@ bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins) bool emitter::TakesVexPrefix(instruction ins) { // special case vzeroupper as it requires 2-byte VEX prefix - // special case the fencing and the prefetch instructions as they never take a VEX prefix + // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix switch (ins) { case INS_lfence: case INS_mfence: + case INS_movnti: case INS_prefetchnta: case INS_prefetcht0: case INS_prefetcht1: @@ -418,13 +419,21 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr) if (IsSSEOrAVXInstruction(ins)) { - if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || - ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm) + switch (ins) { - return true; + case INS_cvttsd2si: + case INS_cvttss2si: + case INS_cvtsd2si: + case INS_cvtss2si: + case INS_cvtsi2sd: + case INS_cvtsi2ss: + case INS_mov_xmm2i: + case INS_mov_i2xmm: + case INS_movnti: + return true; + default: + return false; } - - return false; } // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 58fbde8bef..873f1c6dec 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -1082,6 +1082,18 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) break; } + case NI_SSE2_StoreNonTemporal: + { + assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); + assert(op1 != nullptr); + assert(op2 != nullptr); + + op2Reg = op2->gtRegNum; + instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); + emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0); + break; + } + default: unreached(); break; diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 88cfb97389..c8c611f56d 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -228,6 +228,7 @@ HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAlign HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_StoreNonTemporal, "StoreNonTemporal", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoRMWSemantics|HW_Flag_SecondArgMaybe64Bit) HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index fd228d71fc..56c7e99669 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -546,7 +546,6 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa) { switch (isa) { - case InstructionSet_SSE2: case InstructionSet_SSE42: case InstructionSet_AVX: case InstructionSet_AVX2: @@ -558,6 +557,7 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa) return false; case InstructionSet_SSE: + case InstructionSet_SSE2: case InstructionSet_SSE3: case InstructionSet_SSSE3: case InstructionSet_SSE41: @@ -1005,6 +1005,16 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, break; } + case NI_SSE2_StoreNonTemporal: + { + assert(sig->numArgs == 2); + assert(JITtype2varType(sig->retType) == TYP_VOID); + op2 = impPopStack().val; + op1 = impPopStack().val; + retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0); + break; + } + default: JITDUMP("Not implemented hardware intrinsic"); break; diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index b5b88da982..c0cd91d6a2 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -196,6 +196,7 @@ INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE #ifndef LEGACY_BACKEND INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE) +INST3( movnti, "movnti" , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE) INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE) INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE) INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F)) diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs new file mode 100644 index 0000000000..76b468e12e --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + if (Environment.Is64BitProcess) + { + { + long* inArray = stackalloc long[2]; + inArray[0] = 0xffffffff01l; + long* outBuffer = stackalloc long[2]; + + Sse2.StoreNonTemporal(outBuffer, inArray[0]); + + for (var i = 0; i < 2; i++) + { + if (inArray[i] != outBuffer[i]) + { + Console.WriteLine("Sse2 StoreNonTemporal failed on long:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outBuffer[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + ulong* inArray = stackalloc ulong[2]; + inArray[0] = 0xffffffffff01ul; + ulong* outBuffer = stackalloc ulong[2]; + + Sse2.StoreNonTemporal(outBuffer, inArray[0]); + + for (var i = 0; i < 2; i++) + { + if (inArray[i] != outBuffer[i]) + { + Console.WriteLine("Sse2 StoreNonTemporal failed on ulong:"); + for (var n = 0; n < 2; n++) + { + Console.Write(outBuffer[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + else + { + try + { + long* inArray = stackalloc long[2]; + inArray[0] = 0xffffffff01l; + long* outBuffer = stackalloc long[2]; + + Sse2.StoreNonTemporal(outBuffer, inArray[0]); + testResult = Fail; + Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)} failed on long: expected PlatformNotSupportedException exception."); + } + catch (PlatformNotSupportedException) + { + + } + catch(Exception ex) + { + testResult = Fail; + Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)}-{ex} failed on long: expected PlatformNotSupportedException exception."); + } + + try + { + ulong* inArray = stackalloc ulong[2]; + inArray[0] = 0xffffffffff01ul; + ulong* outBuffer = stackalloc ulong[2]; + + Sse2.StoreNonTemporal(outBuffer, inArray[0]); + testResult = Fail; + Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)} failed on ulong: expected PlatformNotSupportedException exception."); + } + catch (PlatformNotSupportedException) + { + + } + catch(Exception ex) + { + testResult = Fail; + Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)}-{ex} failed on ulong: expected PlatformNotSupportedException exception."); + } + } + + { + int* inArray = stackalloc int[4]; + inArray[0] = -784561; + int* outBuffer = stackalloc int[4]; + + Sse2.StoreNonTemporal(outBuffer, inArray[0]); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outBuffer[i]) + { + Console.WriteLine("Sse2 StoreNonTemporal failed on int:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outBuffer[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + + { + uint* inArray = stackalloc uint[4]; + inArray[0] = 0xffffff02u; + uint* outBuffer = stackalloc uint[4]; + + Sse2.StoreNonTemporal(outBuffer, inArray[0]); + + for (var i = 0; i < 4; i++) + { + if (inArray[i] != outBuffer[i]) + { + Console.WriteLine("Sse2 StoreNonTemporal failed on uint:"); + for (var n = 0; n < 4; n++) + { + Console.Write(outBuffer[n] + ", "); + } + Console.WriteLine(); + + testResult = Fail; + break; + } + } + } + } + + return testResult; + } + } +} + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj new file mode 100644 index 0000000000..8ca2a261c6 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <SchemaVersion>2.0</SchemaVersion> + <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> + <OutputType>Exe</OutputType> + <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> + <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir> + <AllowUnsafeBlocks>true</AllowUnsafeBlocks> + </PropertyGroup> + <!-- Default configurations to help VS understand the configurations --> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " /> + <ItemGroup> + <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies"> + <Visible>False</Visible> + </CodeAnalysisDependentAssemblyPaths> + </ItemGroup> + <PropertyGroup> + <DebugType>None</DebugType> + <Optimize></Optimize> + </PropertyGroup> + <ItemGroup> + <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" /> + </ItemGroup> + <ItemGroup> + <Compile Include="StoreNonTemporal.cs" /> + </ItemGroup> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> + <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup> +</Project>
\ No newline at end of file diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj new file mode 100644 index 0000000000..4f00c2b7c5 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <SchemaVersion>2.0</SchemaVersion> + <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> + <OutputType>Exe</OutputType> + <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> + <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir> + <AllowUnsafeBlocks>true</AllowUnsafeBlocks> + </PropertyGroup> + <!-- Default configurations to help VS understand the configurations --> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " /> + <ItemGroup> + <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies"> + <Visible>False</Visible> + </CodeAnalysisDependentAssemblyPaths> + </ItemGroup> + <PropertyGroup> + <DebugType>None</DebugType> + <Optimize>True</Optimize> + </PropertyGroup> + <ItemGroup> + <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" /> + </ItemGroup> + <ItemGroup> + <Compile Include="StoreNonTemporal.cs" /> + </ItemGroup> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> + <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup> +</Project>
\ No newline at end of file |