summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Carol Eidt <carol.eidt@microsoft.com> 2018-03-14 14:56:50 -0700
committer GitHub <noreply@github.com> 2018-03-14 14:56:50 -0700
commit 81bffab55632d3404ab2e39775b09ad3ca191050 (patch)
tree a9d067cb075908e76a28cbb4584ce6eeb7935209
parent 76be5e3928cbfa01e96086e532a6476f1acac293 (diff)
parent bd55cc7f832b44500d9f8ee0ced8c205247436f9 (diff)
download coreclr-81bffab55632d3404ab2e39775b09ad3ca191050.tar.gz
coreclr-81bffab55632d3404ab2e39775b09ad3ca191050.tar.bz2
coreclr-81bffab55632d3404ab2e39775b09ad3ca191050.zip
Merge pull request #16832 from dotnetrt/StoreNonTemporal
Implement SSE2 StoreNonTemporal HW intrinsic - complete SSE2 ISA
-rw-r--r-- src/jit/emitxarch.cpp 21
-rw-r--r-- src/jit/hwintrinsiccodegenxarch.cpp 12
-rw-r--r-- src/jit/hwintrinsiclistxarch.h 1
-rw-r--r-- src/jit/hwintrinsicxarch.cpp 12
-rw-r--r-- src/jit/instrsxarch.h 1
-rw-r--r-- tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs 171
-rw-r--r-- tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj 34
-rw-r--r-- tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj 34
8 files changed, 279 insertions, 7 deletions
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 3f0b23dd79..4e1bec97fb 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -312,11 +312,12 @@ bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins)
bool emitter::TakesVexPrefix(instruction ins)
{
// special case vzeroupper as it requires 2-byte VEX prefix
- // special case the fencing and the prefetch instructions as they never take a VEX prefix
+ // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
switch (ins)
{
case INS_lfence:
case INS_mfence:
+ case INS_movnti:
case INS_prefetchnta:
case INS_prefetcht0:
case INS_prefetcht1:
@@ -418,13 +419,21 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
if (IsSSEOrAVXInstruction(ins))
{
- if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si ||
- ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm)
+ switch (ins)
{
- return true;
+ case INS_cvttsd2si:
+ case INS_cvttss2si:
+ case INS_cvtsd2si:
+ case INS_cvtss2si:
+ case INS_cvtsi2sd:
+ case INS_cvtsi2ss:
+ case INS_mov_xmm2i:
+ case INS_mov_i2xmm:
+ case INS_movnti:
+ return true;
+ default:
+ return false;
}
-
- return false;
}
// TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 58fbde8bef..873f1c6dec 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -1082,6 +1082,18 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_SSE2_StoreNonTemporal:
+ {
+ assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
+ assert(op1 != nullptr);
+ assert(op2 != nullptr);
+
+ op2Reg = op2->gtRegNum;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+ emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0);
+ break;
+ }
+
default:
unreached();
break;
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 88cfb97389..c8c611f56d 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -228,6 +228,7 @@ HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAlign
HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreNonTemporal, "StoreNonTemporal", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoRMWSemantics|HW_Flag_SecondArgMaybe64Bit)
HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index fd228d71fc..56c7e99669 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -546,7 +546,6 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
{
switch (isa)
{
- case InstructionSet_SSE2:
case InstructionSet_SSE42:
case InstructionSet_AVX:
case InstructionSet_AVX2:
@@ -558,6 +557,7 @@ bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
return false;
case InstructionSet_SSE:
+ case InstructionSet_SSE2:
case InstructionSet_SSE3:
case InstructionSet_SSSE3:
case InstructionSet_SSE41:
@@ -1005,6 +1005,16 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
break;
}
+ case NI_SSE2_StoreNonTemporal:
+ {
+ assert(sig->numArgs == 2);
+ assert(JITtype2varType(sig->retType) == TYP_VOID);
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+ retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
+ break;
+ }
+
default:
JITDUMP("Not implemented hardware intrinsic");
break;
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index b5b88da982..c0cd91d6a2 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -196,6 +196,7 @@ INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE
#ifndef LEGACY_BACKEND
INST3( movntdq, "movntdq" , 0, IUM_WR, 0, 0, PCKDBL(0xE7), BAD_CODE, BAD_CODE)
+INST3( movnti, "movnti" , 0, IUM_WR, 0, 0, PCKFLT(0xC3), BAD_CODE, BAD_CODE)
INST3( movntpd, "movntpd" , 0, IUM_WR, 0, 0, PCKDBL(0x2B), BAD_CODE, BAD_CODE)
INST3( movntps, "movntps" , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs
new file mode 100644
index 0000000000..76b468e12e
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal.cs
@@ -0,0 +1,171 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+ class Program // Console test driver for Sse2.StoreNonTemporal; Main's return value reports the outcome.
+ {
+ const int Pass = 100; // returned by Main when no check failed
+ const int Fail = 0; // returned by Main when any check failed
+
+ static unsafe int Main(string[] args) // args is not read; returns Pass or Fail
+ {
+ int testResult = Pass; // stays Pass unless a check below overwrites it
+
+ if (Sse2.IsSupported) // when Sse2 is not supported nothing runs and Pass is returned
+ {
+ if (Environment.Is64BitProcess) // long/ulong stores are value-checked only in a 64-bit process
+ {
+ { // long check
+ long* inArray = stackalloc long[2];
+ inArray[0] = 0xffffffff01l; // only element 0 is assigned
+ long* outBuffer = stackalloc long[2];
+
+ Sse2.StoreNonTemporal(outBuffer, inArray[0]); // writes the single value through the outBuffer pointer
+
+ for (var i = 0; i < 2; i++) // compares both slots; NOTE(review): slot 1 presumably relies on both stackalloc buffers starting zeroed - confirm
+ {
+ if (inArray[i] != outBuffer[i])
+ {
+ Console.WriteLine("Sse2 StoreNonTemporal failed on long:");
+ for (var n = 0; n < 2; n++) // dump the whole output buffer for diagnosis
+ {
+ Console.Write(outBuffer[n] + ", ");
+ }
+ Console.WriteLine();
+
+ testResult = Fail;
+ break; // first mismatch is enough
+ }
+ }
+ }
+
+ { // ulong check, same shape as the long check
+ ulong* inArray = stackalloc ulong[2];
+ inArray[0] = 0xffffffffff01ul; // only element 0 is assigned
+ ulong* outBuffer = stackalloc ulong[2];
+
+ Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+ for (var i = 0; i < 2; i++)
+ {
+ if (inArray[i] != outBuffer[i])
+ {
+ Console.WriteLine("Sse2 StoreNonTemporal failed on ulong:");
+ for (var n = 0; n < 2; n++)
+ {
+ Console.Write(outBuffer[n] + ", ");
+ }
+ Console.WriteLine();
+
+ testResult = Fail;
+ break;
+ }
+ }
+ }
+ }
+ else // not a 64-bit process: the long/ulong calls are expected to throw PlatformNotSupportedException
+ {
+ try
+ {
+ long* inArray = stackalloc long[2];
+ inArray[0] = 0xffffffff01l;
+ long* outBuffer = stackalloc long[2];
+
+ Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+ testResult = Fail; // reached only if the call above did not throw
+ Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)} failed on long: expected PlatformNotSupportedException exception.");
+ }
+ catch (PlatformNotSupportedException) // expected outcome; nothing to record
+ {
+
+ }
+ catch(Exception ex) // any other exception type counts as a failure
+ {
+ testResult = Fail;
+ Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)}-{ex} failed on long: expected PlatformNotSupportedException exception.");
+ }
+
+ try
+ {
+ ulong* inArray = stackalloc ulong[2];
+ inArray[0] = 0xffffffffff01ul;
+ ulong* outBuffer = stackalloc ulong[2];
+
+ Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+ testResult = Fail; // reached only if the call above did not throw
+ Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)} failed on ulong: expected PlatformNotSupportedException exception.");
+ }
+ catch (PlatformNotSupportedException) // expected outcome; nothing to record
+ {
+
+ }
+ catch(Exception ex) // any other exception type counts as a failure
+ {
+ testResult = Fail;
+ Console.WriteLine($"{nameof(Sse2)}.{nameof(Sse2.StoreNonTemporal)}-{ex} failed on ulong: expected PlatformNotSupportedException exception.");
+ }
+ }
+
+ { // int check: sits outside the Is64BitProcess if/else, so it runs whenever Sse2 is supported
+ int* inArray = stackalloc int[4];
+ inArray[0] = -784561; // only element 0 is assigned
+ int* outBuffer = stackalloc int[4];
+
+ Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+ for (var i = 0; i < 4; i++) // compares all four slots although only slot 0 was stored
+ {
+ if (inArray[i] != outBuffer[i])
+ {
+ Console.WriteLine("Sse2 StoreNonTemporal failed on int:");
+ for (var n = 0; n < 4; n++)
+ {
+ Console.Write(outBuffer[n] + ", ");
+ }
+ Console.WriteLine();
+
+ testResult = Fail;
+ break;
+ }
+ }
+ }
+
+ { // uint check, same shape as the int check
+ uint* inArray = stackalloc uint[4];
+ inArray[0] = 0xffffff02u; // only element 0 is assigned
+ uint* outBuffer = stackalloc uint[4];
+
+ Sse2.StoreNonTemporal(outBuffer, inArray[0]);
+
+ for (var i = 0; i < 4; i++)
+ {
+ if (inArray[i] != outBuffer[i])
+ {
+ Console.WriteLine("Sse2 StoreNonTemporal failed on uint:");
+ for (var n = 0; n < 4; n++)
+ {
+ Console.Write(outBuffer[n] + ", ");
+ }
+ Console.WriteLine();
+
+ testResult = Fail;
+ break;
+ }
+ }
+ }
+ }
+
+ return testResult; // Pass (100) or Fail (0)
+ }
+ }
+}
+
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj
new file mode 100644
index 0000000000..8ca2a261c6
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_r.csproj
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="StoreNonTemporal.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project> \ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj
new file mode 100644
index 0000000000..4f00c2b7c5
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/StoreNonTemporal_ro.csproj
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="StoreNonTemporal.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project> \ No newline at end of file