diff options
-rw-r--r-- | src/jit/gentree.h | 3 | ||||
-rw-r--r-- | src/jit/simd.cpp | 22 | ||||
-rw-r--r-- | src/jit/simd.h | 15 | ||||
-rw-r--r-- | src/jit/simdcodegenxarch.cpp | 58 | ||||
-rw-r--r-- | tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.cs | 170 | ||||
-rw-r--r-- | tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.csproj | 45 |
6 files changed, 292 insertions, 21 deletions
diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 9be18b8bff..05a3a4a070 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -955,6 +955,9 @@ public: // struct fields constituting a single call argument. //---------------------------------------------------------------- +#define GTF_SIMD12_OP 0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12 + // even if they have been retyped as SIMD16. +//---------------------------------------------------------------- #define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler #define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was subsituted diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index 7b3896089f..39664c47bf 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -754,7 +754,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, // // Equality(v1, v2): // tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int> - // result = BitwiseAnd(t, shuffle(t, (2, 3, 1 0))) + // result = BitwiseAnd(t, shuffle(t, (2, 3, 0, 1))) // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements. // Compare vector<long> as if they were vector<int> and assign the result to a temp @@ -768,7 +768,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, // op2 = Shuffle(tmp, 0xB1) // IntrinsicId = BitwiseAnd *pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp); - *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT), + *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_INT, size); return SIMDIntrinsicBitwiseAnd; } @@ -2248,7 +2248,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, assert(op2->TypeGet() == simdType); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size); - retVal = simdTree; + if (simdType == TYP_SIMD12) + { + simdTree->gtFlags |= GTF_SIMD12_OP; + } + retVal = simdTree; } break; @@ -2259,7 +2263,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size); - retVal = simdTree; + if (simdType == TYP_SIMD12) + { + simdTree->gtFlags |= GTF_SIMD12_OP; + } + retVal = simdTree; } break; @@ -2407,7 +2415,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op1 = impSIMDPopStack(simdType, instMethod); simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size); - retVal = simdTree; + if (simdType == TYP_SIMD12) + { + simdTree->gtFlags |= GTF_SIMD12_OP; + } + retVal = simdTree; } break; diff --git a/src/jit/simd.h b/src/jit/simd.h index 807ccf90f1..c4a8866b07 100644 --- a/src/jit/simd.h +++ b/src/jit/simd.h @@ -32,11 +32,16 @@ struct SIMDIntrinsicInfo #ifdef _TARGET_XARCH_ // SSE2 Shuffle control byte to shuffle vector <W, Z, Y, X> // These correspond to shuffle immediate byte in shufps SSE2 instruction. -#define SHUFFLE_XXXX 0x00 -#define SHUFFLE_ZWYX 0xB1 -#define SHUFFLE_WWYY 0xF5 -#define SHUFFLE_ZZXX 0xA0 -#endif // _TARGET_XARCH_ +#define SHUFFLE_XXXX 0x00 // 00 00 00 00 +#define SHUFFLE_XXWW 0x0F // 00 00 11 11 +#define SHUFFLE_XYZW 0x1B // 00 01 10 11 +#define SHUFFLE_YXYX 0x44 // 01 00 01 00 +#define SHUFFLE_YYZZ 0x5A // 01 01 10 10 +#define SHUFFLE_ZXXY 0x81 // 10 00 00 01 +#define SHUFFLE_ZWXY 0xB1 // 10 11 00 01 +#define SHUFFLE_WWYY 0xF5 // 11 11 01 01 +#define SHUFFLE_ZZXX 0xA0 // 10 10 00 00 +#endif #endif // FEATURE_SIMD diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index effb68bb92..5641ad1681 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -1199,11 +1199,26 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) // "all ones" where the number of ones is given by the vector byte size, not by the // vector component count. So, for AVX registers we need to compare to 0xFFFFFFFF and // for SSE registers we need to compare to 0x0000FFFF. + // The SIMD12 case is handled specially, because we can't rely on the upper bytes being + // zero, so we must compare only the lower 3 floats (hence the byte mask of 0xFFF). // Note that -1 is used instead of 0xFFFFFFFF, on x64 emit doesn't correctly recognize // that 0xFFFFFFFF can be encoded in a single byte and emits the longer 3DFFFFFFFF // encoding instead of 83F8FF. - getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, - emitActualTypeSize(simdType) == 32 ? -1 : 0x0000FFFF); + ssize_t mask; + if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) + { + mask = 0x00000FFF; + getEmitter()->emitIns_R_I(INS_and, EA_4BYTE, intReg, mask); + } + else if (emitActualTypeSize(simdType) == 32) + { + mask = -1; + } + else + { + mask = 0x0000FFFF; + } + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, mask); } if (targetReg != REG_NA) @@ -1343,15 +1358,34 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // DotProduct(v1, v2) // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg1 - if (baseType == TYP_FLOAT) + if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) { + assert(baseType == TYP_FLOAT); // v0 = v1 * v2 // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its // // position - // tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1) + // tmp = shuffle(tmp, tmp, SHUFFLE_ZXXY) // tmp = (2, 0, 0, 1) - don't really care what's in upper + // // bits + // v0 = v0 + tmp // v0 = (3+2, 0+2, 1+0, 0+1) + // tmp = shuffle(tmp, tmp, SHUFFLE_XXWW) // tmp = ( 1, 1, 2, 2) + // v0 = v0 + tmp // v0 = (1+2+3, 0+1+2, 0+1+2, 0+1+2) + // + inst_RV_RV(INS_mulps, targetReg, op2Reg); + inst_RV_RV(INS_movaps, tmpReg1, targetReg); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZXXY); + inst_RV_RV(INS_addps, targetReg, tmpReg1); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XXWW); + inst_RV_RV(INS_addps, targetReg, tmpReg1); + } + else if (baseType == TYP_FLOAT) + { + // v0 = v1 * v2 + // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its + // // position + // tmp = shuffle(tmp, tmp, SHUFFLE_ZWXY) // tmp = (2, 3, 0, 1) // v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1) // tmp = v0 - // tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2) + // tmp = shuffle(tmp, tmp, SHUFFLE_XYZW) // tmp = (0+1, 1+0, 2+3, 3+2) // v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3) // // Essentially horizontal addtion of all elements. // // We could achieve the same using SSEv3 instruction @@ -1359,10 +1393,10 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // inst_RV_RV(INS_mulps, targetReg, op2Reg); inst_RV_RV(INS_movaps, tmpReg1, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, 0xb1); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZWXY); inst_RV_RV(INS_addps, targetReg, tmpReg1); inst_RV_RV(INS_movaps, tmpReg1, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, 0x1b); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XYZW); inst_RV_RV(INS_addps, targetReg, tmpReg1); } else @@ -1406,8 +1440,10 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) emitAttr emitSize = emitActualTypeSize(simdEvalType); if (baseType == TYP_FLOAT) { - inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, 0xf1); - + // dpps computes the dot product of the upper & lower halves of the 32-byte register. + // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. + unsigned mask = ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) ? 0x71 : 0xf1; + inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, mask); // dpps computes the dot product of the upper & lower halves of the 32-byte register. // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. // If this is TYP_SIMD32, we need to combine the lower & upper results. @@ -1993,7 +2029,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) getEmitter()->emitIns_R_AR(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, operandReg, 0); // combine upper 4 bytes and lower 8 bytes in targetReg - getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44); + getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX); genProduceReg(treeNode); } @@ -2076,7 +2112,7 @@ void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode) getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs); // combine upper 4 bytes and lower 8 bytes in targetReg - getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44); + getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX); genProduceReg(treeNode); } diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.cs b/tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.cs new file mode 100644 index 0000000000..f38d5ba698 --- /dev/null +++ b/tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.cs @@ -0,0 +1,170 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Regression test for Vector3 intrinsics using upper non-zero'd bits from +// a byref return. + +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Numerics; + +namespace Test +{ + + public class Program + { + static Random random; + + static Program() + { + random = new Random(1); + } + + [MethodImpl( MethodImplOptions.NoInlining )] + public static double StackScribble() + { + double d1 = random.NextDouble(); + double d2 = random.NextDouble(); + double d3 = random.NextDouble(); + double d4 = random.NextDouble(); + double d5 = random.NextDouble(); + double d6 = random.NextDouble(); + double d7 = random.NextDouble(); + double d8 = random.NextDouble(); + double d9 = random.NextDouble(); + double d10 = random.NextDouble(); + double d11 = random.NextDouble(); + double d12 = random.NextDouble(); + double d13 = random.NextDouble(); + double d14 = random.NextDouble(); + double d15 = random.NextDouble(); + double d16 = random.NextDouble(); + double d17 = random.NextDouble(); + double d18 = random.NextDouble(); + double d19 = random.NextDouble(); + double d20 = random.NextDouble(); + double d21 = random.NextDouble(); + double d22 = random.NextDouble(); + double d23 = random.NextDouble(); + double d24 = random.NextDouble(); + double d25 = random.NextDouble(); + double d26 = random.NextDouble(); + double d27 = random.NextDouble(); + double d28 = random.NextDouble(); + double d29 = random.NextDouble(); + double d30 = random.NextDouble(); + double d31 = random.NextDouble(); + double d32 = random.NextDouble(); + double d33 = random.NextDouble(); + double d34 = random.NextDouble(); + double d35 = random.NextDouble(); + double d36 = random.NextDouble(); + double d37 = random.NextDouble(); + double d38 = random.NextDouble(); + double d39 = random.NextDouble(); + double d40 = random.NextDouble(); + return d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + d10 + + d11 + d12 + d13 + d14 + d15 + d16 + d17 + d18 + d19 + d20 + + d21 + d22 + d23 + d24 + d25 + d26 + d27 + d28 + d29 + d20 + + d31 + d32 + d33 + d34 + d35 + d36 + d37 + d38 + d39 + d40; + } + + [MethodImpl( MethodImplOptions.NoInlining )] + public static Vector3 getTestValue(float f1, float f2, float f3) + { + return new Vector3(f1, f2, f3); + } + + public static bool Check(float value, float expectedValue) + { + // These may differ in the last place. + float expectedValueLow; + float expectedValueHigh; + + unsafe + { + UInt32 expectedValueUInt = *(UInt32*)&expectedValue; + UInt32 expectedValueUIntLow = (expectedValueUInt == 0) ? 0 : expectedValueUInt - 1; + UInt32 expectedValueUIntHigh = (expectedValueUInt == 0xffffffff) ? 0xffffffff : expectedValueUInt + 1; + expectedValueLow = *(float*)&expectedValueUIntLow; + expectedValueHigh = *(float*)&expectedValueUIntHigh; + } + float errorMargin = Math.Abs(expectedValueHigh - expectedValueLow); + if (Math.Abs(value - expectedValue) > errorMargin) + { + return false; + } + return true; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static int testDotProduct(Vector3 v0) + { + float f1 = (float)random.NextDouble(); + float f2 = (float)random.NextDouble(); + float f3 = (float)random.NextDouble(); + + Vector3 v1 = Vector3.Normalize(getTestValue(f1, f2, f3) - v0); + Vector3 v2 = new Vector3(f1, f2, f3) - v0; + v2 = v2 / v2.Length(); + + if (!Check(v1.X, v2.X) || !Check(v1.Y, v2.Y) || !Check(v1.Z, v2.Z)) + { + Console.WriteLine("Vectors do not match " + v1 + v2); + return -1; + } + + return 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static int testEquals(Vector3 v0) + { + float f1 = (float)random.NextDouble(); + float f2 = (float)random.NextDouble(); + float f3 = (float)random.NextDouble(); + + Vector3 v1 = new Vector3(f1, f2, f3) - v0; + bool result = v1.Equals(getTestValue(f1, f2, f3) - v0); + + if ((result == false) || !v1.Equals(getTestValue(f1, f2, f3) - v0)) + { + Console.WriteLine("Equals returns wrong value " + v1); + return -1; + } + + return 100; + } + + public static int Main() + { + int returnValue = 100; + Console.WriteLine("Testing Dot Product"); + for (int i = 0; i < 10; i++) + { + StackScribble(); + if (testDotProduct(new Vector3(1.0F, 2.0F, 3.0F)) != 100) + { + Console.WriteLine("Failed on iteration " + i); + returnValue = -1; + break; + } + } + Console.WriteLine("Testing Equals"); + for (int i = 0; i < 10; i++) + { + StackScribble(); + if (testEquals(new Vector3(1.0F, 2.0F, 3.0F)) != 100) + { + Console.WriteLine("Failed on iteration " + i); + returnValue = -1; + break; + } + } + return returnValue; + } + } +} + diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.csproj new file mode 100644 index 0000000000..939d0106ed --- /dev/null +++ b/tests/src/JIT/Regression/JitBlue/GitHub_8220/GitHub_8220.csproj @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <AssemblyName>$(MSBuildProjectName)</AssemblyName> + <SchemaVersion>2.0</SchemaVersion> + <ProjectGuid>{76E69AA0-8C5A-4F76-8561-B8089FFA8D79}</ProjectGuid> + <OutputType>Exe</OutputType> + <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> + <ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT\11.0\UITestExtensionPackages</ReferencePath> + <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir> + + <NuGetPackageImportStamp>7a9bfb7d</NuGetPackageImportStamp> + </PropertyGroup> + <!-- Default configurations to help VS understand the configurations --> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> + </PropertyGroup> + <ItemGroup> + <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies"> + <Visible>False</Visible> + </CodeAnalysisDependentAssemblyPaths> + </ItemGroup> + <PropertyGroup> + <DebugType></DebugType> + <Optimize>True</Optimize> + <AllowUnsafeBlocks>True</AllowUnsafeBlocks> + </PropertyGroup> + <ItemGroup> + <Compile Include="$(MSBuildProjectName).cs" /> + </ItemGroup> + <ItemGroup> + <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" /> + </ItemGroup> + <PropertyGroup> + <ProjectJson>$(JitPackagesConfigFileDirectory)benchmark\project.json</ProjectJson> + <ProjectLockJson>$(JitPackagesConfigFileDirectory)benchmark\project.lock.json</ProjectLockJson> + </PropertyGroup> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> + <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "> + </PropertyGroup> +</Project> |