summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Carol Eidt <carol.eidt@microsoft.com> 2017-09-13 17:36:55 -0700
committer GitHub <noreply@github.com> 2017-09-13 17:36:55 -0700
commit f386cd64979c591a53e0542e5dafbe82ed278da0 (patch)
tree d32ade570d8c64e1d3fa6ec3bb0e72e1d00332b2
parent 0d309876c9c5dd5a8e2d34341ba49fe89bf15404 (diff)
parent 81bc72f1a9478cf8c99279614936776dfc80befc (diff)
download coreclr-f386cd64979c591a53e0542e5dafbe82ed278da0.tar.gz
coreclr-f386cd64979c591a53e0542e5dafbe82ed278da0.tar.bz2
coreclr-f386cd64979c591a53e0542e5dafbe82ed278da0.zip
Merge pull request #13626 from mikedn/test-bt
Lower TEST(x, LSH(1, y)) to BT(x, y)
-rw-r--r-- src/jit/codegenlinear.h 1
-rw-r--r-- src/jit/codegenxarch.cpp 28
-rw-r--r-- src/jit/emitxarch.cpp 27
-rw-r--r-- src/jit/gtlist.h 5
-rw-r--r-- src/jit/instrsxarch.h 4
-rw-r--r-- src/jit/lower.cpp 49
-rw-r--r-- src/jit/lsraxarch.cpp 1
-rw-r--r-- tests/src/JIT/Directed/BitTest/BitTest.cs 120
-rw-r--r-- tests/src/JIT/Directed/BitTest/BitTest.csproj 24
9 files changed, 258 insertions, 1 deletions
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 40f61bce93..f3f6e9d560 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -163,6 +163,7 @@ void genCodeForShiftLong(GenTreePtr tree);
#ifdef _TARGET_XARCH_
void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
+void genCodeForBT(GenTreeOp* bt);
#endif // _TARGET_XARCH_
void genCodeForCast(GenTreeOp* tree);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 5128785c8d..8e772e2819 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1475,6 +1475,30 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
}
//------------------------------------------------------------------------
+// genCodeForBT: Generates code for a GT_BT node.
+//
+// Arguments:
+// bt - The GT_BT node.
+//
+void CodeGen::genCodeForBT(GenTreeOp* bt)
+{
+ assert(bt->OperIs(GT_BT));
+
+ GenTree* op1 = bt->gtGetOp1();
+ GenTree* op2 = bt->gtGetOp2();
+ var_types type = genActualType(op1->TypeGet());
+
+ assert(op1->isUsedFromReg() && op2->isUsedFromReg());
+ assert((genTypeSize(type) >= genTypeSize(TYP_INT)) && (genTypeSize(type) <= genTypeSize(TYP_I_IMPL)));
+
+ genConsumeOperands(bt);
+ // Note that the emitter doesn't fully support INS_bt: it only supports the reg,reg
+ // form and encodes the registers in reverse order. To get the correct order we need
+ // to reverse the operands when calling emitIns_R_R.
+ getEmitter()->emitIns_R_R(INS_bt, emitTypeSize(type), op2->gtRegNum, op1->gtRegNum);
+}
+
+//------------------------------------------------------------------------
// genCodeForJumpTrue: Generates code for jmpTrue statement.
//
// Arguments:
@@ -1875,6 +1899,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genCodeForSetcc(treeNode->AsCC());
break;
+ case GT_BT:
+ genCodeForBT(treeNode->AsOp());
+ break;
+
case GT_RETURNTRAP:
genCodeForReturnTrap(treeNode->AsOp());
break;
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index a24a778b2c..889e07e314 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -1903,6 +1903,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
}
else if (code & 0x00FF0000)
{
+ // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
+ assert(ins != INS_bt);
+
assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
|| (attrSize == EA_16BYTE) // only for x64
|| (ins == INS_movzx) || (ins == INS_movsx));
@@ -2079,6 +2082,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val
UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+ // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
+ // but it requires special handling of the immediate value (it is always encoded in a byte).
+ // Let's not complicate things until this is needed.
+ assert(ins != INS_bt);
+
#ifdef _TARGET_AMD64_
// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
// all other opcodes take a sign-extended 4-byte immediate
@@ -3481,6 +3489,10 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t
insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+ // BT reg,imm might be useful but it requires special handling of the immediate value
+ // (it is always encoded in a byte). Let's not complicate things until this is needed.
+ assert(ins != INS_bt);
+
// Figure out the size of the instruction
switch (ins)
{
@@ -7064,6 +7076,11 @@ void emitter::emitDispIns(
{
printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
}
+ else if (ins == INS_bt)
+ {
+ // INS_bt operands are reversed. Display them in the normal order.
+ printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
+ }
else
{
printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
@@ -7635,6 +7652,9 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else if (code & 0x00FF0000)
{
+ // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
+ assert(ins != INS_bt);
+
// Output the REX prefix
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
@@ -8186,6 +8206,9 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else if (code & 0x00FF0000)
{
+ // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
+ assert(ins != INS_bt);
+
// Output the REX prefix
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
@@ -9558,6 +9581,10 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
ssize_t val = emitGetInsSC(id);
bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+ // BT reg,imm might be useful but it requires special handling of the immediate value
+ // (it is always encoded in a byte). Let's not complicate things until this is needed.
+ assert(ins != INS_bt);
+
if (id->idIsCnsReloc())
{
valInByte = false; // relocs can't be placed in a byte
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 544023391f..314660ff3d 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -225,7 +225,10 @@ GTNODE(JCC , GenTreeCC ,0,GTK_LEAF|GTK_NOVALUE) // Check
// by GenTreeCC::gtCondition is true.
GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF) // Checks the condition flags and produces 1 if the condition specified
// by GenTreeCC::gtCondition is true and 0 otherwise.
-
+#ifdef _TARGET_XARCH_
+GTNODE(BT , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // The XARCH BT instruction. Like CMP, this sets the condition flags (CF
+ // to be precise) and does not produce a value.
+#endif
//-----------------------------------------------------------------------------
// Other nodes that look like unary/binary operators:
//-----------------------------------------------------------------------------
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 095277064e..225539ae39 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -82,6 +82,10 @@ INST4(lea , "lea" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x00008D, B
// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+// Note that the emitter has only partial support for BT. It can only emit the reg,reg form
+// and the registers need to be reversed to get the correct encoding.
+INST3(bt , "bt" , 0, IUM_RD, 0, 1, 0x0F00A3, BAD_CODE, 0x0F00A3)
+
INST3(movsx , "movsx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00BE)
#ifdef _TARGET_AMD64_
INST3(movsxd , "movsxd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x4800000063LL )
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 86c1c5a348..5a8f423355 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -2135,6 +2135,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
// - Narrow operands to enable memory operand containment (XARCH specific).
// - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could
// be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
+// - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific)
void Lowering::LowerCompare(GenTree* cmp)
{
@@ -2532,6 +2533,54 @@ void Lowering::LowerCompare(GenTree* cmp)
#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
#ifdef _TARGET_XARCH_
+ if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE))
+ {
+ //
+ // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT
+ // results in smaller and faster code. It also doesn't have special register
+ // requirements, unlike LSH that requires the shift count to be in ECX.
+ // Note that BT has the same behavior as LSH when the bit index exceeds the
+ // operand bit size - the reg,reg form uses (bit_index MOD bit_size).
+ //
+
+ GenTree* lsh = cmp->gtGetOp2();
+ LIR::Use cmpUse;
+
+ if (lsh->OperIs(GT_LSH) && varTypeIsIntOrI(lsh->TypeGet()) && lsh->gtGetOp1()->IsIntegralConst(1) &&
+ BlockRange().TryGetUse(cmp, &cmpUse))
+ {
+ genTreeOps condition = cmp->OperIs(GT_TEST_NE) ? GT_LT : GT_GE;
+
+ cmp->SetOper(GT_BT);
+ cmp->gtType = TYP_VOID;
+ cmp->gtFlags |= GTF_SET_FLAGS;
+ cmp->gtOp.gtOp2 = lsh->gtGetOp2();
+ cmp->gtGetOp2()->ClearContained();
+
+ BlockRange().Remove(lsh->gtGetOp1());
+ BlockRange().Remove(lsh);
+
+ GenTreeCC* cc;
+
+ if (cmpUse.User()->OperIs(GT_JTRUE))
+ {
+ cmpUse.User()->ChangeOper(GT_JCC);
+ cc = cmpUse.User()->AsCC();
+ cc->gtCondition = condition;
+ }
+ else
+ {
+ cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT);
+ BlockRange().InsertAfter(cmp, cc);
+ cmpUse.ReplaceWith(comp, cc);
+ }
+
+ cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED;
+
+ return;
+ }
+ }
+
if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
{
if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index a42d8ec3fe..280e2f36f0 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -343,6 +343,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
case GT_AND:
case GT_OR:
case GT_XOR:
+ case GT_BT:
info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
break;
diff --git a/tests/src/JIT/Directed/BitTest/BitTest.cs b/tests/src/JIT/Directed/BitTest/BitTest.cs
new file mode 100644
index 0000000000..7cb6a2568d
--- /dev/null
+++ b/tests/src/JIT/Directed/BitTest/BitTest.cs
@@ -0,0 +1,120 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+
+class Program
+{
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I1_BT_reg_reg(sbyte x, int y) => (x & (1 << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I1_BT_mem_reg(ref sbyte x, int y) => (x & (1 << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I2_BT_reg_reg(short x, int y) => (x & (1 << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I2_BT_mem_reg(ref short x, int y) => (x & (1 << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I4_BT_reg_reg(int x, int y) => (x & (1 << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I4_BT_reg_reg_EQ(int x, int y) => (x & (1 << y)) == 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static int I4_BT_reg_reg_JCC(int x, int y) => (x & (1 << y)) == 0 ? (x + 1) : (x - 1);
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I4_BT_mem_reg(ref int x, int y) => (x & (1 << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I8_BT_reg_reg(long x, int y) => (x & (1L << y)) != 0;
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static bool I8_BT_mem_reg(ref long x, int y) => (x & (1L << y)) != 0;
+
+ static int Main()
+ {
+ sbyte i1min = sbyte.MinValue;
+ sbyte i1one = 1;
+ sbyte i1two = 2;
+ short i2min = short.MinValue;
+ short i2one = 1;
+ short i2two = 2;
+ int i4one = 1;
+ int i4two = 2;
+ long i8one = 1;
+ long i8two = 2;
+ bool pass = true;
+
+ pass &= I1_BT_reg_reg(i1min, 7);
+ pass &= I1_BT_reg_reg(i1min, 8);
+ pass &= I1_BT_reg_reg(i1one, 0);
+ pass &= !I1_BT_reg_reg(i1one, 8);
+ pass &= I1_BT_reg_reg(i1one, 32);
+ pass &= !I1_BT_reg_reg(i1two, 0);
+
+ pass &= I1_BT_mem_reg(ref i1min, 7);
+ pass &= I1_BT_mem_reg(ref i1min, 8);
+ pass &= I1_BT_mem_reg(ref i1one, 0);
+ pass &= !I1_BT_mem_reg(ref i1one, 8);
+ pass &= I1_BT_mem_reg(ref i1one, 32);
+ pass &= !I1_BT_mem_reg(ref i1two, 0);
+
+ pass &= I2_BT_reg_reg(i2min, 15);
+ pass &= I2_BT_reg_reg(i2min, 16);
+ pass &= I2_BT_reg_reg(i2one, 0);
+ pass &= !I2_BT_reg_reg(i2one, 16);
+ pass &= I2_BT_reg_reg(i2one, 32);
+ pass &= !I2_BT_reg_reg(i2two, 0);
+
+ pass &= I2_BT_mem_reg(ref i2min, 15);
+ pass &= I2_BT_mem_reg(ref i2min, 16);
+ pass &= I2_BT_mem_reg(ref i2one, 0);
+ pass &= !I2_BT_mem_reg(ref i2one, 16);
+ pass &= I2_BT_mem_reg(ref i2one, 32);
+ pass &= !I2_BT_mem_reg(ref i2two, 0);
+
+ pass &= I4_BT_reg_reg(i4one, 0);
+ pass &= I4_BT_reg_reg(i4one, 32);
+ pass &= !I4_BT_reg_reg(i4two, 0);
+
+ pass &= !I4_BT_reg_reg_EQ(i4one, 0);
+ pass &= !I4_BT_reg_reg_EQ(i4one, 32);
+ pass &= I4_BT_reg_reg_EQ(i4two, 0);
+
+ pass &= I4_BT_reg_reg_JCC(i4one, 0) == 0;
+ pass &= I4_BT_reg_reg_JCC(i4one, 32) == 0;
+ pass &= I4_BT_reg_reg_JCC(i4two, 0) == 3;
+
+ pass &= I4_BT_mem_reg(ref i4one, 0);
+ pass &= I4_BT_mem_reg(ref i4one, 32);
+ pass &= !I4_BT_mem_reg(ref i4two, 0);
+
+ pass &= I8_BT_reg_reg(i8one, 0);
+ pass &= !I8_BT_reg_reg(i8one, 32);
+ pass &= I8_BT_reg_reg(i8one, 64);
+ pass &= !I8_BT_reg_reg(i8two, 0);
+
+ pass &= I8_BT_mem_reg(ref i8one, 0);
+ pass &= !I8_BT_mem_reg(ref i8one, 32);
+ pass &= I8_BT_mem_reg(ref i8one, 64);
+ pass &= !I8_BT_mem_reg(ref i8two, 0);
+
+ if (pass)
+ {
+ Console.WriteLine("PASSED");
+ return 100;
+ }
+ else
+ {
+ Console.WriteLine("FAILED");
+ return 1;
+ }
+ }
+}
diff --git a/tests/src/JIT/Directed/BitTest/BitTest.csproj b/tests/src/JIT/Directed/BitTest/BitTest.csproj
new file mode 100644
index 0000000000..89f824d7a2
--- /dev/null
+++ b/tests/src/JIT/Directed/BitTest/BitTest.csproj
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <AssemblyName>$(MSBuildProjectName)</AssemblyName>
+ <ProjectGuid>{A62D095E-4206-4D11-8762-11DDD63E931E}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <CLRTestPriority>1</CLRTestPriority>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "></PropertyGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="BitTest.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project> \ No newline at end of file