diff options
author | Carol Eidt <carol.eidt@microsoft.com> | 2017-09-13 17:36:55 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-13 17:36:55 -0700 |
commit | f386cd64979c591a53e0542e5dafbe82ed278da0 (patch) | |
tree | d32ade570d8c64e1d3fa6ec3bb0e72e1d00332b2 | |
parent | 0d309876c9c5dd5a8e2d34341ba49fe89bf15404 (diff) | |
parent | 81bc72f1a9478cf8c99279614936776dfc80befc (diff) | |
download | coreclr-f386cd64979c591a53e0542e5dafbe82ed278da0.tar.gz coreclr-f386cd64979c591a53e0542e5dafbe82ed278da0.tar.bz2 coreclr-f386cd64979c591a53e0542e5dafbe82ed278da0.zip |
Merge pull request #13626 from mikedn/test-bt
Lower TEST(x, LSH(1, y)) to BT(x, y)
-rw-r--r-- | src/jit/codegenlinear.h | 1 | ||||
-rw-r--r-- | src/jit/codegenxarch.cpp | 28 | ||||
-rw-r--r-- | src/jit/emitxarch.cpp | 27 | ||||
-rw-r--r-- | src/jit/gtlist.h | 5 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 4 | ||||
-rw-r--r-- | src/jit/lower.cpp | 49 | ||||
-rw-r--r-- | src/jit/lsraxarch.cpp | 1 | ||||
-rw-r--r-- | tests/src/JIT/Directed/BitTest/BitTest.cs | 120 | ||||
-rw-r--r-- | tests/src/JIT/Directed/BitTest/BitTest.csproj | 24 |
9 files changed, 258 insertions, 1 deletion
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 40f61bce93..f3f6e9d560 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -163,6 +163,7 @@ void genCodeForShiftLong(GenTreePtr tree); #ifdef _TARGET_XARCH_ void genCodeForShiftRMW(GenTreeStoreInd* storeInd); +void genCodeForBT(GenTreeOp* bt); #endif // _TARGET_XARCH_ void genCodeForCast(GenTreeOp* tree); diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 5128785c8d..8e772e2819 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1475,6 +1475,30 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) } //------------------------------------------------------------------------ +// genCodeForBT: Generates code for a GT_BT node. +// +// Arguments: +// bt - The node. +// +void CodeGen::genCodeForBT(GenTreeOp* bt) +{ + assert(bt->OperIs(GT_BT)); + + GenTree* op1 = bt->gtGetOp1(); + GenTree* op2 = bt->gtGetOp2(); + var_types type = genActualType(op1->TypeGet()); + + assert(op1->isUsedFromReg() && op2->isUsedFromReg()); + assert((genTypeSize(type) >= genTypeSize(TYP_INT)) && (genTypeSize(type) <= genTypeSize(TYP_I_IMPL))); + + genConsumeOperands(bt); + // Note that the emitter doesn't fully support INS_bt, it only supports the reg,reg + // form and encodes the registers in reverse order. To get the correct order we need + // to reverse the operands when calling emitIns_R_R. + getEmitter()->emitIns_R_R(INS_bt, emitTypeSize(type), op2->gtRegNum, op1->gtRegNum); +} + +//------------------------------------------------------------------------ // genCodeForJumpTrue: Generates code for jmpTrue statement. 
// // Arguments: @@ -1875,6 +1899,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genCodeForSetcc(treeNode->AsCC()); break; + case GT_BT: + genCodeForBT(treeNode->AsOp()); + break; + case GT_RETURNTRAP: genCodeForReturnTrap(treeNode->AsOp()); break; diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index a24a778b2c..889e07e314 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -1903,6 +1903,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) } else if (code & 0x00FF0000) { + // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix. + assert(ins != INS_bt); + assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 || (attrSize == EA_16BYTE) // only for x64 || (ins == INS_movzx) || (ins == INS_movsx)); @@ -2079,6 +2082,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful + // but it requires special handling of the immediate value (it is always encoded in a byte). + // Let's not complicate things until this is needed. + assert(ins != INS_bt); + #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate @@ -3481,6 +3489,10 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + // BT reg,imm might be useful but it requires special handling of the immediate value + // (it is always encoded in a byte). Let's not complicate things until this is needed. 
+ assert(ins != INS_bt); + // Figure out the size of the instruction switch (ins) { @@ -7064,6 +7076,11 @@ void emitter::emitDispIns( { printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr)); } + else if (ins == INS_bt) + { + // INS_bt operands are reversed. Display them in the normal order. + printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr)); + } else { printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr)); @@ -7635,6 +7652,9 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } else if (code & 0x00FF0000) { + // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix. + assert(ins != INS_bt); + // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); @@ -8186,6 +8206,9 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } else if (code & 0x00FF0000) { + // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix. + assert(ins != INS_bt); + // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); @@ -9558,6 +9581,10 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) ssize_t val = emitGetInsSC(id); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + // BT reg,imm might be useful but it requires special handling of the immediate value + // (it is always encoded in a byte). Let's not complicate things until this is needed. + assert(ins != INS_bt); + if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index 544023391f..314660ff3d 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -225,7 +225,10 @@ GTNODE(JCC , GenTreeCC ,0,GTK_LEAF|GTK_NOVALUE) // Check // by GenTreeCC::gtCondition is true. 
GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF) // Checks the condition flags and produces 1 if the condition specified // by GenTreeCC::gtCondition is true and 0 otherwise. - +#ifdef _TARGET_XARCH_ +GTNODE(BT , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // The XARCH BT instruction. Like CMP, this sets the condition flags (CF + // to be precise) and does not produce a value. +#endif //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: //----------------------------------------------------------------------------- diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 095277064e..225539ae39 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -82,6 +82,10 @@ INST4(lea , "lea" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x00008D, B // enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M +// Note that emitter has only partial support for BT. It can only emit the reg,reg form +// and the registers need to be reversed to get the correct encoding. +INST3(bt , "bt" , 0, IUM_RD, 0, 1, 0x0F00A3, BAD_CODE, 0x0F00A3) + INST3(movsx , "movsx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00BE) #ifdef _TARGET_AMD64_ INST3(movsxd , "movsxd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x4800000063LL ) diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 86c1c5a348..5a8f423355 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -2135,6 +2135,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget // - Narrow operands to enable memory operand containment (XARCH specific). // - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could // be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added). 
+// - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific) void Lowering::LowerCompare(GenTree* cmp) { @@ -2532,6 +2533,54 @@ void Lowering::LowerCompare(GenTree* cmp) #endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) #ifdef _TARGET_XARCH_ + if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE)) + { + // + // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT + // results in smaller and faster code. It also doesn't have special register + // requirements, unlike LSH that requires the shift count to be in ECX. + // Note that BT has the same behavior as LSH when the bit index exceeds the + // operand bit size - it uses (bit_index MOD bit_size). + // + + GenTree* lsh = cmp->gtGetOp2(); + LIR::Use cmpUse; + + if (lsh->OperIs(GT_LSH) && varTypeIsIntOrI(lsh->TypeGet()) && lsh->gtGetOp1()->IsIntegralConst(1) && + BlockRange().TryGetUse(cmp, &cmpUse)) + { + genTreeOps condition = cmp->OperIs(GT_TEST_NE) ? GT_LT : GT_GE; + + cmp->SetOper(GT_BT); + cmp->gtType = TYP_VOID; + cmp->gtFlags |= GTF_SET_FLAGS; + cmp->gtOp.gtOp2 = lsh->gtGetOp2(); + cmp->gtGetOp2()->ClearContained(); + + BlockRange().Remove(lsh->gtGetOp1()); + BlockRange().Remove(lsh); + + GenTreeCC* cc; + + if (cmpUse.User()->OperIs(GT_JTRUE)) + { + cmpUse.User()->ChangeOper(GT_JCC); + cc = cmpUse.User()->AsCC(); + cc->gtCondition = condition; + } + else + { + cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT); + BlockRange().InsertAfter(cmp, cc); + cmpUse.ReplaceWith(comp, cc); + } + + cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED; + + return; + } + } + if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet()) { if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet())) diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index a42d8ec3fe..280e2f36f0 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -343,6 +343,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) case GT_AND: case GT_OR: case GT_XOR: + case 
GT_BT: info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1); info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2); break; diff --git a/tests/src/JIT/Directed/BitTest/BitTest.cs b/tests/src/JIT/Directed/BitTest/BitTest.cs new file mode 100644 index 0000000000..7cb6a2568d --- /dev/null +++ b/tests/src/JIT/Directed/BitTest/BitTest.cs @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; + +class Program +{ + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I1_BT_reg_reg(sbyte x, int y) => (x & (1 << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I1_BT_mem_reg(ref sbyte x, int y) => (x & (1 << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I2_BT_reg_reg(short x, int y) => (x & (1 << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I2_BT_mem_reg(ref short x, int y) => (x & (1 << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I4_BT_reg_reg(int x, int y) => (x & (1 << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I4_BT_reg_reg_EQ(int x, int y) => (x & (1 << y)) == 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static int I4_BT_reg_reg_JCC(int x, int y) => (x & (1 << y)) == 0 ? 
(x + 1) : (x - 1); + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I4_BT_mem_reg(ref int x, int y) => (x & (1 << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I8_BT_reg_reg(long x, int y) => (x & (1L << y)) != 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool I8_BT_mem_reg(ref long x, int y) => (x & (1L << y)) != 0; + + static int Main() + { + sbyte i1min = sbyte.MinValue; + sbyte i1one = 1; + sbyte i1two = 2; + short i2min = short.MinValue; + short i2one = 1; + short i2two = 2; + int i4one = 1; + int i4two = 2; + long i8one = 1; + long i8two = 2; + bool pass = true; + + pass &= I1_BT_reg_reg(i1min, 7); + pass &= I1_BT_reg_reg(i1min, 8); + pass &= I1_BT_reg_reg(i1one, 0); + pass &= !I1_BT_reg_reg(i1one, 8); + pass &= I1_BT_reg_reg(i1one, 32); + pass &= !I1_BT_reg_reg(i1two, 0); + + pass &= I1_BT_mem_reg(ref i1min, 7); + pass &= I1_BT_mem_reg(ref i1min, 8); + pass &= I1_BT_mem_reg(ref i1one, 0); + pass &= !I1_BT_mem_reg(ref i1one, 8); + pass &= I1_BT_mem_reg(ref i1one, 32); + pass &= !I1_BT_mem_reg(ref i1two, 0); + + pass &= I2_BT_reg_reg(i2min, 15); + pass &= I2_BT_reg_reg(i2min, 16); + pass &= I2_BT_reg_reg(i2one, 0); + pass &= !I2_BT_reg_reg(i2one, 16); + pass &= I2_BT_reg_reg(i2one, 32); + pass &= !I2_BT_reg_reg(i2two, 0); + + pass &= I2_BT_mem_reg(ref i2min, 15); + pass &= I2_BT_mem_reg(ref i2min, 16); + pass &= I2_BT_mem_reg(ref i2one, 0); + pass &= !I2_BT_mem_reg(ref i2one, 16); + pass &= I2_BT_mem_reg(ref i2one, 32); + pass &= !I2_BT_mem_reg(ref i2two, 0); + + pass &= I4_BT_reg_reg(i4one, 0); + pass &= I4_BT_reg_reg(i4one, 32); + pass &= !I4_BT_reg_reg(i4two, 0); + + pass &= !I4_BT_reg_reg_EQ(i4one, 0); + pass &= !I4_BT_reg_reg_EQ(i4one, 32); + pass &= I4_BT_reg_reg_EQ(i4two, 0); + + pass &= I4_BT_reg_reg_JCC(i4one, 0) == 0; + pass &= I4_BT_reg_reg_JCC(i4one, 32) == 0; + pass &= I4_BT_reg_reg_JCC(i4two, 0) == 3; + + pass &= I4_BT_mem_reg(ref i4one, 0); + pass &= I4_BT_mem_reg(ref i4one, 32); + pass &= 
!I4_BT_mem_reg(ref i4two, 0); + + pass &= I8_BT_reg_reg(i8one, 0); + pass &= !I8_BT_reg_reg(i8one, 32); + pass &= I8_BT_reg_reg(i8one, 64); + pass &= !I8_BT_reg_reg(i8two, 0); + + pass &= I8_BT_mem_reg(ref i8one, 0); + pass &= !I8_BT_mem_reg(ref i8one, 32); + pass &= I8_BT_mem_reg(ref i8one, 64); + pass &= !I8_BT_mem_reg(ref i8two, 0); + + if (pass) + { + Console.WriteLine("PASSED"); + return 100; + } + else + { + Console.WriteLine("FAILED"); + return 1; + } + } +} diff --git a/tests/src/JIT/Directed/BitTest/BitTest.csproj b/tests/src/JIT/Directed/BitTest/BitTest.csproj new file mode 100644 index 0000000000..89f824d7a2 --- /dev/null +++ b/tests/src/JIT/Directed/BitTest/BitTest.csproj @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <AssemblyName>$(MSBuildProjectName)</AssemblyName> + <ProjectGuid>{A62D095E-4206-4D11-8762-11DDD63E931E}</ProjectGuid> + <OutputType>Exe</OutputType> + <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir> + <CLRTestPriority>1</CLRTestPriority> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "></PropertyGroup> + <PropertyGroup> + <DebugType>None</DebugType> + <Optimize>True</Optimize> + </PropertyGroup> + <ItemGroup> + <Compile Include="BitTest.cs" /> + </ItemGroup> + <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> + <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' 
"></PropertyGroup> +</Project>
\ No newline at end of file |