diff options
author | Eugene Rozenfeld <erozen@microsoft.com> | 2015-10-12 18:36:25 -0700 |
---|---|---|
committer | Eugene Rozenfeld <erozen@microsoft.com> | 2015-10-21 18:51:48 -0700 |
commit | 7b4508ba9419157570a9c47e4ebc8a79dcc53a11 (patch) | |
tree | cf3feb736ff173462fc82b71633f2d66e75ee9cc /src/jit | |
parent | 65b55ab02eba5fc71daadb0db9f4d4a424906dba (diff) | |
download | coreclr-7b4508ba9419157570a9c47e4ebc8a79dcc53a11.tar.gz coreclr-7b4508ba9419157570a9c47e4ebc8a79dcc53a11.tar.bz2 coreclr-7b4508ba9419157570a9c47e4ebc8a79dcc53a11.zip |
Generate efficient code for rotation patterns.
This change adds code to recognize rotation idioms and generate efficient instructions for them.
Two new operators are added: GT_ROL and GT_ROR.
The patterns recognized:
(x << c1) | (x >>> c2) => x rol c1
(x >>> c1) | (x << c2) => x ror c1
where c1 and c2 are constant and c1 + c2 == bitsize(x)
(x << y) | (x >>> (N - y)) => x rol y
(x >>> y) | (x << (N - y)) => x ror y
where N == bitsize(x)
(x << y & M1) | (x >>> (N - y) & M2) => x rol y
(x >>> y & M1) | (x << (N - y) & M2) => x ror y
where N == bitsize(x)
M1 & (N - 1) == N - 1
M2 & (N - 1) == N - 1
For a simple benchmark with 4 rotation patterns in a tight loop
time goes from 7.324 to 2.600 (2.8 speedup).
Rotations found and optimized in mscorlib:
System.Security.Cryptography.SHA256Managed::RotateRight
System.Security.Cryptography.SHA384Managed::RotateRight
System.Security.Cryptography.SHA512Managed::RotateRight
System.Security.Cryptography.RIPEMD160Managed::MDTransform (320 instances!)
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol1
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol5
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol30
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Drain
(9 instances of Sha1ForNonSecretPurposes::Rol* inlined)
Closes #1619.
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegenarm64.cpp | 4 | ||||
-rw-r--r-- | src/jit/codegenxarch.cpp | 28 | ||||
-rw-r--r-- | src/jit/compiler.h | 4 | ||||
-rw-r--r-- | src/jit/emitxarch.cpp | 22 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 22 | ||||
-rw-r--r-- | src/jit/gentree.h | 18 | ||||
-rw-r--r-- | src/jit/gtlist.h | 2 | ||||
-rw-r--r-- | src/jit/instr.cpp | 6 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 7 | ||||
-rw-r--r-- | src/jit/lclvars.cpp | 4 | ||||
-rw-r--r-- | src/jit/lower.cpp | 7 | ||||
-rw-r--r-- | src/jit/lower.h | 1 | ||||
-rw-r--r-- | src/jit/lowerarm.cpp | 5 | ||||
-rw-r--r-- | src/jit/lowerarm64.cpp | 26 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 15 | ||||
-rw-r--r-- | src/jit/lsra.cpp | 2 | ||||
-rw-r--r-- | src/jit/morph.cpp | 220 | ||||
-rw-r--r-- | src/jit/optcse.cpp | 2 | ||||
-rw-r--r-- | src/jit/valuenum.cpp | 23 |
19 files changed, 395 insertions, 23 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 93ad2a3bdf..952a7d024e 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2411,6 +2411,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROR: genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode); // genCodeForShift() calls genProduceReg() break; @@ -4432,6 +4433,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) case GT_OR: ins = INS_orr; break; + case GT_ROR: + ins = INS_ror; + break; case GT_RSH: ins = INS_asr; break; diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 7064862c4c..d55a6b37f7 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1516,6 +1516,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode); // genCodeForShift() calls genProduceReg() break; @@ -2584,7 +2586,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { if (data->OperGet() == GT_LSH || data->OperGet() == GT_RSH || - data->OperGet() == GT_RSZ) + data->OperGet() == GT_RSZ || + data->OperGet() == GT_ROL || + data->OperGet() == GT_ROR) { genCodeForShift(addr, data->gtOp.gtOp2, data); } @@ -4644,11 +4648,13 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { case GT_ADD: ins = INS_add; break; case GT_AND: ins = INS_and; break; - case GT_MUL: ins = INS_imul; break; case GT_LSH: ins = INS_shl; break; + case GT_MUL: ins = INS_imul; break; case GT_NEG: ins = INS_neg; break; case GT_NOT: ins = INS_not; break; case GT_OR: ins = INS_or; break; + case GT_ROL: ins = INS_rol; break; + case GT_ROR: ins = INS_ror; break; case GT_RSH: ins = INS_sar; break; case GT_RSZ: ins = INS_shr; break; case GT_SUB: ins = INS_sub; break; @@ -4660,10 +4666,10 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) } 
/** Generates the code sequence for a GenTree node that - * represents a bit shift operation (<<, >>, >>>). + * represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). * - * Arguments: operand: the value to be shifted by shiftBy bits. - * shiftBy: the number of bits to shift the operand. + * Arguments: operand: the value to be shifted or rotated by shiftBy bits. + * shiftBy: the number of bits to shift or rotate the operand. * parent: the actual bitshift node (that specifies the * type of bitshift to perform. * @@ -4758,6 +4764,12 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, case INS_shr: ins = INS_shr_1; break; + case INS_rol: + ins = INS_rol_1; + break; + case INS_ror: + ins = INS_ror_1; + break; default: // leave 'ins' unchanged break; @@ -4777,6 +4789,12 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, case INS_shr: ins = INS_shr_N; break; + case INS_rol: + ins = INS_rol_N; + break; + case INS_ror: + ins = INS_ror_N; + break; default: // leave 'ins' unchanged break; diff --git a/src/jit/compiler.h b/src/jit/compiler.h index bc851dcf1d..13c64b7bac 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -4435,7 +4435,9 @@ private: void fgFixupIfCallArg(ArrayStack<GenTree *> *parentStack, GenTree *oldChild, GenTree *newChild); - + // Recognize a rotation pattern and convert into a GT_ROL or a GT_ROR node. 
+ GenTreePtr fgMorphRotation(GenTreePtr tree); + //-------- Determine the order in which the trees will be evaluated ------- unsigned fgTreeSeqNum; diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index d6de1f2dba..d052ce212c 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -3389,6 +3389,8 @@ void emitter::emitIns_R_I(instruction ins, case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -3968,6 +3970,8 @@ void emitter::emitIns_C_I (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -4146,6 +4150,8 @@ void emitter::emitIns_I_AR (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -4212,6 +4218,8 @@ void emitter::emitIns_I_AI (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -4474,6 +4482,8 @@ void emitter::emitIns_I_ARR (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -4620,6 +4630,8 @@ void emitter::emitIns_I_ARX (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -4768,6 +4780,8 @@ void emitter::emitIns_I_AX (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -5050,6 +5064,8 @@ void emitter::emitIns_S_I (instruction ins, { case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: @@ -6363,6 +6379,8 @@ void emitter::emitDispShift(instruction ins, int cnt) { case INS_rcl_1: case INS_rcr_1: + case INS_rol_1: + case INS_ror_1: case INS_shl_1: case INS_shr_1: case INS_sar_1: @@ -6371,6 +6389,8 @@ void 
emitter::emitDispShift(instruction ins, int cnt) case INS_rcl: case INS_rcr: + case INS_rol: + case INS_ror: case INS_shl: case INS_shr: case INS_sar: @@ -6379,6 +6399,8 @@ void emitter::emitDispShift(instruction ins, int cnt) case INS_rcl_N: case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 3c06925fe4..d6bd3af777 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -3694,6 +3694,8 @@ COMMON_CNS: case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: case GT_ASG_LSH: case GT_ASG_RSH: case GT_ASG_RSZ: @@ -9248,6 +9250,8 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: case GT_ASG_LSH: case GT_ASG_RSH: case GT_ASG_RSZ: @@ -9943,9 +9947,13 @@ CHK_OVF: case GT_LSH: i1 <<= (i2 & 0x1f); break; case GT_RSH: i1 >>= (i2 & 0x1f); break; case GT_RSZ: - /* logical shift -> make it unsigned to propagate the sign bit */ + /* logical shift -> make it unsigned to not propagate the sign bit */ i1 = UINT32(i1) >> (i2 & 0x1f); break; + case GT_ROL: i1 = (i1 << (i2 & 0x1f)) | (UINT32(i1) >> ((32 - i2) & 0x1f)); + break; + case GT_ROR: i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f)); + break; /* DIV and MOD can generate an INT 0 - if division by 0 * or overflow - when dividing MIN by -1 */ @@ -10284,11 +10292,15 @@ LNG_ADD_CHKOVF: case GT_XOR: lval1 ^= lval2; break; case GT_AND: lval1 &= lval2; break; - case GT_LSH: lval1 <<= (op2->gtIntConCommon.IconValue() & 0x3f); break; - case GT_RSH: lval1 >>= (op2->gtIntConCommon.IconValue() & 0x3f); break; + case GT_LSH: lval1 <<= (lval2 & 0x3f); break; + case GT_RSH: lval1 >>= (lval2 & 0x3f); break; case GT_RSZ: - /* logical shift -> make it unsigned to propagate the sign bit */ - lval1 = UINT64(lval1) >> (op2->gtIntConCommon.IconValue() & 0x3f); + /* logical shift -> make it unsigned to not propagate the sign bit */ + 
lval1 = UINT64(lval1) >> (lval2 & 0x3f); + break; + case GT_ROL: lval1 = (lval1 << (lval2 & 0x3f)) | (UINT64(lval1) >> ((64 - lval2) & 0x3f)); + break; + case GT_ROR: lval1 = (lval1 << ((64 - lval2) & 0x3f)) | (UINT64(lval1) >> (lval2 & 0x3f)); break; //Both DIV and IDIV on x86 raise an exception for min_int (and min_long) / -1. So we preserve diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 1402445da0..431f5c85b1 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1086,23 +1086,25 @@ public: int OperIsArithmetic() const { genTreeOps op = OperGet(); - return op==GT_ADD - || op==GT_SUB - || op==GT_MUL + return op==GT_ADD + || op==GT_SUB + || op==GT_MUL || op==GT_DIV || op==GT_MOD - + || op==GT_UDIV || op==GT_UMOD - || op==GT_OR + || op==GT_OR || op==GT_XOR || op==GT_AND || op==GT_LSH || op==GT_RSH - || op==GT_RSZ; - + || op==GT_RSZ + + || op==GT_ROL + || op==GT_ROR; } static @@ -3506,6 +3508,8 @@ inline bool GenTree::RequiresNonNullOp2(genTreeOps oper) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: case GT_INDEX: case GT_ASG: case GT_ASG_ADD: diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index 9283374592..a23d8ad579 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -108,6 +108,8 @@ GTNODE(GT_AND , "&" ,1,GTK_BINOP|GTK_LOGOP) GTNODE(GT_LSH , "<<" ,0,GTK_BINOP) GTNODE(GT_RSH , ">>" ,0,GTK_BINOP) GTNODE(GT_RSZ , ">>>" ,0,GTK_BINOP) +GTNODE(GT_ROL , "rol" ,0,GTK_BINOP) +GTNODE(GT_ROR , "ror" ,0,GTK_BINOP) GTNODE(GT_MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) GTNODE(GT_ASG , "=" ,0,GTK_BINOP|GTK_ASGOP) diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index d1cc16f18e..c63952ae12 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -2627,6 +2627,8 @@ void CodeGen::inst_RV_SH(instruction ins, assert(ins == INS_rcl || ins == INS_rcr || + ins == INS_rol || + ins == INS_ror || ins == INS_shl || ins == INS_shr || ins == INS_sar); @@ -2639,6 +2641,8 @@ void 
CodeGen::inst_RV_SH(instruction ins, assert(INS_rcl + 1 == INS_rcl_1); assert(INS_rcr + 1 == INS_rcr_1); + assert(INS_rol + 1 == INS_rol_1); + assert(INS_ror + 1 == INS_ror_1); assert(INS_shl + 1 == INS_shl_1); assert(INS_shr + 1 == INS_shr_1); assert(INS_sar + 1 == INS_sar_1); @@ -2651,6 +2655,8 @@ void CodeGen::inst_RV_SH(instruction ins, assert(INS_rcl + 2 == INS_rcl_N); assert(INS_rcr + 2 == INS_rcr_N); + assert(INS_rol + 2 == INS_rol_N); + assert(INS_ror + 2 == INS_ror_N); assert(INS_shl + 2 == INS_shl_N); assert(INS_shr + 2 == INS_shr_N); assert(INS_sar + 2 == INS_sar_N); diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index c0be9aae73..9f8a9956a3 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -339,6 +339,13 @@ INST2(ret , "ret" , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2) INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2) INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8) +INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE) +INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0) +INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0) +INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE) +INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0) +INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0) + INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE) INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0) INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0) diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index b9e89f156d..15c85040ed 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -2899,7 +2899,9 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree) if (tree->gtOper == GT_LSH || tree->gtOper == GT_RSH || - tree->gtOper == GT_RSZ) + tree->gtOper == GT_RSZ || + tree->gtOper == GT_ROL || + tree->gtOper == GT_ROR) { if (tree->gtType == TYP_INT) { diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 5882ecfa71..d94b0ba1c7 100644 --- a/src/jit/lower.cpp +++ 
b/src/jit/lower.cpp @@ -400,6 +400,8 @@ void Lowering::DecomposeNode(GenTreePtr* pTree, Compiler::fgWalkData* data) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: case GT_MULHI: NYI("Arithmetic binary operators on TYP_LONG"); break; @@ -525,6 +527,11 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data) } break; + case GT_ROL: + case GT_ROR: + LowerRotate(*ppTree); + break; + #ifdef FEATURE_SIMD case GT_SIMD: if ((*ppTree)->TypeGet() == TYP_SIMD12) diff --git a/src/jit/lower.h b/src/jit/lower.h index 6754b7b75d..03f7a6e71f 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -161,6 +161,7 @@ private: void HandleIndirAddressExpression(GenTree *indirTree, GenTree* tree); void LowerGCWriteBarrier(GenTree *tree); void LowerArrElem(GenTree **ppTree, Compiler::fgWalkData* data); + void LowerRotate(GenTree *tree); // Utility functions void MorphBlkIntoHelperCall (GenTreePtr pTree, GenTreePtr treeStmt); diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp index d99347d335..6669370604 100644 --- a/src/jit/lowerarm.cpp +++ b/src/jit/lowerarm.cpp @@ -41,6 +41,11 @@ void Lowering::LowerCntBlockOp(GenTreePtr *ppTree) NYI_ARM("ARM Lowering for BlockOp"); } +void Lowering::LowerRotate(GenTreePtr tree) +{ + NYI_ARM("ARM Lowering for ROL and ROR"); +} + void Lowering::TreeNodeInfoInitCall(GenTree *tree, TreeNodeInfo &info, int &srcCount, // out int &dstCount // out diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index 9467813833..f05fd80b5d 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -486,6 +486,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROR: { info->srcCount = 2; info->dstCount = 1; @@ -1625,6 +1626,31 @@ void Lowering::LowerCast( GenTreePtr* ppTree) } } +void Lowering::LowerRotate(GenTreePtr tree) +{ + if (tree->OperGet() == GT_ROL) + { + // There is no ROL instruction on ARM. Convert rol into ROR. 
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1; + unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; + GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2; + + if (rotateLeftIndexNode->IsCnsIntOrI()) + { + ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex; + } + else + { + GenTreePtr tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); + rotateLeftIndexNode->InsertAfterSelf(tmp); + tree->gtOp.gtOp2 = tmp; + } + tree->ChangeOper(GT_ROR); + } +} + // TODO-Cleanup: move to Lower.cpp? void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate) { diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index a7b4600df9..c9df42e6a6 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -719,6 +719,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: { info->srcCount = 2; info->dstCount = 1; @@ -2914,7 +2916,9 @@ bool Lowering::LowerStoreInd(GenTreePtr tree) oper != GT_XOR && oper != GT_LSH && oper != GT_RSH && - oper != GT_RSZ) + oper != GT_RSZ && + oper != GT_ROL && + oper != GT_ROR) { JITDUMP("Lower of StoreInd didn't mark the node as self contained\n"); JITDUMP("because the node operator not yet supported:\n"); @@ -2924,7 +2928,9 @@ bool Lowering::LowerStoreInd(GenTreePtr tree) if ((oper == GT_LSH || oper == GT_RSH || - oper == GT_RSZ) && + oper == GT_RSZ || + oper == GT_ROL || + oper == GT_ROR) && varTypeIsSmall(tree)) { //In ldind, Integer values smaller than 4 bytes, a boolean, or a character converted to 4 bytes by sign or zero-extension as appropriate. @@ -3017,6 +3023,11 @@ bool Lowering::LowerStoreInd(GenTreePtr tree) } } +void Lowering::LowerRotate(GenTreePtr tree) +{ + // xarch supports both ROL and ROR instructions so no lowering is required. 
+} + void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate) { GenTreePtr indirDst = storeInd->gtGetOp1(); diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 8f11af9878..5515f83dc9 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2479,6 +2479,8 @@ LinearScan::getKillSetForNode(GenTree* tree) case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: if (tree->gtLsraInfo.isHelperCallWithKills) { killMask = RBM_CALLEE_TRASH; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index b000f58969..2cdbe0ed05 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -10324,6 +10324,17 @@ CM_ADD_OP: } #endif // LEA_AVAILABLE } + else if(oper == GT_OR) + { + tree = fgMorphRotation(tree); + + // fgMorphRotation may return a new tree + oper = tree->OperGet(); + typ = tree->TypeGet(); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; + } + break; case GT_CHS: @@ -10929,7 +10940,8 @@ CM_ADD_OP: /* for the shift nodes the type of op2 can differ from the tree type */ if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT)) { - noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)); + noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ) || + (oper == GT_ROL) || (oper == GT_ROR)); GenTreePtr commaOp2 = op2->gtOp.gtOp2; @@ -11864,6 +11876,208 @@ GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree) return result; } +//------------------------------------------------------------------------------ +// fgMorphRotation : Check if the tree represents a left or right rotation. If so, return +// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree. +// +// Arguments: +// tree - tree to check for a rotation pattern +// +// Return Value: +// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise. +// +// Assumption: +// The input is a GT_OR tree. 
+ +GenTreePtr Compiler::fgMorphRotation(GenTreePtr tree) +{ +#ifndef LEGACY_BACKEND + // + // Check for a rotation pattern, e.g., + // + // OR ROL + // / \ / \ + // LSH RSZ -> x y + // / \ / \ + // x AND x AND + // / \ / \ + // y 31 ADD 31 + // / \ + // NEG 32 + // | + // y + + genTreeOps oper = tree->OperGet(); + noway_assert(oper == GT_OR); + + if ((tree->gtFlags & GTF_ALL_EFFECT) != 0) + { + return tree; // Can't do anything due to side effects. + } + + // Check if we have an LSH on one side of the OR and an RSZ on the other side. + GenTreePtr op1 = tree->gtGetOp1(); + GenTreePtr op2 = tree->gtGetOp2(); + GenTreePtr leftShiftTree = nullptr; + GenTreePtr rightShiftTree = nullptr; + if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ)) + { + leftShiftTree = op1; + rightShiftTree = op2; + } + else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH)) + { + leftShiftTree = op2; + rightShiftTree = op1; + } + else + { + return tree; + } + + // Check if the trees representing the value to shift are identical. + // We already checked that there are no side effects above. + if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1())) + { + GenTreePtr rotatedValue = leftShiftTree->gtGetOp1(); + ssize_t rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; + noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64)); + GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2(); + GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2(); + + // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits + // shouldn't be masked for the transformation to be valid. If additional + // higher bits are not masked, the transformation is still valid since the result + // of MSIL shift instructions is unspecified if the shift amount is greater or equal + // than the width of the value being shifted. 
+ ssize_t minimalMask = rotatedValueBitSize - 1; + ssize_t leftShiftMask = -1; + ssize_t rightShiftMask = -1; + + if ((leftShiftIndex->OperGet() == GT_AND)) + { + if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI()) + { + leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal; + leftShiftIndex = leftShiftIndex->gtGetOp1(); + } + else + { + return tree; + } + } + + if ((rightShiftIndex->OperGet() == GT_AND)) + { + if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI()) + { + rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal; + rightShiftIndex = rightShiftIndex->gtGetOp1(); + } + else + { + return tree; + } + } + + if (((minimalMask & leftShiftMask) != minimalMask) || + ((minimalMask & rightShiftMask) != minimalMask)) + { + // The shift index is overmasked, e.g., we have + // something like (x << y & 15) or + // (x >> (32 - y) & 15 with 32 bit x. + // The transformation is not valid. + return tree; + } + + GenTreePtr shiftIndexWithAdd = nullptr; + GenTreePtr shiftIndexWithoutAdd = nullptr; + genTreeOps rotateOp = GT_NONE; + GenTreePtr rotateIndex = nullptr; + + if (leftShiftIndex->OperGet() == GT_ADD) + { + shiftIndexWithAdd = leftShiftIndex; + shiftIndexWithoutAdd = rightShiftIndex; + rotateOp = GT_ROR; + } + else if (rightShiftIndex->OperGet() == GT_ADD) + { + shiftIndexWithAdd = rightShiftIndex; + shiftIndexWithoutAdd = leftShiftIndex; + rotateOp = GT_ROL; + } + + if (shiftIndexWithAdd != nullptr) + { + if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI()) + { + if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize) + { + if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG) + { + if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd)) + { + // We found one of these patterns: + // (x << (y & M)) | (x >>> ((-y + N) & M)) + // (x << y) | (x >>> (-y + N)) + // (x >>> (y & M)) | (x << ((-y + N) & M)) + // (x >>> y) | (x << (-y + N)) + // where N == bitsize(x), M is const, and + // M & (N - 1) == N - 1 + 
+#ifndef _TARGET_64BIT_ + if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64)) + { + // TODO: we need to handle variable-sized long shifts specially on x86. + // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need + // to add helpers for GT_ROL and GT_ROR. + NYI("Rotation of a long value by variable amount"); + } +#endif + + rotateIndex = shiftIndexWithoutAdd; + } + } + } + } + } + else if ((leftShiftIndex->IsCnsIntOrI() && + rightShiftIndex->IsCnsIntOrI())) + { + if (leftShiftIndex->gtIntCon.gtIconVal + + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize) + { + // We found this pattern: + // (x << c1) | (x >>> c2) + // where c1 and c2 are const and c1 + c2 == bitsize(x) + rotateOp = GT_ROL; + rotateIndex = leftShiftIndex; + } + } + + if (rotateIndex != nullptr) + { + noway_assert((rotateOp == GT_ROL) || (rotateOp == GT_ROR)); + + // We can use the same tree only during global morph; reusing the tree in a later morph + // may invalidate value numbers. + if (fgGlobalMorph) + { + tree->gtOp.gtOp1 = rotatedValue; + tree->gtOp.gtOp2 = rotateIndex; + tree->ChangeOper(rotateOp); + } + else + { + tree = gtNewOperNode(rotateOp, genActualType(rotatedValue->gtType), rotatedValue, rotateIndex); + } + return tree; + } + } +#endif //LEGACY_BACKEND + return tree; +} #if !CPU_HAS_FP_SUPPORT GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree) @@ -12054,7 +12268,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree) /***************************************************************************** * - * Transform the given tree for code generation and returns an equivalent tree. + * Transform the given tree for code generation and return an equivalent tree. 
*/ @@ -15198,6 +15412,8 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: case GT_EQ: case GT_NE: case GT_LT: diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp index adc32af2ee..c188f4ac98 100644 --- a/src/jit/optcse.cpp +++ b/src/jit/optcse.cpp @@ -2022,6 +2022,8 @@ bool Compiler::optIsCSEcandidate(GenTreePtr tree) case GT_XOR: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: return true; // CSE these Binary Operators case GT_ADD: // Check for ADDRMODE flag on these Binary Operators diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp index 6178cdfd61..9476e5472f 100644 --- a/src/jit/valuenum.cpp +++ b/src/jit/valuenum.cpp @@ -345,6 +345,25 @@ T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet) { return UINT32(v0) >> v1; } + case GT_ROL: + if (sizeof(T) == 8) + { + return (v0 << v1) | (UINT64(v0) >> (64 - v1)); + } + else + { + return (v0 << v1) | (UINT32(v0) >> (32 - v1)); + } + + case GT_ROR: + if (sizeof(T) == 8) + { + return (v0 << (64 - v1)) | (UINT64(v0) >> v1); + } + else + { + return (v0 << (32 - v1)) | (UINT32(v0) >> v1); + } case GT_DIV: case GT_MOD: @@ -1050,8 +1069,12 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V case GT_LSH: case GT_RSH: case GT_RSZ: + case GT_ROL: + case GT_ROR: // (x << 0) => x // (x >> 0) => x + // (x rol 0) => x + // (x ror 0) => x ZeroVN = VNZeroForType(typ); if (arg1VN == ZeroVN) return arg0VN; |