summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
authorEugene Rozenfeld <erozen@microsoft.com>2015-10-12 18:36:25 -0700
committerEugene Rozenfeld <erozen@microsoft.com>2015-10-21 18:51:48 -0700
commit7b4508ba9419157570a9c47e4ebc8a79dcc53a11 (patch)
treecf3feb736ff173462fc82b71633f2d66e75ee9cc /src/jit
parent65b55ab02eba5fc71daadb0db9f4d4a424906dba (diff)
downloadcoreclr-7b4508ba9419157570a9c47e4ebc8a79dcc53a11.tar.gz
coreclr-7b4508ba9419157570a9c47e4ebc8a79dcc53a11.tar.bz2
coreclr-7b4508ba9419157570a9c47e4ebc8a79dcc53a11.zip
Generate efficient code for rotation patterns.
This change adds code to recognize rotation idioms and generate efficient instructions for them. Two new operators are added: GT_ROL and GT_ROR. The patterns recognized: (x << c1) | (x >>> c2) => x rol c1 (x >>> c1) | (x << c2) => x ror c2 where c1 and c2 are constant and c1 + c2 == bitsize(x) (x << y) | (x >>> (N - y)) => x rol y (x >>> y) | (x << (N - y)) => x ror y where N == bitsize(x) (x << y & M1) | (x >>> (N - y) & M2) => x rol y (x >>> y & M1) | (x << (N - y) & M2) => x ror y where N == bitsize(x) M1 & (N - 1) == N - 1 M2 & (N - 1) == N - 1 For a simple benchmark with 4 rotation patterns in a tight loop time goes from 7.324 to 2.600 (2.8 speedup). Rotations found and optimized in mscorlib: System.Security.Cryptography.SHA256Managed::RotateRight System.Security.Cryptography.SHA384Managed::RotateRight System.Security.Cryptography.SHA512Managed::RotateRight System.Security.Cryptography.RIPEMD160Managed::MDTransform (320 instances!) System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol1 System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol5 System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol30 System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Drain (9 instances of Sha1ForNonSecretPurposes::Rol* inlined) Closes #1619.
Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/codegenarm64.cpp4
-rw-r--r--src/jit/codegenxarch.cpp28
-rw-r--r--src/jit/compiler.h4
-rw-r--r--src/jit/emitxarch.cpp22
-rw-r--r--src/jit/gentree.cpp22
-rw-r--r--src/jit/gentree.h18
-rw-r--r--src/jit/gtlist.h2
-rw-r--r--src/jit/instr.cpp6
-rw-r--r--src/jit/instrsxarch.h7
-rw-r--r--src/jit/lclvars.cpp4
-rw-r--r--src/jit/lower.cpp7
-rw-r--r--src/jit/lower.h1
-rw-r--r--src/jit/lowerarm.cpp5
-rw-r--r--src/jit/lowerarm64.cpp26
-rw-r--r--src/jit/lowerxarch.cpp15
-rw-r--r--src/jit/lsra.cpp2
-rw-r--r--src/jit/morph.cpp220
-rw-r--r--src/jit/optcse.cpp2
-rw-r--r--src/jit/valuenum.cpp23
19 files changed, 395 insertions, 23 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 93ad2a3bdf..952a7d024e 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -2411,6 +2411,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROR:
genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
// genCodeForShift() calls genProduceReg()
break;
@@ -4432,6 +4433,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_OR:
ins = INS_orr;
break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
case GT_RSH:
ins = INS_asr;
break;
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 7064862c4c..d55a6b37f7 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1516,6 +1516,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
// genCodeForShift() calls genProduceReg()
break;
@@ -2584,7 +2586,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
if (data->OperGet() == GT_LSH ||
data->OperGet() == GT_RSH ||
- data->OperGet() == GT_RSZ)
+ data->OperGet() == GT_RSZ ||
+ data->OperGet() == GT_ROL ||
+ data->OperGet() == GT_ROR)
{
genCodeForShift(addr, data->gtOp.gtOp2, data);
}
@@ -4644,11 +4648,13 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
{
case GT_ADD: ins = INS_add; break;
case GT_AND: ins = INS_and; break;
- case GT_MUL: ins = INS_imul; break;
case GT_LSH: ins = INS_shl; break;
+ case GT_MUL: ins = INS_imul; break;
case GT_NEG: ins = INS_neg; break;
case GT_NOT: ins = INS_not; break;
case GT_OR: ins = INS_or; break;
+ case GT_ROL: ins = INS_rol; break;
+ case GT_ROR: ins = INS_ror; break;
case GT_RSH: ins = INS_sar; break;
case GT_RSZ: ins = INS_shr; break;
case GT_SUB: ins = INS_sub; break;
@@ -4660,10 +4666,10 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
}
/** Generates the code sequence for a GenTree node that
- * represents a bit shift operation (<<, >>, >>>).
+ * represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
*
- * Arguments: operand: the value to be shifted by shiftBy bits.
- * shiftBy: the number of bits to shift the operand.
+ * Arguments: operand: the value to be shifted or rotated by shiftBy bits.
+ * shiftBy: the number of bits to shift or rotate the operand.
* parent: the actual bitshift node (that specifies the
* type of bitshift to perform.
*
@@ -4758,6 +4764,12 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy,
case INS_shr:
ins = INS_shr_1;
break;
+ case INS_rol:
+ ins = INS_rol_1;
+ break;
+ case INS_ror:
+ ins = INS_ror_1;
+ break;
default:
// leave 'ins' unchanged
break;
@@ -4777,6 +4789,12 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy,
case INS_shr:
ins = INS_shr_N;
break;
+ case INS_rol:
+ ins = INS_rol_N;
+ break;
+ case INS_ror:
+ ins = INS_ror_N;
+ break;
default:
// leave 'ins' unchanged
break;
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index bc851dcf1d..13c64b7bac 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -4435,7 +4435,9 @@ private:
void fgFixupIfCallArg(ArrayStack<GenTree *> *parentStack,
GenTree *oldChild,
GenTree *newChild);
-
+ // Recognize a rotation pattern and convert into a GT_ROL or a GT_ROR node.
+ GenTreePtr fgMorphRotation(GenTreePtr tree);
+
//-------- Determine the order in which the trees will be evaluated -------
unsigned fgTreeSeqNum;
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index d6de1f2dba..d052ce212c 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -3389,6 +3389,8 @@ void emitter::emitIns_R_I(instruction ins,
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -3968,6 +3970,8 @@ void emitter::emitIns_C_I (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -4146,6 +4150,8 @@ void emitter::emitIns_I_AR (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -4212,6 +4218,8 @@ void emitter::emitIns_I_AI (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -4474,6 +4482,8 @@ void emitter::emitIns_I_ARR (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -4620,6 +4630,8 @@ void emitter::emitIns_I_ARX (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -4768,6 +4780,8 @@ void emitter::emitIns_I_AX (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -5050,6 +5064,8 @@ void emitter::emitIns_S_I (instruction ins,
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
@@ -6363,6 +6379,8 @@ void emitter::emitDispShift(instruction ins, int cnt)
{
case INS_rcl_1:
case INS_rcr_1:
+ case INS_rol_1:
+ case INS_ror_1:
case INS_shl_1:
case INS_shr_1:
case INS_sar_1:
@@ -6371,6 +6389,8 @@ void emitter::emitDispShift(instruction ins, int cnt)
case INS_rcl:
case INS_rcr:
+ case INS_rol:
+ case INS_ror:
case INS_shl:
case INS_shr:
case INS_sar:
@@ -6379,6 +6399,8 @@ void emitter::emitDispShift(instruction ins, int cnt)
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 3c06925fe4..d6bd3af777 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -3694,6 +3694,8 @@ COMMON_CNS:
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_ASG_LSH:
case GT_ASG_RSH:
case GT_ASG_RSZ:
@@ -9248,6 +9250,8 @@ GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_ASG_LSH:
case GT_ASG_RSH:
case GT_ASG_RSZ:
@@ -9943,9 +9947,13 @@ CHK_OVF:
case GT_LSH: i1 <<= (i2 & 0x1f); break;
case GT_RSH: i1 >>= (i2 & 0x1f); break;
case GT_RSZ:
- /* logical shift -> make it unsigned to propagate the sign bit */
+ /* logical shift -> make it unsigned to not propagate the sign bit */
i1 = UINT32(i1) >> (i2 & 0x1f);
break;
+ case GT_ROL: i1 = (i1 << (i2 & 0x1f)) | (UINT32(i1) >> ((32 - i2) & 0x1f));
+ break;
+ case GT_ROR: i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f));
+ break;
/* DIV and MOD can generate an INT 0 - if division by 0
* or overflow - when dividing MIN by -1 */
@@ -10284,11 +10292,15 @@ LNG_ADD_CHKOVF:
case GT_XOR: lval1 ^= lval2; break;
case GT_AND: lval1 &= lval2; break;
- case GT_LSH: lval1 <<= (op2->gtIntConCommon.IconValue() & 0x3f); break;
- case GT_RSH: lval1 >>= (op2->gtIntConCommon.IconValue() & 0x3f); break;
+ case GT_LSH: lval1 <<= (lval2 & 0x3f); break;
+ case GT_RSH: lval1 >>= (lval2 & 0x3f); break;
case GT_RSZ:
- /* logical shift -> make it unsigned to propagate the sign bit */
- lval1 = UINT64(lval1) >> (op2->gtIntConCommon.IconValue() & 0x3f);
+ /* logical shift -> make it unsigned to not propagate the sign bit */
+ lval1 = UINT64(lval1) >> (lval2 & 0x3f);
+ break;
+ case GT_ROL: lval1 = (lval1 << (lval2 & 0x3f)) | (UINT64(lval1) >> ((64 - lval2) & 0x3f));
+ break;
+ case GT_ROR: lval1 = (lval1 << ((64 - lval2) & 0x3f)) | (UINT64(lval1) >> (lval2 & 0x3f));
break;
//Both DIV and IDIV on x86 raise an exception for min_int (and min_long) / -1. So we preserve
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 1402445da0..431f5c85b1 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -1086,23 +1086,25 @@ public:
int OperIsArithmetic() const
{
genTreeOps op = OperGet();
- return op==GT_ADD
- || op==GT_SUB
- || op==GT_MUL
+ return op==GT_ADD
+ || op==GT_SUB
+ || op==GT_MUL
|| op==GT_DIV
|| op==GT_MOD
-
+
|| op==GT_UDIV
|| op==GT_UMOD
- || op==GT_OR
+ || op==GT_OR
|| op==GT_XOR
|| op==GT_AND
|| op==GT_LSH
|| op==GT_RSH
- || op==GT_RSZ;
-
+ || op==GT_RSZ
+
+ || op==GT_ROL
+ || op==GT_ROR;
}
static
@@ -3506,6 +3508,8 @@ inline bool GenTree::RequiresNonNullOp2(genTreeOps oper)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_INDEX:
case GT_ASG:
case GT_ASG_ADD:
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 9283374592..a23d8ad579 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -108,6 +108,8 @@ GTNODE(GT_AND , "&" ,1,GTK_BINOP|GTK_LOGOP)
GTNODE(GT_LSH , "<<" ,0,GTK_BINOP)
GTNODE(GT_RSH , ">>" ,0,GTK_BINOP)
GTNODE(GT_RSZ , ">>>" ,0,GTK_BINOP)
+GTNODE(GT_ROL , "rol" ,0,GTK_BINOP)
+GTNODE(GT_ROR , "ror" ,0,GTK_BINOP)
GTNODE(GT_MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
GTNODE(GT_ASG , "=" ,0,GTK_BINOP|GTK_ASGOP)
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index d1cc16f18e..c63952ae12 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -2627,6 +2627,8 @@ void CodeGen::inst_RV_SH(instruction ins,
assert(ins == INS_rcl ||
ins == INS_rcr ||
+ ins == INS_rol ||
+ ins == INS_ror ||
ins == INS_shl ||
ins == INS_shr ||
ins == INS_sar);
@@ -2639,6 +2641,8 @@ void CodeGen::inst_RV_SH(instruction ins,
assert(INS_rcl + 1 == INS_rcl_1);
assert(INS_rcr + 1 == INS_rcr_1);
+ assert(INS_rol + 1 == INS_rol_1);
+ assert(INS_ror + 1 == INS_ror_1);
assert(INS_shl + 1 == INS_shl_1);
assert(INS_shr + 1 == INS_shr_1);
assert(INS_sar + 1 == INS_sar_1);
@@ -2651,6 +2655,8 @@ void CodeGen::inst_RV_SH(instruction ins,
assert(INS_rcl + 2 == INS_rcl_N);
assert(INS_rcr + 2 == INS_rcr_N);
+ assert(INS_rol + 2 == INS_rol_N);
+ assert(INS_ror + 2 == INS_ror_N);
assert(INS_shl + 2 == INS_shl_N);
assert(INS_shr + 2 == INS_shr_N);
assert(INS_sar + 2 == INS_sar_N);
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index c0be9aae73..9f8a9956a3 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -339,6 +339,13 @@ INST2(ret , "ret" , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2)
INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
+INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
+INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
+INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
+INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
+INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
+INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
+
INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index b9e89f156d..15c85040ed 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -2899,7 +2899,9 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
if (tree->gtOper == GT_LSH ||
tree->gtOper == GT_RSH ||
- tree->gtOper == GT_RSZ)
+ tree->gtOper == GT_RSZ ||
+ tree->gtOper == GT_ROL ||
+ tree->gtOper == GT_ROR)
{
if (tree->gtType == TYP_INT)
{
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 5882ecfa71..d94b0ba1c7 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -400,6 +400,8 @@ void Lowering::DecomposeNode(GenTreePtr* pTree, Compiler::fgWalkData* data)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_MULHI:
NYI("Arithmetic binary operators on TYP_LONG");
break;
@@ -525,6 +527,11 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data)
}
break;
+ case GT_ROL:
+ case GT_ROR:
+ LowerRotate(*ppTree);
+ break;
+
#ifdef FEATURE_SIMD
case GT_SIMD:
if ((*ppTree)->TypeGet() == TYP_SIMD12)
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 6754b7b75d..03f7a6e71f 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -161,6 +161,7 @@ private:
void HandleIndirAddressExpression(GenTree *indirTree, GenTree* tree);
void LowerGCWriteBarrier(GenTree *tree);
void LowerArrElem(GenTree **ppTree, Compiler::fgWalkData* data);
+ void LowerRotate(GenTree *tree);
// Utility functions
void MorphBlkIntoHelperCall (GenTreePtr pTree, GenTreePtr treeStmt);
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
index d99347d335..6669370604 100644
--- a/src/jit/lowerarm.cpp
+++ b/src/jit/lowerarm.cpp
@@ -41,6 +41,11 @@ void Lowering::LowerCntBlockOp(GenTreePtr *ppTree)
NYI_ARM("ARM Lowering for BlockOp");
}
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ NYI_ARM("ARM Lowering for ROL and ROR");
+}
+
void Lowering::TreeNodeInfoInitCall(GenTree *tree, TreeNodeInfo &info,
int &srcCount, // out
int &dstCount // out
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index 9467813833..f05fd80b5d 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -486,6 +486,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROR:
{
info->srcCount = 2;
info->dstCount = 1;
@@ -1625,6 +1626,31 @@ void Lowering::LowerCast( GenTreePtr* ppTree)
}
}
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert rol into ROR.
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ rotateLeftIndexNode->InsertAfterSelf(tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
+}
+
// TODO-Cleanup: move to Lower.cpp?
void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate)
{
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index a7b4600df9..c9df42e6a6 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -719,6 +719,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
{
info->srcCount = 2;
info->dstCount = 1;
@@ -2914,7 +2916,9 @@ bool Lowering::LowerStoreInd(GenTreePtr tree)
oper != GT_XOR &&
oper != GT_LSH &&
oper != GT_RSH &&
- oper != GT_RSZ)
+ oper != GT_RSZ &&
+ oper != GT_ROL &&
+ oper != GT_ROR)
{
JITDUMP("Lower of StoreInd didn't mark the node as self contained\n");
JITDUMP("because the node operator not yet supported:\n");
@@ -2924,7 +2928,9 @@ bool Lowering::LowerStoreInd(GenTreePtr tree)
if ((oper == GT_LSH ||
oper == GT_RSH ||
- oper == GT_RSZ) &&
+ oper == GT_RSZ ||
+ oper == GT_ROL ||
+ oper == GT_ROR) &&
varTypeIsSmall(tree))
{
//In ldind, Integer values smaller than 4 bytes, a boolean, or a character converted to 4 bytes by sign or zero-extension as appropriate.
@@ -3017,6 +3023,11 @@ bool Lowering::LowerStoreInd(GenTreePtr tree)
}
}
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ // xarch supports both ROL and ROR instructions so no lowering is required.
+}
+
void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate)
{
GenTreePtr indirDst = storeInd->gtGetOp1();
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 8f11af9878..5515f83dc9 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -2479,6 +2479,8 @@ LinearScan::getKillSetForNode(GenTree* tree)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
if (tree->gtLsraInfo.isHelperCallWithKills)
{
killMask = RBM_CALLEE_TRASH;
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index b000f58969..2cdbe0ed05 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -10324,6 +10324,17 @@ CM_ADD_OP:
}
#endif // LEA_AVAILABLE
}
+ else if (oper == GT_OR)
+ {
+ tree = fgMorphRotation(tree);
+
+ // fgMorphRotation may return a new tree
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
break;
case GT_CHS:
@@ -10929,7 +10940,8 @@ CM_ADD_OP:
/* for the shift nodes the type of op2 can differ from the tree type */
if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
{
- noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
+ noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ) ||
+ (oper == GT_ROL) || (oper == GT_ROR));
GenTreePtr commaOp2 = op2->gtOp.gtOp2;
@@ -11864,6 +11876,208 @@ GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
return result;
}
+//------------------------------------------------------------------------------
+// fgMorphRotation : Check if the tree represents a left or right rotation. If so, return
+// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
+//
+// Arguments:
+// tree - tree to check for a rotation pattern
+//
+// Return Value:
+// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
+//
+// Assumption:
+// The input is a GT_OR tree.
+
+GenTreePtr Compiler::fgMorphRotation(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ //
+ // Check for a rotation pattern, e.g.,
+ //
+ // OR ROL
+ // / \ / \
+ // LSH RSZ -> x y
+ // / \ / \
+ // x AND x AND
+ // / \ / \
+ // y 31 ADD 31
+ // / \
+ // NEG 32
+ // |
+ // y
+
+ genTreeOps oper = tree->OperGet();
+ noway_assert(oper == GT_OR);
+
+ if ((tree->gtFlags & GTF_ALL_EFFECT) != 0)
+ {
+ return tree; // Can't do anything due to side effects.
+ }
+
+ // Check if we have an LSH on one side of the OR and an RSZ on the other side.
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr leftShiftTree = nullptr;
+ GenTreePtr rightShiftTree = nullptr;
+ if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
+ {
+ leftShiftTree = op1;
+ rightShiftTree = op2;
+ }
+ else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
+ {
+ leftShiftTree = op2;
+ rightShiftTree = op1;
+ }
+ else
+ {
+ return tree;
+ }
+
+ // Check if the trees representing the value to shift are identical.
+ // We already checked that there are no side effects above.
+ if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
+ {
+ GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
+ ssize_t rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
+ GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
+ GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
+
+ // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
+ // shouldn't be masked for the transformation to be valid. If additional
+ // higher bits are not masked, the transformation is still valid since the result
+ // of MSIL shift instructions is unspecified if the shift amount is greater or equal
+ // than the width of the value being shifted.
+ ssize_t minimalMask = rotatedValueBitSize - 1;
+ ssize_t leftShiftMask = -1;
+ ssize_t rightShiftMask = -1;
+
+ if ((leftShiftIndex->OperGet() == GT_AND))
+ {
+ if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ leftShiftIndex = leftShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if ((rightShiftIndex->OperGet() == GT_AND))
+ {
+ if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ rightShiftIndex = rightShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if (((minimalMask & leftShiftMask) != minimalMask) ||
+ ((minimalMask & rightShiftMask) != minimalMask))
+ {
+ // The shift index is overmasked, e.g., we have
+ // something like (x << y & 15) or
+ // (x >> (32 - y) & 15) with 32 bit x.
+ // The transformation is not valid.
+ return tree;
+ }
+
+ GenTreePtr shiftIndexWithAdd = nullptr;
+ GenTreePtr shiftIndexWithoutAdd = nullptr;
+ genTreeOps rotateOp = GT_NONE;
+ GenTreePtr rotateIndex = nullptr;
+
+ if (leftShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = leftShiftIndex;
+ shiftIndexWithoutAdd = rightShiftIndex;
+ rotateOp = GT_ROR;
+ }
+ else if (rightShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = rightShiftIndex;
+ shiftIndexWithoutAdd = leftShiftIndex;
+ rotateOp = GT_ROL;
+ }
+
+ if (shiftIndexWithAdd != nullptr)
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
+ {
+ if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
+ {
+ // We found one of these patterns:
+ // (x << (y & M)) | (x >>> ((-y + N) & M))
+ // (x << y) | (x >>> (-y + N))
+ // (x >>> (y & M)) | (x << ((-y + N) & M))
+ // (x >>> y) | (x << (-y + N))
+ // where N == bitsize(x), M is const, and
+ // M & (N - 1) == N - 1
+
+#ifndef _TARGET_64BIT_
+ if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
+ {
+ // TODO: we need to handle variable-sized long shifts specially on x86.
+ // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
+ // to add helpers for GT_ROL and GT_ROR.
+ NYI("Rotation of a long value by variable amount");
+ }
+#endif
+
+ rotateIndex = shiftIndexWithoutAdd;
+ }
+ }
+ }
+ }
+ }
+ else if ((leftShiftIndex->IsCnsIntOrI() &&
+ rightShiftIndex->IsCnsIntOrI()))
+ {
+ if (leftShiftIndex->gtIntCon.gtIconVal +
+ rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ // We found this pattern:
+ // (x << c1) | (x >>> c2)
+ // where c1 and c2 are const and c1 + c2 == bitsize(x)
+ rotateOp = GT_ROL;
+ rotateIndex = leftShiftIndex;
+ }
+ }
+
+ if (rotateIndex != nullptr)
+ {
+ noway_assert((rotateOp == GT_ROL) || (rotateOp == GT_ROR));
+
+ // We can use the same tree only during global morph; reusing the tree in a later morph
+ // may invalidate value numbers.
+ if (fgGlobalMorph)
+ {
+ tree->gtOp.gtOp1 = rotatedValue;
+ tree->gtOp.gtOp2 = rotateIndex;
+ tree->ChangeOper(rotateOp);
+ }
+ else
+ {
+ tree = gtNewOperNode(rotateOp, genActualType(rotatedValue->gtType), rotatedValue, rotateIndex);
+ }
+ return tree;
+ }
+ }
+#endif //LEGACY_BACKEND
+ return tree;
+}
#if !CPU_HAS_FP_SUPPORT
GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
@@ -12054,7 +12268,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
/*****************************************************************************
*
- * Transform the given tree for code generation and returns an equivalent tree.
+ * Transform the given tree for code generation and return an equivalent tree.
*/
@@ -15198,6 +15412,8 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_EQ:
case GT_NE:
case GT_LT:
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
index adc32af2ee..c188f4ac98 100644
--- a/src/jit/optcse.cpp
+++ b/src/jit/optcse.cpp
@@ -2022,6 +2022,8 @@ bool Compiler::optIsCSEcandidate(GenTreePtr tree)
case GT_XOR:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
return true; // CSE these Binary Operators
case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index 6178cdfd61..9476e5472f 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -345,6 +345,25 @@ T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
{
return UINT32(v0) >> v1;
}
+ case GT_ROL:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << v1) | (UINT64(v0) >> (64 - v1));
+ }
+ else
+ {
+ return (v0 << v1) | (UINT32(v0) >> (32 - v1));
+ }
+
+ case GT_ROR:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << (64 - v1)) | (UINT64(v0) >> v1);
+ }
+ else
+ {
+ return (v0 << (32 - v1)) | (UINT32(v0) >> v1);
+ }
case GT_DIV:
case GT_MOD:
@@ -1050,8 +1069,12 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
// (x << 0) => x
// (x >> 0) => x
+ // (x rol 0) => x
+ // (x ror 0) => x
ZeroVN = VNZeroForType(typ);
if (arg1VN == ZeroVN)
return arg0VN;