JIT: Suppress emitting same-reg zero extending move (#22454)

Add a peephole optimization that suppresses emitting a zero-extending move when the previous instruction has already zero-extended the same register. Currently implemented only for x64. Closes #21923.
author: Andy Ayers <andya@microsoft.com> 2019-02-08 08:10:57 -0800
committer: GitHub <noreply@github.com> 2019-02-08 08:10:57 -0800
commit: d5f638a1cd52fc2733e769e716d5a3a1d61fc804 (patch)
tree: 023ea4a10643b5adafc97b48ac27d5b8c1f7dab7
parent: 76c322d09ee66c0c51f020c8d95dbee7765224d6 (diff)
download: coreclr-d5f638a1cd52fc2733e769e716d5a3a1d61fc804.tar.gz
coreclr-d5f638a1cd52fc2733e769e716d5a3a1d61fc804.tar.bz2
coreclr-d5f638a1cd52fc2733e769e716d5a3a1d61fc804.zip
3 files changed, 88 insertions, 1 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 707378f196..2327988079 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -6398,6 +6398,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
 
     const regNumber srcReg = cast->gtGetOp1()->gtRegNum;
     const regNumber dstReg = cast->gtRegNum;
+    emitter*        emit   = getEmitter();
 
     assert(genIsValidIntReg(srcReg));
     assert(genIsValidIntReg(dstReg));
@@ -6413,6 +6414,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
     {
         instruction ins;
         unsigned    insSize;
+        bool        canSkip = false;
 
         switch (desc.ExtendKind())
         {
@@ -6426,6 +6428,12 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
                 break;
 #ifdef _TARGET_64BIT_
             case GenIntCastDesc::ZERO_EXTEND_INT:
+                // We can skip emitting this zero extending move if the previous instruction zero extended implicitly
+                if ((srcReg == dstReg) && compiler->opts.OptimizationEnabled())
+                {
+                    canSkip = emit->AreUpper32BitsZero(srcReg);
+                }
+
                 ins     = INS_mov;
                 insSize = 4;
                 break;
@@ -6436,12 +6444,20 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast)
 #endif
             default:
                 assert(desc.ExtendKind() == GenIntCastDesc::COPY);
+                assert(srcReg != dstReg);
                 ins     = INS_mov;
                 insSize = desc.ExtendSrcSize();
                 break;
         }
 
-        getEmitter()->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg);
+        if (canSkip)
+        {
+            JITDUMP("\n -- suppressing emission as previous instruction already properly extends.\n");
+        }
+        else
+        {
+            emit->emitIns_R_R(ins, EA_ATTR(insSize), dstReg, srcReg);
+        }
     }
 
     genProduceReg(cast);
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index e56c4b2e2a..bbaeb81d0f 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -145,6 +145,75 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
     return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
 }
 
+//------------------------------------------------------------------------
+// AreUpper32BitsZero: check if some previously emitted
+//     instruction set the upper 32 bits of reg to zero.
+//
+// Arguments:
+//    reg - register of interest
+//
+// Return Value:
+//    true if previous instruction zeroed reg's upper 32 bits.
+//    false if it did not, or if we can't safely determine.
+//
+// Notes:
+//    Currently only looks back one instruction.
+//
+//    movsx eax, ... might seem viable but we always encode this
+//    instruction with a 64 bit destination. See TakesRexWPrefix.
+
+bool emitter::AreUpper32BitsZero(regNumber reg)
+{
+    // Don't look back across IG boundaries (possible control flow)
+    if (emitCurIGinsCnt == 0)
+    {
+        return false;
+    }
+
+    instrDesc* id  = emitLastIns;
+    insFormat  fmt = id->idInsFmt();
+
+    // This isn't meant to be a comprehensive check. Just look for what
+    // seems to be common.
+    switch (fmt)
+    {
+        case IF_RWR_CNS:
+        case IF_RRW_CNS:
+        case IF_RRW_SHF:
+        case IF_RWR_RRD:
+        case IF_RRW_RRD:
+        case IF_RWR_MRD:
+        case IF_RWR_SRD:
+        case IF_RWR_ARD:
+
+            // Bail if not writing to the right register
+            if (id->idReg1() != reg)
+            {
+                return false;
+            }
+
+            // Bail if movsx, we always have movsx sign extend to 8 bytes
+            if (id->idIns() == INS_movsx)
+            {
+                return false;
+            }
+
+            // movzx always zeroes the upper 32 bits.
+            if (id->idIns() == INS_movzx)
+            {
+                return true;
+            }
+
+            // Else rely on operation size.
+            return (id->idOpSize() == EA_4BYTE);
+
+        default:
+            break;
+    }
+
+    return false;
+}
+
 #ifdef FEATURE_HW_INTRINSICS
 //------------------------------------------------------------------------
 // IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index cfdfdc8cbf..5b06838b07 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -94,6 +94,8 @@ code_t AddRexPrefix(instruction ins, code_t code);
 bool EncodedBySSE38orSSE3A(instruction ins);
 bool Is4ByteSSEInstruction(instruction ins);
 
+bool AreUpper32BitsZero(regNumber reg);
+
 // Adjust code size for CRC32 that has 4-byte opcode
 // but does not use SSE38 or EES3A encoding.
 UNATIVE_OFFSET emitAdjustSizeCrc32(instruction ins, emitAttr attr)
author	Andy Ayers <andya@microsoft.com>	2019-02-08 08:10:57 -0800
committer	GitHub <noreply@github.com>	2019-02-08 08:10:57 -0800
commit	d5f638a1cd52fc2733e769e716d5a3a1d61fc804 (patch)
tree	023ea4a10643b5adafc97b48ac27d5b8c1f7dab7
parent	76c322d09ee66c0c51f020c8d95dbee7765224d6 (diff)
download	coreclr-d5f638a1cd52fc2733e769e716d5a3a1d61fc804.tar.gz coreclr-d5f638a1cd52fc2733e769e716d5a3a1d61fc804.tar.bz2 coreclr-d5f638a1cd52fc2733e769e716d5a3a1d61fc804.zip