1 files changed, 277 insertions, 72 deletions
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
index 1b3ef1bdc7..53ee88b3a2 100644
--- a/src/jit/emitarm.cpp
+++ b/src/jit/emitarm.cpp
@@ -1380,7 +1380,7 @@ DONE:
 
 /*****************************************************************************
  *
- *  emitIns_valid_imm_for_add() returns true when the immediate 'imm'
+ *  emitIns_valid_imm_for_add() returns true when the immediate 'imm'
  *   can be encoded using a single add or sub instruction.
  */
 /*static*/ bool emitter::emitIns_valid_imm_for_add(int imm, insFlags flags)
@@ -1396,6 +1396,20 @@ DONE:
 
 /*****************************************************************************
  *
+ *  emitIns_valid_imm_for_cmp() returns true when the immediate 'imm'
+ *   can be encoded as an input operand to a cmp instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_cmp(int imm, insFlags flags)
+{
+    // Accept 'imm' when either it or its negation passes isModImmConst().
+    // The negated form presumably corresponds to the complementary compare
+    // instruction -- confirm against the instruction selection code.
+    // NOTE: 'flags' is not consulted here.
+    return isModImmConst(imm) || isModImmConst(-imm);
+}
+
+/*****************************************************************************
+ *
  *  emitIns_valid_imm_for_add_sp() returns true when the immediate 'imm'
  *   can be encoded in "add Rd,SP,i10".
  */
@@ -1408,6 +1422,20 @@ DONE:
 
 /*****************************************************************************
  *
+ *  emitIns_valid_imm_for_ldst_offset() returns true when the immediate 'imm'
+ *   can be encoded as the offset in a ldr/str instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(int imm, emitAttr size)
+{
+    // Two encodings are considered ('size' is not consulted):
+    //   IF_T2_K1 - 'imm' fits entirely in the low 12 bits (0..0xfff)
+    //   IF_T2_H0 - the magnitude of 'imm' is at most 0xff
+    const bool fitsK1 = ((imm & 0x0fff) == imm);
+    return fitsK1 || (unsigned_abs(imm) <= 0x0ff);
+}
+
+/*****************************************************************************
+ *
  *  Add an instruction with no operands.
  */
 
@@ -4289,14 +4317,12 @@ void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumb
     id->idInsFmt(fmt);
     id->idInsSize(isz);
 
-#if RELOC_SUPPORT
     if (emitComp->opts.compReloc)
     {
         // Set the relocation flags - these give hint to zap to perform
         // relocation of the specified 32bit address.
         id->idSetRelocFlags(attr);
     }
-#endif // RELOC_SUPPORT
 
     dispIns(id);
     appendToCurIG(id);
@@ -4579,7 +4605,6 @@ void emitter::emitIns_Call(EmitCallType          callType,
             id->idSetIsCallAddr();
         }
 
-#if RELOC_SUPPORT
         if (emitComp->opts.compReloc)
         {
             // Since this is an indirect call through a pointer and we don't
@@ -4588,7 +4613,6 @@ void emitter::emitIns_Call(EmitCallType          callType,
 
             id->idSetIsDspReloc();
         }
-#endif
     }
 
 #ifdef DEBUG
@@ -5254,7 +5278,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
             else if (fmt == IF_T2_J2)
             {
                 assert((distVal & 1) == 0);
-#ifdef RELOC_SUPPORT
                 if (emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
                 {
                     // dst isn't an actual final target location, just some intermediate
@@ -5263,7 +5286,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
                     // rely on the relocation to do all the work
                 }
                 else
-#endif
                 {
                     assert(distVal >= CALL_DIST_MAX_NEG);
                     assert(distVal <= CALL_DIST_MAX_POS);
@@ -5290,7 +5312,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
 
             unsigned instrSize = emitOutput_Thumb2Instr(dst, code);
 
-#ifdef RELOC_SUPPORT
             if (emitComp->opts.compReloc)
             {
                 if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
@@ -5303,7 +5324,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
                     }
                 }
             }
-#endif // RELOC_SUPPORT
 
             dst += instrSize;
         }
@@ -5968,9 +5988,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                 assert(!id->idIsLclVar());
                 assert((ins == INS_movw) || (ins == INS_movt));
                 imm += (size_t)emitConsBlock;
-#ifdef RELOC_SUPPORT
                 if (!id->idIsCnsReloc() && !id->idIsDspReloc())
-#endif
                 {
                     goto SPLIT_IMM;
                 }
@@ -5988,7 +6006,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                 }
             }
 
-#ifdef RELOC_SUPPORT
             if (id->idIsCnsReloc() || id->idIsDspReloc())
             {
                 assert((ins == INS_movt) || (ins == INS_movw));
@@ -5997,7 +6014,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                     emitRecordRelocation((void*)(dst - 8), (void*)imm, IMAGE_REL_BASED_THUMB_MOV32);
             }
             else
-#endif // RELOC_SUPPORT
             {
                 assert((imm & 0x0000ffff) == imm);
                 code |= (imm & 0x00ff);
@@ -6220,7 +6236,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             }
             code = emitInsCode(ins, fmt);
 
-#ifdef RELOC_SUPPORT
             if (id->idIsDspReloc())
             {
                 callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
@@ -6229,7 +6244,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                     emitRecordRelocation((void*)(dst - 4), addr, IMAGE_REL_BASED_THUMB_BRANCH24);
             }
             else
-#endif // RELOC_SUPPORT
             {
                 addr = (BYTE*)((size_t)addr & ~1); // Clear the lowest bit from target address
 
@@ -6935,14 +6949,12 @@ void emitter::emitDispInsHelp(
             {
                 if (emitComp->opts.disDiffable)
                     imm = 0xD1FF;
-#if RELOC_SUPPORT
                 if (id->idIsCnsReloc() || id->idIsDspReloc())
                 {
                     if (emitComp->opts.disDiffable)
                         imm = 0xD1FFAB1E;
                     printf("%s RELOC ", (id->idIns() == INS_movw) ? "LOW" : "HIGH");
                 }
-#endif // RELOC_SUPPORT
             }
             emitDispImm(imm, false, (fmt == IF_T2_N));
             break;
@@ -6973,12 +6985,10 @@ void emitter::emitDispInsHelp(
 
                 assert(jdsc != NULL);
 
-#ifdef RELOC_SUPPORT
                 if (id->idIsDspReloc())
                 {
                     printf("reloc ");
                 }
-#endif
                 printf("%s ADDRESS J_M%03u_DS%02u", (id->idIns() == INS_movw) ? "LOW" : "HIGH",
                        Compiler::s_compMethodsCount, imm);
 
@@ -7528,89 +7538,115 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
 
 #ifndef LEGACY_BACKEND
 
-// this is very similar to emitInsBinary and probably could be folded in to same
-// except the requirements on the incoming parameter are different,
-// ex: the memory op in storeind case must NOT be contained
-void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
+void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir)
 {
-    switch (node->OperGet())
+    GenTree* addr = indir->Addr();
+    GenTree* data = indir->gtOp.gtOp2;
+
+    if (addr->isContained())
     {
-        case GT_IND:
-        case GT_STOREIND:
-        {
-            GenTreeIndir* indir = node->AsIndir();
-            GenTree*      addr  = indir->Addr();
-            GenTree*      data  = indir->gtOp.gtOp2;
+        assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
 
-            regNumber reg = (node->OperGet() == GT_IND) ? node->gtRegNum : data->gtRegNum;
+        int   offset = 0;
+        DWORD lsl    = 0;
 
-            if (addr->isContained())
+        if (addr->OperGet() == GT_LEA)
+        {
+            offset = (int)addr->AsAddrMode()->gtOffset;
+            if (addr->AsAddrMode()->gtScale > 0)
             {
-                assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
+                assert(isPow2(addr->AsAddrMode()->gtScale));
+                BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+            }
+        }
+
+        GenTree* memBase = indir->Base();
+
+        if (indir->HasIndex())
+        {
+            GenTree* index = indir->Index();
 
-                int   offset = 0;
-                DWORD lsl    = 0;
+            if (offset != 0)
+            {
+                regMaskTP tmpRegMask = indir->gtRsvdRegs;
+                regNumber tmpReg     = genRegNumFromMask(tmpRegMask);
+                noway_assert(tmpReg != REG_NA);
 
-                if (addr->OperGet() == GT_LEA)
+                if (emitIns_valid_imm_for_add(offset, INS_FLAGS_DONT_CARE))
                 {
-                    offset = (int)addr->AsAddrMode()->gtOffset;
-                    if (addr->AsAddrMode()->gtScale > 0)
+                    if (lsl > 0)
                     {
-                        assert(isPow2(addr->AsAddrMode()->gtScale));
-                        BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+                        // Generate code to set tmpReg = base + index*scale
+                        emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+                                        INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+                    }
+                    else // no scale
+                    {
+                        // Generate code to set tmpReg = base + index
+                        emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
                     }
-                }
 
-                GenTree* memBase = indir->Base();
+                    noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
 
-                if (indir->HasIndex())
-                {
-                    NYI_ARM("emitInsMov HasIndex");
+                    // Then load/store dataReg from/to [tmpReg + offset]
+                    emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset);
                 }
-                else
+                else // large offset
                 {
-                    // TODO check offset is valid for encoding
-                    emitIns_R_R_I(ins, attr, reg, memBase->gtRegNum, offset);
+                    // First set tmpReg to the large offset constant
+                    codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+                    // Then add the base register
+                    //      tmpReg = tmpReg + base
+                    emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+
+                    noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+                    noway_assert(tmpReg != index->gtRegNum);
+
+                    // Then load/store dataReg from/to [tmpReg + index*scale]
+                    emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->gtRegNum, lsl, INS_FLAGS_DONT_CARE,
+                                    INS_OPTS_LSL);
                 }
             }
-            else
+            else // (offset == 0)
             {
-                if (addr->OperGet() == GT_CLS_VAR_ADDR)
+                if (lsl > 0)
                 {
-                    emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+                    // Load/store dataReg from/to [memBase + index*scale]
+                    emitIns_R_R_R_I(ins, attr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_FLAGS_DONT_CARE,
+                                    INS_OPTS_LSL);
                 }
-                else
+                else // no scale
                 {
-                    emitIns_R_R(ins, attr, reg, addr->gtRegNum);
+                    // Load/store dataReg from/to [memBase + index]
+                    emitIns_R_R_R(ins, attr, dataReg, memBase->gtRegNum, index->gtRegNum);
                 }
             }
         }
-        break;
-
-        case GT_STORE_LCL_VAR:
+        else // no Index
         {
-            GenTreeLclVarCommon* varNode = node->AsLclVarCommon();
-
-            GenTree* data = node->gtOp.gtOp1->gtEffectiveVal();
-            codeGen->inst_set_SV_var(varNode);
-            assert(varNode->gtRegNum == REG_NA); // stack store
-
-            if (data->isContainedIntOrIImmed())
+            if (emitIns_valid_imm_for_ldst_offset(offset, attr))
             {
-                emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
-                codeGen->genUpdateLife(varNode);
+                // Load/store dataReg from/to [memBase + offset]
+                emitIns_R_R_I(ins, attr, dataReg, memBase->gtRegNum, offset);
             }
             else
             {
-                assert(!data->isContained());
-                emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
-                codeGen->genUpdateLife(varNode);
+                // We require a tmpReg to hold the offset
+                regMaskTP tmpRegMask = indir->gtRsvdRegs;
+                regNumber tmpReg     = genRegNumFromMask(tmpRegMask);
+                noway_assert(tmpReg != REG_NA);
+
+                // First set tmpReg to the large offset constant
+                codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+                // Then load/store dataReg from/to [memBase + tmpReg]
+                emitIns_R_R_R(ins, attr, dataReg, memBase->gtRegNum, tmpReg);
             }
         }
-            return;
-
-        default:
-            unreached();
+    }
+    else
+    {
+        emitIns_R_R(ins, attr, dataReg, addr->gtRegNum);
     }
 }
 
@@ -7646,5 +7682,174 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
     }
 }
 
+regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
+{
+    regNumber result = REG_NA;
+
+    // dst can only be a reg
+    assert(!dst->isContained());
+
+    // find immed (if any) - it cannot be a dst
+    // Only one src can be an int.
+    GenTreeIntConCommon* intConst  = nullptr;
+    GenTree*             nonIntReg = nullptr;
+
+    if (varTypeIsFloating(dst))
+    {
+        // src1 can only be a reg
+        assert(!src1->isContained());
+        // src2 can only be a reg
+        assert(!src2->isContained());
+    }
+    else // not floating point
+    {
+        // src2 can be immed or reg
+        assert(!src2->isContained() || src2->isContainedIntOrIImmed());
+
+        // Check src2 first as we can always allow it to be a contained immediate
+        if (src2->isContainedIntOrIImmed())
+        {
+            intConst  = src2->AsIntConCommon();
+            nonIntReg = src1;
+        }
+        // Only for commutative operations do we check src1 and allow it to be a contained immediate
+        else if (dst->OperIsCommutative())
+        {
+            // src1 can be immed or reg
+            assert(!src1->isContained() || src1->isContainedIntOrIImmed());
+
+            // Check src1 and allow it to be a contained immediate
+            if (src1->isContainedIntOrIImmed())
+            {
+                assert(!src2->isContainedIntOrIImmed());
+                intConst  = src1->AsIntConCommon();
+                nonIntReg = src2;
+            }
+        }
+        else
+        {
+            // src1 can only be a reg
+            assert(!src1->isContained());
+        }
+    }
+    bool      isMulOverflow = false;
+    bool      isUnsignedMul = false;
+    regNumber extraReg      = REG_NA;
+    if (dst->gtOverflowEx())
+    {
+        NYI_ARM("emitInsTernary overflow");
+#if 0
+        if (ins == INS_add)
+        {
+            ins = INS_adds;
+        }
+        else if (ins == INS_sub)
+        {
+            ins = INS_subs;
+        }
+        else if (ins == INS_mul)
+        {
+            isMulOverflow = true;
+            isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
+            assert(intConst == nullptr); // overflow format doesn't support an int constant operand
+        }
+        else
+        {
+            assert(!"Invalid ins for overflow check");
+        }
+#endif
+    }
+    if (intConst != nullptr)
+    {
+        emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue());
+    }
+    else
+    {
+        if (isMulOverflow)
+        {
+            NYI_ARM("emitInsTernary overflow");
+#if 0
+            // Make sure that we have an internal register
+            assert(genCountBits(dst->gtRsvdRegs) == 2);
+
+            // There will be two bits set in tmpRegsMask.
+            // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
+            regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
+            assert(tmpRegsMask != RBM_NONE);
+            regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+            extraReg             = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+
+            if (isUnsignedMul)
+            {
+                if (attr == EA_4BYTE)
+                {
+                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
+                    emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+                    // Get the high result by shifting dst.
+                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+                }
+                else
+                {
+                    assert(attr == EA_8BYTE);
+                    // Compute the high result.
+                    emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+                    // Now multiply without skewing the high result.
+                    emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+                }
+
+                // zero-sign bit comparison to detect overflow.
+                emitIns_R_I(INS_cmp, attr, extraReg, 0);
+            }
+            else
+            {
+                int bitShift = 0;
+                if (attr == EA_4BYTE)
+                {
+                    // Compute 8 byte results from 4 byte by 4 byte multiplication.
+                    emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+                    // Get the high result by shifting dst.
+                    emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+
+                    bitShift = 31;
+                }
+                else
+                {
+                    assert(attr == EA_8BYTE);
+                    // Save the high result in a temporary register.
+                    emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+                    // Now multiply without skewing the high result.
+                    emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+                    bitShift = 63;
+                }
+
+                // Sign bit comparison to detect overflow.
+                emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
+            }
+#endif
+        }
+        else
+        {
+            // No immediate and no overflow check: emit the plain three-register form of 'ins'.
+            emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+        }
+    }
+
+    if (dst->gtOverflowEx())
+    {
+        NYI_ARM("emitInsTernary overflow");
+#if 0
+        assert(!varTypeIsFloating(dst));
+        codeGen->genCheckOverflow(dst);
+#endif
+    }
+
+    return dst->gtRegNum;
+}
+
 #endif // !LEGACY_BACKEND
 #endif // defined(_TARGET_ARM_)