summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
author Tanner Gooding <tagoo@outlook.com> 2018-08-07 13:31:30 -0700
committer Tanner Gooding <tagoo@outlook.com> 2018-08-09 12:35:54 -0700
commit 2110b70551bac4f4ad65fcbbdfd853eea9000b4e (patch)
tree fc5cfac5e5e61758f6f8b307ca6c1e8e7c173d31 /src/jit
parent ce175cbb849378c2ef1985ff3994cd0d82f3d8fe (diff)
download coreclr-2110b70551bac4f4ad65fcbbdfd853eea9000b4e.tar.gz
coreclr-2110b70551bac4f4ad65fcbbdfd853eea9000b4e.tar.bz2
coreclr-2110b70551bac4f4ad65fcbbdfd853eea9000b4e.zip
Implementing the Avx.MaskStore intrinsics
Diffstat (limited to 'src/jit')
-rw-r--r-- src/jit/emitfmtsxarch.h 2
-rw-r--r-- src/jit/emitxarch.cpp 91
-rw-r--r-- src/jit/emitxarch.h 2
-rw-r--r-- src/jit/hwintrinsiccodegenxarch.cpp 21
-rw-r--r-- src/jit/hwintrinsiclistxarch.h 1
-rw-r--r-- src/jit/hwintrinsicxarch.cpp 17
-rw-r--r-- src/jit/instrsxarch.h 4
7 files changed, 118 insertions, 20 deletions
diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h
index 190a6e0ea9..b070b3d083 100644
--- a/src/jit/emitfmtsxarch.h
+++ b/src/jit/emitfmtsxarch.h
@@ -195,6 +195,8 @@ IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read
IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg
IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg
+IF_DEF(AWR_RRD_RRD, IS_AM_WR|IS_R1_RD|IS_R2_RD, AMD ) // write [adr], read reg, read reg
+
IF_DEF(ARD_CNS, IS_AM_RD, AMD_CNS) // read [adr], const
IF_DEF(AWR_CNS, IS_AM_WR, AMD_CNS) // write [adr], const
IF_DEF(ARW_CNS, IS_AM_RW, AMD_CNS) // r/w [adr], const
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 673bb550f8..08860079c4 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -4148,6 +4148,42 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off
emitCurIGsize += sz;
}
+//------------------------------------------------------------------------
+// emitIns_AR_R_R: emits the code for an instruction that writes to a memory operand addressed by a base
+// register plus offset and reads two register operands; it does not return a value in a register
+//
+// Arguments:
+// ins -- The instruction being emitted (asserted to be a three-operand SSE/AVX instruction)
+// attr -- The emit attribute
+// op2Reg -- The register of the second operand (recorded as idReg1)
+// op3Reg -- The register of the third operand (recorded as idReg2)
+// base -- The base register used for the memory address (first operand)
+// offs -- The offset from base
+// Notes: no index register is used and the instruction format is IF_AWR_RRD_RRD
+//
+void emitter::emitIns_AR_R_R(
+ instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs)
+{
+ assert(IsSSEOrAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+
+ instrDesc* id = emitNewInstrAmd(attr, offs); // address-mode descriptor; offs is handed over here
+
+ id->idIns(ins);
+ id->idReg1(op2Reg); // second operand
+ id->idReg2(op3Reg); // third operand; emitOutputAM reads idReg2 as the reg345 operand for IF_AWR_RRD_RRD
+
+ id->idInsFmt(IF_AWR_RRD_RRD); // write [adr], read reg, read reg
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; // no index register
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); // sized from the MR encoding of ins
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz; // account for this instruction in the current instruction group
+}
+
void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir)
{
ssize_t offs = indir->Offset();
@@ -8586,6 +8622,15 @@ void emitter::emitDispIns(
printf(", %s", emitRegName(id->idReg1(), attr));
break;
+ case IF_AWR_RRD_RRD:
+ {
+ printf("%s", sstr);
+ emitDispAddrMode(id);
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ printf(", %s", emitRegName(id->idReg2(), attr));
+ break;
+ }
+
case IF_ARD_CNS:
case IF_AWR_CNS:
case IF_ARW_CNS:
@@ -9412,12 +9457,23 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
if (IsDstDstSrcAVXInstruction(ins))
{
- regNumber src1 = id->idReg2();
+ regNumber src1 = REG_NA;
- if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS) &&
- (id->idInsFmt() != IF_RWR_RRD_ARD_RRD))
+ switch (id->idInsFmt())
{
- src1 = id->idReg1();
+ case IF_RWR_RRD_ARD:
+ case IF_RWR_RRD_ARD_CNS:
+ case IF_RWR_RRD_ARD_RRD:
+ {
+ src1 = id->idReg2();
+ break;
+ }
+
+ default:
+ {
+ src1 = id->idReg1();
+ break;
+ }
}
// encode source operand reg in 'vvvv' bits in 1's complement form
@@ -9469,7 +9525,20 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
if (reg345 == REG_NA)
{
- reg345 = id->idReg1();
+ switch (id->idInsFmt())
+ {
+ case IF_AWR_RRD_RRD:
+ {
+ reg345 = id->idReg2();
+ break;
+ }
+
+ default:
+ {
+ reg345 = id->idReg1();
+ break;
+ }
+ }
}
unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
@@ -10100,6 +10169,9 @@ DONE:
case IF_AWR_RRD:
break;
+ case IF_AWR_RRD_RRD:
+ break;
+
case IF_ARD_CNS:
case IF_AWR_CNS:
break;
@@ -13080,6 +13152,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
sz = emitSizeOfInsDsc(id);
break;
+ case IF_AWR_RRD_RRD:
+ {
+ code = insCodeMR(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ dst = emitOutputAM(dst, id, code);
+ sz = emitSizeOfInsDsc(id);
+ break;
+ }
+
case IF_ARD_CNS:
case IF_AWR_CNS:
case IF_ARW_CNS:
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index e7a93a1bc6..3ec962f613 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -319,6 +319,8 @@ void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg
void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs);
+void emitIns_AR_R_R(instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs);
+
void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir);
void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival);
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 42302840ff..ad1be6a061 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -253,15 +253,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (category == HW_Category_MemoryStore)
{
- assert(intrinsicId == NI_SSE2_MaskMove);
- assert(targetReg == REG_NA);
-
- // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
- if (op3Reg != REG_EDI)
+ if (intrinsicId == NI_AVX_MaskStore)
+ {
+ emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0);
+ }
+ else
{
- emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
+ assert(intrinsicId == NI_SSE2_MaskMove);
+ assert(targetReg == REG_NA);
+
+ // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
+ if (op3Reg != REG_EDI)
+ {
+ emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
+ }
+ emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
}
- emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
}
else
{
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index ff6f2ff372..f085ff545f 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -368,6 +368,7 @@ HARDWARE_INTRINSIC(AVX_LoadVector256, "LoadVector2
HARDWARE_INTRINSIC(AVX_Max, "Max", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX_Min, "Min", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX_MaskLoad, "MaskLoad", AVX, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX_MaskStore, "MaskStore", AVX, -1, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX_MoveMask, "MoveMask", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX_Multiply, "Multiply", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX_Or, "Or", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 9032138e31..3cf1ac6664 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -184,11 +184,16 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI
{
typeHnd = sig->retTypeSigClass;
}
- else
+ else if (HWIntrinsicInfo::BaseTypeFromFirstArg(id))
{
- assert(HWIntrinsicInfo::BaseTypeFromFirstArg(id));
typeHnd = comp->info.compCompHnd->getArgClass(sig, sig->args);
}
+ else
+ {
+ assert(HWIntrinsicInfo::BaseTypeFromSecondArg(id));
+ CORINFO_ARG_LIST_HANDLE secondArg = comp->info.compCompHnd->getArgNext(sig->args);
+ typeHnd = comp->info.compCompHnd->getArgClass(sig, secondArg);
+ }
unsigned simdSize = 0;
var_types baseType = comp->getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize);
@@ -385,7 +390,6 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
}
// These ISAs are partially implemented
- case InstructionSet_AVX:
case InstructionSet_AVX2:
case InstructionSet_BMI1:
case InstructionSet_BMI2:
@@ -395,14 +399,15 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
}
// These ISAs are fully implemented
+ case InstructionSet_AVX:
+ case InstructionSet_FMA:
+ case InstructionSet_LZCNT:
+ case InstructionSet_POPCNT:
case InstructionSet_SSE:
case InstructionSet_SSE2:
case InstructionSet_SSE3:
case InstructionSet_SSSE3:
case InstructionSet_SSE41:
- case InstructionSet_FMA:
- case InstructionSet_LZCNT:
- case InstructionSet_POPCNT:
{
return true;
}
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 87fd76a5d6..e38638aa40 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -498,8 +498,8 @@ INST3( vpermilpdvar, "permilpdvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( vperm2f128, "perm2f128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x06)) // Permute Floating-Point Values
INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1A)) // Broadcast packed float values read from memory to entire ymm register
INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register
-INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Loads Float
-INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Loads Double
+INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, SSE38(0x2E), BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
+INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, SSE38(0x2F), BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
// enum name FP updmode rf wf MR MI RM