summaryrefslogtreecommitdiff
path: root/src/jit/simdcodegenxarch.cpp
diff options
context:
space:
mode:
authorMichelle McDaniel <adiaaida@gmail.com>2016-08-09 13:15:05 -0700
committerMichelle McDaniel <adiaaida@gmail.com>2016-08-11 09:53:41 -0700
commit36a2b906c008cd3693a9ab5aef7b4402addd6c74 (patch)
tree27333c6f26304490169825ae1c17484534246dc6 /src/jit/simdcodegenxarch.cpp
parentab7d6a8df73d3d89210a778338feaa9fedf4146a (diff)
downloadcoreclr-36a2b906c008cd3693a9ab5aef7b4402addd6c74.tar.gz
coreclr-36a2b906c008cd3693a9ab5aef7b4402addd6c74.tar.bz2
coreclr-36a2b906c008cd3693a9ab5aef7b4402addd6c74.zip
Reformat jit sources with clang-tidy and format
This change is the result of running clang-tidy and clang-format on jit sources.
Diffstat (limited to 'src/jit/simdcodegenxarch.cpp')
-rw-r--r--src/jit/simdcodegenxarch.cpp705
1 file changed, 344 insertions, 361 deletions
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index 2c22d48267..14c4493f07 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -32,9 +32,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// - bits 6 and 7 of the immediate indicate which source item to select (0..3)
// - bits 4 and 5 of the immediate indicate which target item to insert into (0..3)
// - bits 0 to 3 of the immediate indicate which target item to zero
-#define INSERTPS_SOURCE_SELECT(i) (i<<6)
-#define INSERTPS_TARGET_SELECT(i) (i<<4)
-#define INSERTPS_ZERO(i) (1<<i)
+#define INSERTPS_SOURCE_SELECT(i) (i << 6)
+#define INSERTPS_TARGET_SELECT(i) (i << 4)
+#define INSERTPS_ZERO(i) (1 << i)
// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
//
@@ -42,21 +42,18 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// intrinsicId - SIMD intrinsic Id
// baseType - Base type of the SIMD vector
// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
-//
+//
//
// Return Value:
// Instruction (op) to be used, and immed is set if instruction requires an immediate operand.
//
-instruction
-CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
- var_types baseType,
- unsigned *ival /*=nullptr*/)
+instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
{
// Minimal required instruction set is SSE2.
assert(compiler->canUseSSE2());
instruction result = INS_invalid;
- switch(intrinsicId)
+ switch (intrinsicId)
{
case SIMDIntrinsicInit:
if (compiler->canUseAVX())
@@ -70,17 +67,34 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
}
switch (baseType)
{
- case TYP_FLOAT: result = INS_vbroadcastss; break;
- case TYP_DOUBLE: result = INS_vbroadcastsd; break;
- case TYP_ULONG: __fallthrough;
- case TYP_LONG: result = INS_vpbroadcastq; break;
- case TYP_UINT: __fallthrough;
- case TYP_INT: result = INS_vpbroadcastd; break;
- case TYP_CHAR: __fallthrough;
- case TYP_SHORT: result = INS_vpbroadcastw; break;
- case TYP_UBYTE: __fallthrough;
- case TYP_BYTE: result = INS_vpbroadcastb; break;
- default: unreached();
+ case TYP_FLOAT:
+ result = INS_vbroadcastss;
+ break;
+ case TYP_DOUBLE:
+ result = INS_vbroadcastsd;
+ break;
+ case TYP_ULONG:
+ __fallthrough;
+ case TYP_LONG:
+ result = INS_vpbroadcastq;
+ break;
+ case TYP_UINT:
+ __fallthrough;
+ case TYP_INT:
+ result = INS_vpbroadcastd;
+ break;
+ case TYP_CHAR:
+ __fallthrough;
+ case TYP_SHORT:
+ result = INS_vpbroadcastw;
+ break;
+ case TYP_UBYTE:
+ __fallthrough;
+ case TYP_BYTE:
+ result = INS_vpbroadcastb;
+ break;
+ default:
+ unreached();
}
break;
}
@@ -102,7 +116,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
else if (baseType == TYP_LONG || baseType == TYP_ULONG)
{
// We don't have a seperate SSE2 instruction and will
- // use the instruction meant for doubles since it is
+ // use the instruction meant for doubles since it is
// of the same size as a long.
result = INS_shufpd;
}
@@ -117,7 +131,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
{
result = INS_sqrtpd;
}
- else
+ else
{
unreached();
}
@@ -296,13 +310,13 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
assert(baseType != TYP_INT);
if (baseType == TYP_FLOAT)
- {
+ {
result = INS_cmpps;
assert(ival != nullptr);
*ival = 1;
}
else if (baseType == TYP_DOUBLE)
- {
+ {
result = INS_cmppd;
assert(ival != nullptr);
*ival = 1;
@@ -314,13 +328,13 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
assert(baseType != TYP_INT);
if (baseType == TYP_FLOAT)
- {
+ {
result = INS_cmpps;
assert(ival != nullptr);
*ival = 2;
}
else if (baseType == TYP_DOUBLE)
- {
+ {
result = INS_cmppd;
assert(ival != nullptr);
*ival = 2;
@@ -335,7 +349,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
if (baseType == TYP_INT)
{
result = INS_pcmpgtd;
- }
+ }
else if (baseType == TYP_SHORT)
{
result = INS_pcmpgtw;
@@ -460,8 +474,7 @@ CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId,
// Notes:
// This is currently only supported for floating point types.
//
-void
-CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
+void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
{
var_types targetType = compiler->getSIMDVectorType();
assert(varTypeIsFloating(type));
@@ -470,45 +483,46 @@ CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg
{
switch (moveType)
{
- case SMT_PreserveUpper:
- if (srcReg != targetReg)
- {
- instruction ins = ins_Store(type);
- if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
- {
- // In general, when we use a three-operands move instruction, we want to merge the src with
- // itself. This is an exception in that we actually want the "merge" behavior, so we must
- // specify it with all 3 operands.
- inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
- }
- else
+ case SMT_PreserveUpper:
+ if (srcReg != targetReg)
{
- inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ instruction ins = ins_Store(type);
+ if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
+ {
+ // In general, when we use a three-operands move instruction, we want to merge the src with
+ // itself. This is an exception in that we actually want the "merge" behavior, so we must
+ // specify it with all 3 operands.
+ inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
+ }
+ else
+ {
+ inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
}
- }
- break;
+ break;
- case SMT_ZeroInitUpper:
+ case SMT_ZeroInitUpper:
{
// insertps is a 128-bit only instruction, and clears the upper 128 bits, which is what we want.
// The insertpsImm selects which fields are copied and zero'd of the lower 128 bits, so we choose
// to zero all but the lower bits.
- unsigned int insertpsImm = (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3));
+ unsigned int insertpsImm =
+ (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3));
inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, insertpsImm);
break;
}
- case SMT_ZeroInitUpper_SrcHasUpperZeros:
- if (srcReg != targetReg)
- {
- instruction ins = ins_Copy(type);
- assert(!getEmitter()->IsThreeOperandMoveAVXInstruction(ins));
- inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
- }
- break;
+ case SMT_ZeroInitUpper_SrcHasUpperZeros:
+ if (srcReg != targetReg)
+ {
+ instruction ins = ins_Copy(type);
+ assert(!getEmitter()->IsThreeOperandMoveAVXInstruction(ins));
+ inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
- default:
- unreached();
+ default:
+ unreached();
}
}
else
@@ -518,45 +532,44 @@ CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg
switch (moveType)
{
- case SMT_PreserveUpper:
- if (srcReg != targetReg)
- {
- inst_RV_RV(ins_Store(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
- }
- break;
+ case SMT_PreserveUpper:
+ if (srcReg != targetReg)
+ {
+ inst_RV_RV(ins_Store(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
- case SMT_ZeroInitUpper:
- if (srcReg == targetReg)
- {
- // There is no guarantee that upper bits of op1Reg are zero.
- // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes.
- instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type);
- getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
- ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type);
- getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
- }
- else
- {
- genSIMDZero(targetType, TYP_FLOAT, targetReg);
- inst_RV_RV(ins_Store(type), targetReg, srcReg);
- }
- break;
+ case SMT_ZeroInitUpper:
+ if (srcReg == targetReg)
+ {
+ // There is no guarantee that upper bits of op1Reg are zero.
+ // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes.
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
+ }
+ else
+ {
+ genSIMDZero(targetType, TYP_FLOAT, targetReg);
+ inst_RV_RV(ins_Store(type), targetReg, srcReg);
+ }
+ break;
- case SMT_ZeroInitUpper_SrcHasUpperZeros:
- if (srcReg != targetReg)
- {
- inst_RV_RV(ins_Copy(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
- }
- break;
+ case SMT_ZeroInitUpper_SrcHasUpperZeros:
+ if (srcReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
- default:
- unreached();
+ default:
+ unreached();
}
}
}
-void
-CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg)
+void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg)
{
// pxor reg, reg
instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, baseType);
@@ -572,18 +585,17 @@ CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetR
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit);
- GenTree* op1 = simdNode->gtGetOp1();
- var_types baseType = simdNode->gtSIMDBaseType;
+ GenTree* op1 = simdNode->gtGetOp1();
+ var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types targetType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
- unsigned size = simdNode->gtSIMDSize;
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+ unsigned size = simdNode->gtSIMDSize;
// Should never see small int base type vectors except for zero initialization.
noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0));
@@ -592,7 +604,7 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
if (op1->isContained())
{
if (op1->IsIntegralConst(0) || op1->IsFPZero())
- {
+ {
genSIMDZero(targetType, baseType, targetReg);
}
else if (varTypeIsIntegral(baseType) && op1->IsIntegralConst(-1))
@@ -614,7 +626,8 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
else if (op1->OperIsLocalAddr())
{
unsigned offset = (op1->OperGet() == GT_LCL_FLD_ADDR) ? op1->gtLclFld.gtLclOffs : 0;
- getEmitter()->emitIns_R_S(ins, emitTypeSize(targetType), targetReg, op1->gtLclVarCommon.gtLclNum, offset);
+ getEmitter()->emitIns_R_S(ins, emitTypeSize(targetType), targetReg, op1->gtLclVarCommon.gtLclNum,
+ offset);
}
else
{
@@ -626,8 +639,7 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16)))
{
regNumber srcReg = genConsumeReg(op1);
- if (baseType == TYP_INT || baseType == TYP_UINT ||
- baseType == TYP_LONG || baseType == TYP_ULONG)
+ if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
{
ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
assert(ins != INS_invalid);
@@ -643,26 +655,25 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
// If we reach here, op1 is not contained and we are using SSE or it is a SubRegisterSIMDType.
// In either case we are going to use the SSE2 shuffle instruction.
- regNumber op1Reg = genConsumeReg(op1);
- unsigned shuffleControl = 0;
+ regNumber op1Reg = genConsumeReg(op1);
+ unsigned shuffleControl = 0;
if (compiler->isSubRegisterSIMDType(simdNode))
{
- assert(baseType == TYP_FLOAT);
+ assert(baseType == TYP_FLOAT);
// We cannot assume that upper bits of op1Reg or targetReg be zero.
// Therefore we need to explicitly zero out upper bits. This is
// essential for the shuffle operation performed below.
//
- // If op1 is a float/double constant, we would have loaded it from
+ // If op1 is a float/double constant, we would have loaded it from
// data section using movss/sd. Similarly if op1 is a memory op we
// would have loaded it using movss/sd. Movss/sd when loading a xmm reg
// from memory would zero-out upper bits. In these cases we can
// avoid explicitly zero'ing out targetReg if targetReg and op1Reg are the same or do it more efficiently
// if they are not the same.
- SIMDScalarMoveType moveType = op1->IsCnsFltOrDbl() || op1->isMemoryOp()
- ? SMT_ZeroInitUpper_SrcHasUpperZeros
- : SMT_ZeroInitUpper;
+ SIMDScalarMoveType moveType =
+ op1->IsCnsFltOrDbl() || op1->isMemoryOp() ? SMT_ZeroInitUpper_SrcHasUpperZeros : SMT_ZeroInitUpper;
genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, moveType);
@@ -675,20 +686,19 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
shuffleControl = 0x40;
}
else
- {
+ {
noway_assert(!"Unexpected size for SIMD type");
}
}
else // Vector<T>
- {
+ {
if (op1Reg != targetReg)
- {
+ {
if (varTypeIsFloating(baseType))
{
ins = ins_Copy(targetType);
}
- else if (baseType == TYP_INT || baseType == TYP_UINT ||
- baseType == TYP_LONG || baseType == TYP_ULONG)
+ else if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
{
ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
}
@@ -715,10 +725,9 @@ CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
{
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
// Right now this intrinsic is supported only on TYP_FLOAT vectors
var_types baseType = simdNode->gtSIMDBaseType;
@@ -729,8 +738,8 @@ CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
var_types targetType = simdNode->TypeGet();
- // Note that we cannot use targetReg before consumed all source operands. Therefore,
- // Need an internal register to stitch together all the values into a single vector
+ // Note that we cannot use targetReg before consumed all source operands. Therefore,
+ // Need an internal register to stitch together all the values into a single vector
// in an XMM reg.
assert(simdNode->gtRsvdRegs != RBM_NONE);
assert(genCountBits(simdNode->gtRsvdRegs) == 1);
@@ -744,19 +753,19 @@ CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
}
unsigned int baseTypeSize = genTypeSize(baseType);
- instruction insLeftShift = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
+ instruction insLeftShift = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
// We will first consume the list items in execution (left to right) order,
// and record the registers.
regNumber operandRegs[SIMD_INTRINSIC_MAX_PARAM_COUNT];
- unsigned initCount = 0;
+ unsigned initCount = 0;
for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2())
{
assert(list->OperGet() == GT_LIST);
GenTree* listItem = list->gtGetOp1();
assert(listItem->TypeGet() == baseType);
assert(!listItem->isContained());
- regNumber operandReg = genConsumeReg(listItem);
+ regNumber operandReg = genConsumeReg(listItem);
operandRegs[initCount] = operandReg;
initCount++;
}
@@ -773,7 +782,7 @@ CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
regNumber operandReg = operandRegs[initCount - i - 1];
if (offset != 0)
- {
+ {
getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, baseTypeSize);
}
genSIMDScalarMove(baseType, vectorReg, operandReg, SMT_PreserveUpper);
@@ -782,7 +791,7 @@ CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
}
noway_assert(offset == simdNode->gtSIMDSize);
-
+
// Load the initialized value.
if (targetReg != vectorReg)
{
@@ -800,19 +809,18 @@ CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast);
- GenTree* op1 = simdNode->gtGetOp1();
- var_types baseType = simdNode->gtSIMDBaseType;
+ GenTree* op1 = simdNode->gtGetOp1();
+ var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
var_types targetType = simdNode->TypeGet();
- regNumber op1Reg = genConsumeReg(op1);
- instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ regNumber op1Reg = genConsumeReg(op1);
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
if (simdNode->gtSIMDIntrinsicID != SIMDIntrinsicCast || targetReg != op1Reg)
{
inst_RV_RV(ins, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
@@ -830,41 +838,34 @@ CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
{
- assert( simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
- simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax
- );
-
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
- var_types baseType = simdNode->gtSIMDBaseType;
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types targetType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
genConsumeOperands(simdNode);
- regNumber op1Reg = op1->gtRegNum;
- regNumber op2Reg = op2->gtRegNum;
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
regNumber otherReg = op2Reg;
// Vector<Int>.Mul:
// SSE2 doesn't have an instruction to perform this operation directly
// whereas SSE4.1 does (pmulld). This is special cased and computed
// as follows.
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul &&
- baseType == TYP_INT &&
- iset == InstructionSet_SSE2)
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && iset == InstructionSet_SSE2)
{
// We need a temporary register that is NOT the same as the target,
// and we MAY need another.
@@ -874,14 +875,13 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg = genRegNumFromMask(tmpReg1Mask);
regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
// The register allocator guarantees the following conditions:
// - the only registers that may be the same among op1Reg, op2Reg, tmpReg
// and tmpReg2 are op1Reg and op2Reg.
// Let's be extra-careful and assert that now.
- assert((op1Reg != tmpReg) && (op1Reg != tmpReg2) &&
- (op2Reg != tmpReg) && (op2Reg != tmpReg2) &&
+ assert((op1Reg != tmpReg) && (op1Reg != tmpReg2) && (op2Reg != tmpReg) && (op2Reg != tmpReg2) &&
(tmpReg != tmpReg2));
// We will start by setting things up so that:
@@ -956,8 +956,8 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), tmpReg, 4);
// tmp = unsigned double word multiply of targetReg and tmpReg. Essentially
- // tmpReg[63:0] = op1[1] * op2[1]
- // tmpReg[127:64] = op1[3] * op2[3]
+ // tmpReg[63:0] = op1[1] * op2[1]
+ // tmpReg[127:64] = op1[3] * op2[3]
inst_RV_RV(INS_pmuludq, tmpReg, targetReg, targetType, emitActualTypeSize(targetType));
// Extract first and third double word results from tmpReg
@@ -965,7 +965,7 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg, tmpReg, 0x08);
// targetReg[63:0] = op1[0] * op2[0]
- // targetReg[127:64] = op1[2] * op2[2]
+ // targetReg[127:64] = op1[2] * op2[2]
inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
inst_RV_RV(INS_pmuludq, targetReg, op2Reg, targetType, emitActualTypeSize(targetType));
@@ -980,11 +980,9 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
{
instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
- //Currently AVX doesn't support integer.
- //if the ins is INS_cvtsi2ss or INS_cvtsi2sd, we won't use AVX.
- if (op1Reg != targetReg &&
- compiler->canUseAVX() &&
- !(ins == INS_cvtsi2ss || ins == INS_cvtsi2sd) &&
+ // Currently AVX doesn't support integer.
+ // if the ins is INS_cvtsi2ss or INS_cvtsi2sd, we won't use AVX.
+ if (op1Reg != targetReg && compiler->canUseAVX() && !(ins == INS_cvtsi2ss || ins == INS_cvtsi2sd) &&
getEmitter()->IsThreeOperandAVXInstruction(ins))
{
inst_RV_RV_RV(ins, targetReg, op1Reg, op2Reg, emitActualTypeSize(targetType));
@@ -1032,30 +1030,28 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
{
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
- var_types baseType = simdNode->gtSIMDBaseType;
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types targetType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
genConsumeOperands(simdNode);
- regNumber op1Reg = op1->gtRegNum;
- regNumber op2Reg = op2->gtRegNum;
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
regNumber otherReg = op2Reg;
- switch(simdNode->gtSIMDIntrinsicID)
+ switch (simdNode->gtSIMDIntrinsicID)
{
- case SIMDIntrinsicEqual:
- case SIMDIntrinsicGreaterThan:
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicGreaterThan:
{
// SSE2: vector<(u)long> relation op should be implemented in terms of TYP_INT comparison operations
- assert(((iset == InstructionSet_AVX) || (baseType != TYP_LONG)) &&
- (baseType != TYP_ULONG));
+ assert(((iset == InstructionSet_AVX) || (baseType != TYP_LONG)) && (baseType != TYP_ULONG));
// Greater-than: Floating point vectors use "<" with swapped operands
if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan)
@@ -1063,8 +1059,8 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
assert(!varTypeIsFloating(baseType));
}
- unsigned ival = 0;
- instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
+ unsigned ival = 0;
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
// targetReg = op1reg > op2reg
// Therefore, we can optimize if op1Reg == targetReg
@@ -1093,14 +1089,14 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
}
break;
- case SIMDIntrinsicLessThan:
- case SIMDIntrinsicLessThanOrEqual:
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
{
// Int vectors use ">" and ">=" with swapped operands
assert(varTypeIsFloating(baseType));
// Get the instruction opcode for compare operation
- unsigned ival;
+ unsigned ival;
instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
// targetReg = op1reg RelOp op2reg
@@ -1114,9 +1110,9 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
}
break;
- // (In)Equality that produces bool result instead of a bit vector
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
+ // (In)Equality that produces bool result instead of a bit vector
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
{
assert(genIsValidIntReg(targetReg));
@@ -1127,12 +1123,14 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
- regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
var_types simdType = op1->TypeGet();
// TODO-1stClassStructs: Temporary to minimize asmDiffs
if (simdType == TYP_DOUBLE)
+ {
simdType = TYP_SIMD8;
+ }
// Here we should consider TYP_SIMD12 operands as if they were TYP_SIMD16
// since both the operands will be in XMM registers.
@@ -1157,8 +1155,9 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
}
// For all integer types we can use TYP_INT comparison.
- unsigned ival = 0;
- instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival);
+ unsigned ival = 0;
+ instruction ins =
+ getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival);
if (varTypeIsFloating(baseType))
{
@@ -1168,7 +1167,7 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
{
inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType));
}
-
+
// If we have 32 bytes, start by anding the two 16-byte halves to get a 16-byte result.
if (compiler->canUseAVX() && (simdType == TYP_SIMD32))
{
@@ -1179,7 +1178,7 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
// tmpReg2[128..255] <- 0
// tmpReg2[0..127] <- tmpReg1[128..255]
// - vandps tmpReg1, tempReg2
- // This will zero-out upper portion of tmpReg1 and
+ // This will zero-out upper portion of tmpReg1 and
// lower portion of tmpReg1 is and of upper and lower 128-bit comparison result.
getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01);
inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
@@ -1225,22 +1224,22 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
// movzx targetReg, targetReg
//
getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0xFFFFFFFF);
- inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, TYP_INT, EA_1BYTE);
+ inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, TYP_INT,
+ EA_1BYTE);
assert(simdNode->TypeGet() == TYP_INT);
// Set the higher bytes to 0
inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
}
break;
- default:
- noway_assert(!"Unimplemented SIMD relational operation.");
- unreached();
+ default:
+ noway_assert(!"Unimplemented SIMD relational operation.");
+ unreached();
}
genProduceReg(simdNode);
}
-
//--------------------------------------------------------------------------------
// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
//
@@ -1250,20 +1249,21 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
var_types baseType = simdNode->gtSIMDBaseType;
var_types simdType = op1->TypeGet();
// TODO-1stClassStructs: Temporary to minimize asmDiffs
if (simdType == TYP_DOUBLE)
+ {
simdType = TYP_SIMD8;
+ }
var_types simdEvalType = (simdType == TYP_SIMD12) ? TYP_SIMD16 : simdType;
- regNumber targetReg = simdNode->gtRegNum;
+ regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
// DotProduct is only supported on floating point types.
@@ -1295,7 +1295,7 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
{
tmpReg = tmpReg1;
}
- else
+ else
{
assert(targetReg != tmpReg2);
tmpReg = tmpReg2;
@@ -1306,7 +1306,7 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
{
- // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
+ // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
if (op1Reg == targetReg)
{
// Best case
@@ -1330,7 +1330,7 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
// // position
// tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1)
// v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1)
- // tmp = v0
+ // tmp = v0
// tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2)
// v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3)
// // Essentially horizontal addtion of all elements.
@@ -1427,24 +1427,23 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
- var_types simdType = op1->TypeGet();
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types simdType = op1->TypeGet();
assert(varTypeIsSIMD(simdType));
- // op1 of TYP_SIMD12 should be considered as TYP_SIMD16,
+ // op1 of TYP_SIMD12 should be considered as TYP_SIMD16,
// since it is in XMM register.
if (simdType == TYP_SIMD12)
{
simdType = TYP_SIMD16;
}
- var_types baseType = simdNode->gtSIMDBaseType;
+ var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
var_types targetType = simdNode->TypeGet();
@@ -1455,7 +1454,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
// - the index of the value to be returned.
genConsumeOperands(simdNode);
regNumber srcReg = op1->gtRegNum;
-
+
// SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant.
// For the non-constant case, we will use the SIMD temp location to store the vector, and
// the load the desired element.
@@ -1465,28 +1464,28 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
{
unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
noway_assert(simdInitTempVarNum != BAD_VAR_NUM);
- bool isEBPbased;
- unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased);
+ bool isEBPbased;
+ unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased);
regNumber indexReg = op2->gtRegNum;
// Store the vector to the temp location.
- getEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)),
- emitTypeSize(simdType), srcReg, simdInitTempVarNum, 0);
+ getEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)),
+ emitTypeSize(simdType), srcReg, simdInitTempVarNum, 0);
// Now, load the desired element.
- getEmitter()->emitIns_R_ARX(ins_Move_Extend(baseType, false), // Load
- emitTypeSize(baseType), // Of the vector baseType
- targetReg, // To targetReg
- (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based
- indexReg, // Indexed
- genTypeSize(baseType), // by the size of the baseType
+ getEmitter()->emitIns_R_ARX(ins_Move_Extend(baseType, false), // Load
+ emitTypeSize(baseType), // Of the vector baseType
+ targetReg, // To targetReg
+ (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based
+ indexReg, // Indexed
+ genTypeSize(baseType), // by the size of the baseType
offs);
genProduceReg(simdNode);
return;
}
noway_assert(op2->isContained());
- unsigned int index = (unsigned int) op2->gtIntCon.gtIconVal;
+ unsigned int index = (unsigned int)op2->gtIntCon.gtIconVal;
unsigned int byteShiftCnt = index * genTypeSize(baseType);
// In general we shouldn't have an index greater than or equal to the length of the vector.
@@ -1507,8 +1506,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
}
else
{
- assert((byteShiftCnt == 0) ||
- varTypeIsFloating(baseType) ||
+ assert((byteShiftCnt == 0) || varTypeIsFloating(baseType) ||
(varTypeIsSmallInt(baseType) && (byteShiftCnt < 16)));
}
@@ -1536,7 +1534,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
// 1) baseType is floating point
// movaps targetReg, srcReg
// psrldq targetReg, byteShiftCnt <-- not generated if accessing zero'th element
- //
+ //
// 2) baseType is not floating point
// movaps tmpReg, srcReg <-- not generated if accessing zero'th element
// OR if tmpReg == srcReg
@@ -1555,8 +1553,8 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), targetReg, byteShiftCnt);
}
}
- else
- {
+ else
+ {
if (varTypeIsSmallInt(baseType))
{
// Note that pextrw extracts 16-bit value by index and zero extends it to 32-bits.
@@ -1582,7 +1580,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
bool ZeroOrSignExtnReqd = true;
if (baseSize == 1)
- {
+ {
if ((op2->gtIntCon.gtIconVal % 2) == 1)
{
// Right shift extracted word by 8-bits if index is odd if we are extracting a byte sized element.
@@ -1593,13 +1591,13 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
}
// else - we just need to zero/sign extend the byte since pextrw extracted 16-bits
}
- else
+ else
{
// Since Pextrw zero extends to 32-bits, we need sign extension in case of TYP_SHORT
assert(baseSize == 2);
ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
}
-
+
if (ZeroOrSignExtnReqd)
{
// Zero/sign extend the byte/short to 32-bits
@@ -1609,7 +1607,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
else
{
// We need a temp xmm register if the baseType is not floating point and
- // accessing non-zero'th element.
+ // accessing non-zero'th element.
instruction ins;
if (byteShiftCnt != 0)
@@ -1650,28 +1648,27 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
//
// TODO-CQ: Use SIMDIntrinsicShuffleSSE2 for the SSE2 case.
//
-void
-CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
{
// Determine index based on intrinsic ID
int index = -1;
- switch(simdNode->gtSIMDIntrinsicID)
+ switch (simdNode->gtSIMDIntrinsicID)
{
- case SIMDIntrinsicSetX:
- index = 0;
- break;
- case SIMDIntrinsicSetY:
- index = 1;
- break;
- case SIMDIntrinsicSetZ:
- index = 2;
- break;
- case SIMDIntrinsicSetW:
- index = 3;
- break;
+ case SIMDIntrinsicSetX:
+ index = 0;
+ break;
+ case SIMDIntrinsicSetY:
+ index = 1;
+ break;
+ case SIMDIntrinsicSetZ:
+ index = 2;
+ break;
+ case SIMDIntrinsicSetW:
+ index = 3;
+ break;
- default:
- unreached();
+ default:
+ unreached();
}
assert(index != -1);
@@ -1680,7 +1677,7 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();
- var_types baseType = simdNode->gtSIMDBaseType;
+ var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
var_types targetType = simdNode->TypeGet();
@@ -1708,7 +1705,7 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
noway_assert(baseType == TYP_FLOAT);
if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
- {
+ {
// We need one additional int register as scratch
assert(simdNode->gtRsvdRegs != RBM_NONE);
assert(genCountBits(simdNode->gtRsvdRegs) == 1);
@@ -1722,15 +1719,15 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
// First insert the lower 16-bits of tmpReg in targetReg at 2*index position
// since every float has two 16-bit words.
- getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2*index);
+ getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index);
// Logical right shift tmpReg by 16-bits and insert in targetReg at 2*index + 1 position
inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, tmpReg, 16);
- getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2*index+1);
+ getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index + 1);
}
else
{
- unsigned int insertpsImm = (INSERTPS_SOURCE_SELECT(0)|INSERTPS_TARGET_SELECT(index));
+ unsigned int insertpsImm = (INSERTPS_SOURCE_SELECT(0) | INSERTPS_TARGET_SELECT(index));
inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, op2Reg, insertpsImm);
}
@@ -1746,8 +1743,7 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
// Return Value:
// None.
//
-void
-CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2);
noway_assert(compiler->getSIMDInstructionSet() == InstructionSet_SSE2);
@@ -1756,10 +1752,10 @@ CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
GenTree* op2 = simdNode->gtGetOp2();
assert(op2->isContained());
assert(op2->IsCnsIntOrI());
- int shuffleControl = (int) op2->AsIntConCommon()->IconValue();
- var_types baseType = simdNode->gtSIMDBaseType;
- var_types targetType = simdNode->TypeGet();
- regNumber targetReg = simdNode->gtRegNum;
+ int shuffleControl = (int)op2->AsIntConCommon()->IconValue();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ var_types targetType = simdNode->TypeGet();
+ regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
regNumber op1Reg = genConsumeReg(op1);
@@ -1780,19 +1776,18 @@ CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
//
// Arguments:
// treeNode - tree node that is attempting to store indirect
-//
+//
//
// Return Value:
// None.
//
-void
-CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
+void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_STOREIND);
GenTree* addr = treeNode->gtOp.gtOp1;
GenTree* data = treeNode->gtOp.gtOp2;
-
+
// addr and data should not be contained.
assert(!data->isContained());
assert(!addr->isContained());
@@ -1827,18 +1822,17 @@ CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
//
// Arguments:
// treeNode - tree node of GT_IND
-//
+//
//
// Return Value:
// None.
//
-void
-CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
+void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_IND);
- regNumber targetReg = treeNode->gtRegNum;
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
assert(!op1->isContained());
regNumber operandReg = genConsumeReg(op1);
@@ -1846,7 +1840,7 @@ CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
assert(treeNode->gtRsvdRegs != RBM_NONE);
assert(genCountBits(treeNode->gtRsvdRegs) == 2);
- regNumber tmpReg = REG_NA;
+ regNumber tmpReg = REG_NA;
regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
tmpRegsMask &= ~tmpReg1Mask;
@@ -1858,7 +1852,7 @@ CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
{
tmpReg = tmpReg1;
}
- else
+ else
{
assert(targetReg != tmpReg2);
tmpReg = tmpReg2;
@@ -1885,16 +1879,15 @@ CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
//
// Arguments:
// treeNode - tree node that is attempting to store TYP_SIMD12 field
-//
+//
// Return Value:
// None.
//
-void
-CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
+void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_STORE_LCL_FLD);
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
assert(varNum < compiler->lvaCount);
@@ -1914,7 +1907,7 @@ CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
// Store upper 4 bytes
- getEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs+8);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
}
//-----------------------------------------------------------------------------
@@ -1924,25 +1917,24 @@ CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
//
// Arguments:
// treeNode - tree node that is attempting to load TYP_SIMD12 field
-//
+//
// Return Value:
// None.
//
-void
-CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
+void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_LCL_FLD);
- regNumber targetReg = treeNode->gtRegNum;
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
- unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ regNumber targetReg = treeNode->gtRegNum;
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
assert(varNum < compiler->lvaCount);
// Need an addtional Xmm register to read upper 4 bytes
assert(treeNode->gtRsvdRegs != RBM_NONE);
assert(genCountBits(treeNode->gtRsvdRegs) == 2);
- regNumber tmpReg = REG_NA;
+ regNumber tmpReg = REG_NA;
regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
tmpRegsMask &= ~tmpReg1Mask;
@@ -1954,7 +1946,7 @@ CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
{
tmpReg = tmpReg1;
}
- else
+ else
{
assert(targetReg != tmpReg2);
tmpReg = tmpReg2;
@@ -1963,9 +1955,9 @@ CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
assert(tmpReg != targetReg);
// Read upper 4 bytes to tmpReg
- getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_FLOAT, false), EA_4BYTE, tmpReg, varNum, offs+8);
+ getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_FLOAT, false), EA_4BYTE, tmpReg, varNum, offs + 8);
- // Read lower 8 bytes to targetReg
+ // Read lower 8 bytes to targetReg
getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs);
// combine upper 4 bytes and lower 8 bytes in targetReg
@@ -1993,15 +1985,14 @@ CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
// value to the stack. (Note that if there are no caller-save registers available, the entire 32 byte
// value will be spilled to the stack.)
//
-void
-CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
GenTree* op1 = simdNode->gtGetOp1();
assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32);
regNumber targetReg = simdNode->gtRegNum;
- regNumber op1Reg = genConsumeReg(op1);
+ regNumber op1Reg = genConsumeReg(op1);
assert(op1Reg != REG_NA);
assert(targetReg != REG_NA);
getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, targetReg, op1Reg, 0x01);
@@ -2030,16 +2021,15 @@ CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
// spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
// as it is in the spill descriptor for the register from which it was saved.
//
-void
-CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
GenTree* op1 = simdNode->gtGetOp1();
assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32);
- regNumber srcReg = simdNode->gtRegNum;
+ regNumber srcReg = simdNode->gtRegNum;
regNumber lclVarReg = genConsumeReg(op1);
- unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
+ unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
assert(lclVarReg != REG_NA);
assert(srcReg != REG_NA);
if (simdNode->gtFlags & GTF_SPILLED)
@@ -2065,92 +2055,85 @@ CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
// Currently, we only recognize SIMDVector<float> and SIMDVector<int>, and
// a limited set of methods.
//
-void
-CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
+void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
{
// NYI for unsupported base types
- if (simdNode->gtSIMDBaseType != TYP_INT &&
- simdNode->gtSIMDBaseType != TYP_LONG &&
- simdNode->gtSIMDBaseType != TYP_FLOAT &&
- simdNode->gtSIMDBaseType != TYP_DOUBLE &&
- simdNode->gtSIMDBaseType != TYP_CHAR &&
- simdNode->gtSIMDBaseType != TYP_UBYTE &&
- simdNode->gtSIMDBaseType != TYP_SHORT &&
- simdNode->gtSIMDBaseType != TYP_BYTE &&
- simdNode->gtSIMDBaseType != TYP_UINT &&
- simdNode->gtSIMDBaseType != TYP_ULONG
- )
+ if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG &&
+ simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE &&
+ simdNode->gtSIMDBaseType != TYP_CHAR && simdNode->gtSIMDBaseType != TYP_UBYTE &&
+ simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE &&
+ simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG)
{
noway_assert(!"SIMD intrinsic with unsupported base type.");
}
- switch(simdNode->gtSIMDIntrinsicID)
+ switch (simdNode->gtSIMDIntrinsicID)
{
- case SIMDIntrinsicInit:
- genSIMDIntrinsicInit(simdNode);
- break;
+ case SIMDIntrinsicInit:
+ genSIMDIntrinsicInit(simdNode);
+ break;
- case SIMDIntrinsicInitN:
- genSIMDIntrinsicInitN(simdNode);
- break;
+ case SIMDIntrinsicInitN:
+ genSIMDIntrinsicInitN(simdNode);
+ break;
- case SIMDIntrinsicSqrt:
- case SIMDIntrinsicCast:
- genSIMDIntrinsicUnOp(simdNode);
- break;
+ case SIMDIntrinsicSqrt:
+ case SIMDIntrinsicCast:
+ genSIMDIntrinsicUnOp(simdNode);
+ break;
- case SIMDIntrinsicAdd:
- case SIMDIntrinsicSub:
- case SIMDIntrinsicMul:
- case SIMDIntrinsicDiv:
- case SIMDIntrinsicBitwiseAnd:
- case SIMDIntrinsicBitwiseAndNot:
- case SIMDIntrinsicBitwiseOr:
- case SIMDIntrinsicBitwiseXor:
- case SIMDIntrinsicMin:
- case SIMDIntrinsicMax:
- genSIMDIntrinsicBinOp(simdNode);
- break;
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicDiv:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ genSIMDIntrinsicBinOp(simdNode);
+ break;
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
- case SIMDIntrinsicEqual:
- case SIMDIntrinsicLessThan:
- case SIMDIntrinsicGreaterThan:
- case SIMDIntrinsicLessThanOrEqual:
- case SIMDIntrinsicGreaterThanOrEqual:
- genSIMDIntrinsicRelOp(simdNode);
- break;
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicGreaterThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ case SIMDIntrinsicGreaterThanOrEqual:
+ genSIMDIntrinsicRelOp(simdNode);
+ break;
- case SIMDIntrinsicDotProduct:
- genSIMDIntrinsicDotProduct(simdNode);
- break;
+ case SIMDIntrinsicDotProduct:
+ genSIMDIntrinsicDotProduct(simdNode);
+ break;
- case SIMDIntrinsicGetItem:
- genSIMDIntrinsicGetItem(simdNode);
- break;
+ case SIMDIntrinsicGetItem:
+ genSIMDIntrinsicGetItem(simdNode);
+ break;
- case SIMDIntrinsicShuffleSSE2:
- genSIMDIntrinsicShuffleSSE2(simdNode);
- break;
+ case SIMDIntrinsicShuffleSSE2:
+ genSIMDIntrinsicShuffleSSE2(simdNode);
+ break;
- case SIMDIntrinsicSetX:
- case SIMDIntrinsicSetY:
- case SIMDIntrinsicSetZ:
- case SIMDIntrinsicSetW:
- genSIMDIntrinsicSetItem(simdNode);
- break;
+ case SIMDIntrinsicSetX:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetW:
+ genSIMDIntrinsicSetItem(simdNode);
+ break;
- case SIMDIntrinsicUpperSave:
- genSIMDIntrinsicUpperSave(simdNode);
- break;
- case SIMDIntrinsicUpperRestore:
- genSIMDIntrinsicUpperRestore(simdNode);
- break;
+ case SIMDIntrinsicUpperSave:
+ genSIMDIntrinsicUpperSave(simdNode);
+ break;
+ case SIMDIntrinsicUpperRestore:
+ genSIMDIntrinsicUpperRestore(simdNode);
+ break;
- default:
- noway_assert(!"Unimplemented SIMD intrinsic.");
- unreached();
+ default:
+ noway_assert(!"Unimplemented SIMD intrinsic.");
+ unreached();
}
}