summaryrefslogtreecommitdiff
path: root/src/jit/simdcodegenxarch.cpp
diff options
context:
space:
mode:
authordotnet-bot <dotnet-bot@microsoft.com>2015-03-17 11:54:26 -0700
committerdotnet-bot <dotnet-bot@microsoft.com>2015-03-17 11:54:26 -0700
commitfd60de4934239cd772c7dfa316a9befd3da18f04 (patch)
treeb76f6d3f7d340a852adafe24bc62af0f1ed7d0dd /src/jit/simdcodegenxarch.cpp
parent0805738bc10470dc60b65180308e02d305e0be84 (diff)
downloadcoreclr-fd60de4934239cd772c7dfa316a9befd3da18f04.tar.gz
coreclr-fd60de4934239cd772c7dfa316a9befd3da18f04.tar.bz2
coreclr-fd60de4934239cd772c7dfa316a9befd3da18f04.zip
Merge changes from parent branch
[tfs-changeset: 1434167]
Diffstat (limited to 'src/jit/simdcodegenxarch.cpp')
-rw-r--r--src/jit/simdcodegenxarch.cpp119
1 files changed, 91 insertions, 28 deletions
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index 8ea039f47f..8d6a21edf5 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -813,52 +813,115 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
baseType == TYP_INT &&
iset == InstructionSet_SSE2)
{
- // We need an additional xmm register as temp.
+ // We need a temporary register that is NOT the same as the target,
+ // and we MAY need another.
assert(simdNode->gtRsvdRegs != RBM_NONE);
assert(genCountBits(simdNode->gtRsvdRegs) == 2);
regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
- regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
-
- // tmpReg1 = op1 >> 4-bytes
- inst_RV_RV(INS_movaps, tmpReg1, op1Reg, targetType, emitActualTypeSize(targetType));
- getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), tmpReg1, 4);
-
- // tmpReg2 = op2 >> 4-bytes
- inst_RV_RV(INS_movaps, tmpReg2, op2Reg);
- getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), tmpReg2, 4);
-
- // tmpReg1 = unsigned double word multiply of tmpReg1 and tmpReg2. Essentially
- // tmpReg1[63:0] = op1[1] * op2[1]
- // tmpReg2[127:64] = op1[3] * op2[3]
- inst_RV_RV(INS_pmuludq, tmpReg1, tmpReg2, targetType, emitActualTypeSize(targetType));
+ regNumber tmpReg = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ // The register allocator guarantees the following conditions:
+ // - the only registers that may be the same among op1Reg, op2Reg, tmpReg
+ // and tmpReg2 are op1Reg and op2Reg.
+ // Let's be extra-careful and assert that now.
+ assert((op1Reg != tmpReg) && (op1Reg != tmpReg2) &&
+ (op2Reg != tmpReg) && (op2Reg != tmpReg2) &&
+ (tmpReg != tmpReg2));
+
+ // We will start by setting things up so that:
+ // - We have op1 in op1Reg and targetReg, and they are different registers.
+ // - We have op2 in op2Reg and tmpReg
+ // - Either we will leave the input registers (the original op1Reg and op2Reg) unmodified,
+ // OR they are the targetReg that will be produced.
+ // (Note that in the code we generate below op1Reg and op2Reg are never written.)
+ // We will copy things as necessary to ensure that this is the case.
+ // Note that we can swap op1 and op2, since multiplication is commutative.
+ // We will not modify the values in op1Reg and op2Reg.
+ // (Though note that if either op1 or op2 is the same as targetReg, we will make
+ // a copy and use that copy as the input register. In that case we WILL modify
+ // the original value in the register, but will wind up with the result in targetReg
+ // in the end, as expected.)
+
+ // First, we need a tmpReg that is NOT the same as targetReg.
+ // Note that if op1Reg or op2Reg is the same as targetReg, tmpReg2 is still free to hold a copy of it:
+ // neither operand may share a register with tmpReg, so this branch cannot have been taken in that case.
+ if (tmpReg == targetReg)
+ {
+ tmpReg = tmpReg2;
+ }
- // targetReg[63:0] = op1[0] * op2[0]
- // targetReg[127:64] = op1[2] * op2[2]
if (op2Reg == targetReg)
{
- otherReg = op1Reg;
+ // We will swap the operands.
+ // Since the code below only deals with registers, this now becomes the case where
+ // op1Reg == targetReg.
+ op2Reg = op1Reg;
+ op1Reg = targetReg;
}
- else if (op1Reg != targetReg)
+ if (op1Reg == targetReg)
{
+ // Copy op1, and make tmpReg2 the new op1Reg.
+ // Note that those regs can't be the same, as we asserted above.
+ // Also, we know that tmpReg2 hasn't been used, because we couldn't have hit
+ // the "tmpReg == targetReg" case.
+ inst_RV_RV(INS_movaps, tmpReg2, op1Reg, targetType, emitActualTypeSize(targetType));
+ op1Reg = tmpReg2;
+ inst_RV_RV(INS_movaps, tmpReg, op2Reg, targetType, emitActualTypeSize(targetType));
+ // However, we have one more case to worry about: what if op2Reg is also targetReg
+ // (i.e. we have the same operand as op1 and op2)?
+ // In that case we will set op2Reg to the same register as op1Reg.
+ if (op2Reg == targetReg)
+ {
+ op2Reg = tmpReg2;
+ }
+ }
+ else
+ {
+ // Copy op1 to targetReg and op2 to tmpReg.
inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_movaps, tmpReg, op2Reg, targetType, emitActualTypeSize(targetType));
}
- inst_RV_RV(INS_pmuludq, targetReg, otherReg, targetType, emitActualTypeSize(targetType));
+ // Let's assert that things are as we expect.
+ // - We have op1 in op1Reg and targetReg, and they are different registers.
+ assert(op1Reg != targetReg);
+ // - We have op2 in op2Reg and tmpReg, and they are different registers.
+ assert(op2Reg != tmpReg);
+ // - Either we are going to leave op1's reg unmodified, or it is the targetReg.
+ assert((op1->gtRegNum == op1Reg) || (op1->gtRegNum == op2Reg) || (op1->gtRegNum == targetReg));
+ // - Similarly, we are going to leave op2's reg unmodified, or it is the targetReg.
+ assert((op2->gtRegNum == op1Reg) || (op2->gtRegNum == op2Reg) || (op2->gtRegNum == targetReg));
+
+ // Now we can generate the code.
- // Extract first and third double word results from tmpReg1
- // tmpReg2 = shuffle(0,0,2,0) of tmpReg1
- getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg2, tmpReg1, 0x08);
+ // targetReg = op1 >> 4-bytes (op1 is already in targetReg)
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), targetReg, 4);
+
+ // tmpReg = op2 >> 4-bytes (op2 is already in tmpReg)
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), tmpReg, 4);
+
+ // tmpReg = unsigned double word multiply of targetReg and tmpReg. Essentially
+ // tmpReg[63:0] = op1[1] * op2[1]
+ // tmpReg[127:64] = op1[3] * op2[3]
+ inst_RV_RV(INS_pmuludq, tmpReg, targetReg, targetType, emitActualTypeSize(targetType));
+
+ // Extract first and third double word results from tmpReg
+ // tmpReg = shuffle(0,0,2,0) of tmpReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg, tmpReg, 0x08);
+
+ // targetReg[63:0] = op1[0] * op2[0]
+ // targetReg[127:64] = op1[2] * op2[2]
+ inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_pmuludq, targetReg, op2Reg, targetType, emitActualTypeSize(targetType));
// Extract first and third double word results from targetReg
- // tmpReg1 = shuffle(0,0,2,0) of targetReg
- getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg1, targetReg, 0x08);
+ // targetReg = shuffle(0,0,2,0) of targetReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), targetReg, targetReg, 0x08);
// pack the results into a single vector
- inst_RV_RV(INS_movaps, targetReg, tmpReg1, targetType, emitActualTypeSize(targetType));
- inst_RV_RV(INS_punpckldq, targetReg, tmpReg2, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_punpckldq, targetReg, tmpReg, targetType, emitActualTypeSize(targetType));
}
else
{