diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/codegenlinear.h | 2 | ||||
-rw-r--r-- | src/jit/emitxarch.cpp | 14 | ||||
-rw-r--r-- | src/jit/emitxarch.h | 4 | ||||
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 275 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 66 |
5 files changed, 341 insertions, 20 deletions
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 0579fa5a00..1953686310 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -115,6 +115,8 @@ void genPutArgStkSIMD12(GenTree* treeNode); #ifdef FEATURE_HW_INTRINSICS void genHWIntrinsic(GenTreeHWIntrinsic* node); #if defined(_TARGET_XARCH_) +void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr); +void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins); void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins); void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins); void genHWIntrinsic_R_R_R_RM( diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 4283f1c336..6a78bbf6cc 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -4024,7 +4024,7 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off emitCurIGsize += sz; } -void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt) +void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir) { ssize_t offs = indir->Offset(); instrDesc* id = emitNewInstrAmd(attr, offs); @@ -4032,7 +4032,7 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre id->idIns(ins); id->idReg1(reg1); - emitHandleMemOp(indir, id, fmt, ins); + emitHandleMemOp(indir, id, IF_RRW_ARD, ins); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -4160,7 +4160,7 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int } void emitter::emitIns_R_R_A( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt) + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir) { assert(IsSSEOrAVXInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); @@ -4172,7 +4172,7 @@ void emitter::emitIns_R_R_A( id->idReg1(reg1); id->idReg2(reg2); - emitHandleMemOp(indir, id, fmt, ins); + emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); id->idCodeSize(sz); @@ -5407,7 +5407,7 @@ void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targe { if (UseVEXEncoding()) { - emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir, IF_RWR_RRD_ARD); + emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir); } else { @@ -5415,7 +5415,7 @@ void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targe { emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); } - emitIns_R_A(ins, attr, targetReg, indir, IF_RRW_ARD); + emitIns_R_A(ins, attr, targetReg, indir); } } @@ -5589,7 +5589,7 @@ void emitter::emitIns_SIMD_R_R_R_A( emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); } - emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir, IF_RWR_RRD_ARD); + emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir); } void emitter::emitIns_SIMD_R_R_R_AR( diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index 663e42c535..7e7bffbbe3 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -318,7 +318,7 @@ void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs); -void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt); +void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir); void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival); @@ -328,7 +328,7 @@ void emitIns_R_C_I(instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD void emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival); -void emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt); +void emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir); void emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs); diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 59cb48abcf..2e97be2b6f 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -84,6 +84,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { genConsumeOperands(node); op1Reg = op1->gtRegNum; + if (category == HW_Category_MemoryLoad) { emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0); @@ -95,11 +96,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) else if ((ival != -1) && varTypeIsFloating(baseType)) { assert((ival >= 0) && (ival <= 127)); - emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, (int8_t)ival); + genHWIntrinsic_R_RM_I(node, ins); } else { - emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg); + genHWIntrinsic_R_RM(node, ins, simdSize); } break; } @@ -298,6 +299,263 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } //------------------------------------------------------------------------ +// genHWIntrinsic_R_RM: Generates the code for a hardware intrinsic node that takes a +// register/memory operand and that returns a value in register +// +// Arguments: +// node - The hardware intrinsic node +// ins - The instruction being generated +// attr - The emit attribute for the instruciton being generated +// +void CodeGen::genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr) +{ + var_types targetType = node->TypeGet(); + regNumber targetReg = node->gtRegNum; + GenTree* op1 = node->gtGetOp1(); + emitter* emit = getEmitter(); + + assert(targetReg != REG_NA); + assert(node->gtGetOp2() == nullptr); + assert(!node->OperIsCommutative()); + + if (op1->isContained() || op1->isUsedFromSpillTemp()) + { + assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + +#if DEBUG + bool supportsRegOptional = false; + bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional); + assert(isContainable || (supportsRegOptional && op1->IsRegOptional())); +#endif // DEBUG + + TempDsc* tmpDsc = nullptr; + unsigned varNum = BAD_VAR_NUM; + unsigned offset = (unsigned)-1; + + if (op1->isUsedFromSpillTemp()) + { + assert(op1->IsRegOptional()); + + tmpDsc = getSpillTempDsc(op1); + varNum = tmpDsc->tdTempNum(); + offset = 0; + + compiler->tmpRlsTemp(tmpDsc); + } + else if (op1->OperIsHWIntrinsic()) + { + emit->emitIns_R_AR(ins, attr, targetReg, op1->gtGetOp1()->gtRegNum, 0); + return; + } + else if (op1->isIndir()) + { + GenTreeIndir* memIndir = op1->AsIndir(); + GenTree* memBase = memIndir->gtOp1; + + switch (memBase->OperGet()) + { + case GT_LCL_VAR_ADDR: + { + varNum = memBase->AsLclVarCommon()->GetLclNum(); + offset = 0; + + // Ensure that all the GenTreeIndir values are set to their defaults. + assert(!memIndir->HasIndex()); + assert(memIndir->Scale() == 1); + assert(memIndir->Offset() == 0); + + break; + } + + case GT_CLS_VAR_ADDR: + { + emit->emitIns_R_C(ins, attr, targetReg, memBase->gtClsVar.gtClsVarHnd, 0); + return; + } + + default: + { + emit->emitIns_R_A(ins, attr, targetReg, memIndir); + return; + } + } + } + else + { + switch (op1->OperGet()) + { + case GT_LCL_FLD: + { + GenTreeLclFld* lclField = op1->AsLclFld(); + + varNum = lclField->GetLclNum(); + offset = lclField->gtLclFld.gtLclOffs; + break; + } + + case GT_LCL_VAR: + { + assert(op1->IsRegOptional() || !compiler->lvaTable[op1->gtLclVar.gtLclNum].lvIsRegCandidate()); + varNum = op1->AsLclVar()->GetLclNum(); + offset = 0; + break; + } + + default: + { + unreached(); + break; + } + } + } + + // Ensure we got a good varNum and offset. + // We also need to check for `tmpDsc != nullptr` since spill temp numbers + // are negative and start with -1, which also happens to be BAD_VAR_NUM. + assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); + assert(offset != (unsigned)-1); + + emit->emitIns_R_S(ins, attr, targetReg, varNum, offset); + } + else + { + regNumber op1Reg = op1->gtRegNum; + emit->emitIns_R_R(ins, attr, targetReg, op1Reg); + } +} + +//------------------------------------------------------------------------ +// genHWIntrinsic_R_RM_I: Generates the code for a hardware intrinsic node that takes a register operand, a +// register/memory operand, an immediate operand, and that returns a value in register +// +// Arguments: +// node - The hardware intrinsic node +// ins - The instruction being generated +// +void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins) +{ + var_types targetType = node->TypeGet(); + regNumber targetReg = node->gtRegNum; + GenTree* op1 = node->gtGetOp1(); + emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitter* emit = getEmitter(); + + int ival = HWIntrinsicInfo::lookupIval(node->gtHWIntrinsicId); + assert((ival >= 0) && (ival <= 127)); + + // TODO-XArch-CQ: Commutative operations can have op1 be contained + // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained + + assert(targetReg != REG_NA); + assert(node->gtGetOp2() == nullptr); + assert(!node->OperIsCommutative()); + + if (op1->isContained() || op1->isUsedFromSpillTemp()) + { + assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + +#if DEBUG + bool supportsRegOptional = false; + bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional); + assert(isContainable || (supportsRegOptional && op1->IsRegOptional())); +#endif // DEBUG + + TempDsc* tmpDsc = nullptr; + unsigned varNum = BAD_VAR_NUM; + unsigned offset = (unsigned)-1; + + if (op1->isUsedFromSpillTemp()) + { + assert(op1->IsRegOptional()); + + tmpDsc = getSpillTempDsc(op1); + varNum = tmpDsc->tdTempNum(); + offset = 0; + + compiler->tmpRlsTemp(tmpDsc); + } + else if (op1->OperIsHWIntrinsic()) + { + emit->emitIns_R_AR_I(ins, simdSize, targetReg, op1->gtGetOp1()->gtRegNum, 0, ival); + return; + } + else if (op1->isIndir()) + { + GenTreeIndir* memIndir = op1->AsIndir(); + GenTree* memBase = memIndir->gtOp1; + + switch (memBase->OperGet()) + { + case GT_LCL_VAR_ADDR: + { + varNum = memBase->AsLclVarCommon()->GetLclNum(); + offset = 0; + + // Ensure that all the GenTreeIndir values are set to their defaults. + assert(!memIndir->HasIndex()); + assert(memIndir->Scale() == 1); + assert(memIndir->Offset() == 0); + + break; + } + + case GT_CLS_VAR_ADDR: + { + emit->emitIns_R_C_I(ins, simdSize, targetReg, memBase->gtClsVar.gtClsVarHnd, 0, + ival); + return; + } + + default: + { + emit->emitIns_R_A_I(ins, simdSize, targetReg, memIndir, ival); + return; + } + } + } + else + { + switch (op1->OperGet()) + { + case GT_LCL_FLD: + { + GenTreeLclFld* lclField = op1->AsLclFld(); + + varNum = lclField->GetLclNum(); + offset = lclField->gtLclFld.gtLclOffs; + break; + } + + case GT_LCL_VAR: + { + assert(op1->IsRegOptional() || !compiler->lvaTable[op1->gtLclVar.gtLclNum].lvIsRegCandidate()); + varNum = op1->AsLclVar()->GetLclNum(); + offset = 0; + break; + } + + default: + unreached(); + break; + } + } + + // Ensure we got a good varNum and offset. + // We also need to check for `tmpDsc != nullptr` since spill temp numbers + // are negative and start with -1, which also happens to be BAD_VAR_NUM. + assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); + assert(offset != (unsigned)-1); + + emit->emitIns_R_S_I(ins, simdSize, targetReg, varNum, offset, ival); + } + else + { + regNumber op1Reg = op1->gtRegNum; + emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival); + } +} + +//------------------------------------------------------------------------ // genHWIntrinsic_R_R_RM: Generates the code for a hardware intrinsic node that takes a register operand, a // register/memory operand, and that returns a value in register // @@ -1130,7 +1388,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) assert(op1 != nullptr); assert(op2 == nullptr); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType)); break; } @@ -1152,16 +1410,17 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_ConvertToUInt64: { assert(op2 == nullptr); - assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT || - baseType == TYP_LONG || baseType == TYP_ULONG); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT) + + if (varTypeIsIntegral(baseType)) { - emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg); + assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); + emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg); } else { - emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg); + assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType)); } break; } diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 080d70d5b0..b9054b96cd 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2308,7 +2308,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node, bool* supportsRegOptional) { - NamedIntrinsic containingintrinsicId = containingNode->gtHWintrinsicId; + NamedIntrinsic containingintrinsicId = containingNode->gtHWIntrinsicId; HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(containingintrinsicId); // We shouldn't have called in here if containingNode doesn't support containment @@ -2461,7 +2461,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge // TODO-XArch: Update this to be table driven, if possible. - NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->gtHWintrinsicId; + NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->gtHWIntrinsicId; switch (intrinsicId) { @@ -2502,7 +2502,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWintrinsicId; + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); var_types baseType = node->gtSIMDBaseType; @@ -2531,6 +2531,66 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) switch (category) { + case HW_Category_SimpleSIMD: + case HW_Category_SIMDScalar: + { + switch (intrinsicId) + { + case NI_SSE_ReciprocalScalar: + case NI_SSE_ReciprocalSqrtScalar: + case NI_SSE_SqrtScalar: + case NI_SSE2_SqrtScalar: + case NI_SSE41_CeilingScalar: + case NI_SSE41_FloorScalar: + case NI_SSE41_RoundCurrentDirectionScalar: + case NI_SSE41_RoundToNearestIntegerScalar: + case NI_SSE41_RoundToNegativeInfinityScalar: + case NI_SSE41_RoundToPositiveInfinityScalar: + case NI_SSE41_RoundToZeroScalar: + { + // These intrinsics have both 1 and 2-operand overloads. + // + // The 1-operand overload basically does `intrinsic(op1, op1)` + // + // Because of this, the operand must be loaded into a register + // and cannot be contained. + return; + } + + case NI_SSE2_ConvertToInt32: + case NI_SSE2_ConvertToInt64: + case NI_SSE2_ConvertToUInt32: + case NI_SSE2_ConvertToUInt64: + { + if (varTypeIsIntegral(baseType)) + { + // These intrinsics are "ins reg/mem, xmm" and don't + // currently support containment. + return; + } + + break; + } + + default: + { + break; + } + } + + bool supportsRegOptional = false; + + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + { + MakeSrcContained(node, op1); + } + else if (supportsRegOptional) + { + op1->SetRegOptional(); + } + break; + } + default: { // TODO-XArch-CQ: Assert that this is unreached after we have ensured the relevant node types are |