diff options
author | Tanner Gooding <tagoo@outlook.com> | 2019-04-29 14:39:19 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-29 14:39:19 -0700 |
commit | 54af92b34fc18ee9d8adada35d1f527c67224be7 (patch) | |
tree | 6fe82e8dcbbdfc65432349681f40f6356577e884 /src | |
parent | e3f602bb27a5798115a016b282ae6ab87c0decdc (diff) | |
download | coreclr-54af92b34fc18ee9d8adada35d1f527c67224be7.tar.gz coreclr-54af92b34fc18ee9d8adada35d1f527c67224be7.tar.bz2 coreclr-54af92b34fc18ee9d8adada35d1f527c67224be7.zip |
Resolves an assert around Sse41.ConvertToVector128Int* and Avx2.ConvertToVector256Int* (#24289)
* Adding tests for the overloads of Sse41.ConvertToVector128Int* and Avx2.ConvertToVector256Int* that take a pointer
* Fixing the handling of Sse41.ConvertToVector128* and Avx2.ConvertToVector256*
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/hwintrinsiccodegenxarch.cpp | 40 | ||||
-rw-r--r-- | src/jit/hwintrinsiclistxarch.h | 12 | ||||
-rw-r--r-- | src/jit/hwintrinsicxarch.cpp | 55 | ||||
-rw-r--r-- | src/jit/hwintrinsicxarch.h | 6 | ||||
-rw-r--r-- | src/jit/lowerxarch.cpp | 47 |
5 files changed, 122 insertions(+), 38 deletions(-)
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 2866e76ae8..7627d125da 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -1764,6 +1764,26 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { + case NI_SSE41_ConvertToVector128Int16: + case NI_SSE41_ConvertToVector128Int32: + case NI_SSE41_ConvertToVector128Int64: + { + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + + if (!varTypeIsSIMD(op1->gtType)) + { + // Until we improve the handling of addressing modes in the emitter, we'll create a + // temporary GT_IND to generate code with. + GenTreeIndir load = indirForm(node->TypeGet(), op1); + emit->emitInsLoadInd(ins, emitTypeSize(TYP_SIMD16), node->gtRegNum, &load); + } + else + { + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); + } + break; + } + case NI_SSE41_TestAllOnes: { op1Reg = op1->gtRegNum; @@ -1955,6 +1975,26 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) break; } + case NI_AVX2_ConvertToVector256Int16: + case NI_AVX2_ConvertToVector256Int32: + case NI_AVX2_ConvertToVector256Int64: + { + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + + if (!varTypeIsSIMD(op1->gtType)) + { + // Until we improve the handling of addressing modes in the emitter, we'll create a + // temporary GT_IND to generate code with. 
+ GenTreeIndir load = indirForm(node->TypeGet(), op1); + emit->emitInsLoadInd(ins, emitTypeSize(TYP_SIMD32), node->gtRegNum, &load); + } + else + { + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD32)); + } + break; + } + case NI_AVX2_GatherVector128: case NI_AVX2_GatherVector256: case NI_AVX2_GatherMaskVector128: diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 41ddfc8dd2..b7de03be20 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -330,9 +330,9 @@ HARDWARE_INTRINSIC(SSE41_BlendVariable, "BlendVariab HARDWARE_INTRINSIC(SSE41_Ceiling, "Ceiling", SSE41, 10, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_CeilingScalar, "CeilingScalar", SSE41, 10, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41_CompareEqual, "CompareEqual", SSE41, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, 
"ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_DotProduct, "DotProduct", SSE41, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE41_Extract, "Extract", SSE41, -1, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_Floor, "Floor", SSE41, 9, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, 
HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) @@ -474,9 +474,9 @@ HARDWARE_INTRINSIC(AVX2_CompareGreaterThan, "CompareGrea HARDWARE_INTRINSIC(AVX2_ExtractVector128, "ExtractVector128", AVX2, -1, 32, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ConvertToInt32, "ConvertToInt32", AVX2, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX2_ConvertToUInt32, "ConvertToUInt32", AVX2, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16, "ConvertToVector256Int16", AVX2, -1, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVector256Int32", AVX2, -1, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16, "ConvertToVector256Int16", AVX2, -1, 32, 1, {INS_pmovsxbw, 
INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVector256Int32", AVX2, -1, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 4d78fde975..bb224f9a17 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -807,29 +807,6 @@ GenTree* 
Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } - bool isTableDriven = impIsTableDrivenHWIntrinsic(intrinsic, category); - - if (isTableDriven && ((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic) || - HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic))) - { - if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic)) - { - baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)); - } - else - { - assert((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic)); - CORINFO_ARG_LIST_HANDLE secondArg = info.compCompHnd->getArgNext(sig->args); - CORINFO_CLASS_HANDLE secondArgClass = info.compCompHnd->getArgClass(sig, secondArg); - baseType = getBaseTypeOfSIMDType(secondArgClass); - - if (baseType == TYP_UNKNOWN) // the second argument is not a vector - { - baseType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, secondArg, &secondArgClass))); - } - } - } - if (HWIntrinsicInfo::IsFloatingPointUsed(intrinsic)) { // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is being on SIMD fields, but @@ -838,8 +815,37 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } // table-driven importer of simple intrinsics - if (isTableDriven) + if (impIsTableDrivenHWIntrinsic(intrinsic, category)) { + if ((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic) || + HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic)) + { + CORINFO_ARG_LIST_HANDLE arg = sig->args; + + if ((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic)) + { + arg = info.compCompHnd->getArgNext(arg); + } + + CORINFO_CLASS_HANDLE argClass = info.compCompHnd->getArgClass(sig, arg); + baseType = getBaseTypeAndSizeOfSIMDType(argClass); + + if (baseType == TYP_UNKNOWN) // the argument is not a vector + { + CORINFO_CLASS_HANDLE tmpClass; + CorInfoType corInfoType = strip(info.compCompHnd->getArgType(sig, arg, 
&tmpClass)); + + if (corInfoType == CORINFO_TYPE_PTR) + { + corInfoType = info.compCompHnd->getChildType(argClass, &tmpClass); + } + + baseType = JITtype2varType(corInfoType); + } + + assert(baseType != TYP_UNKNOWN); + } + unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig); CORINFO_ARG_LIST_HANDLE argList = sig->args; CORINFO_CLASS_HANDLE argClass; @@ -1737,6 +1743,7 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic, CORINFO_ARG_LIST_HANDLE argList = sig->args; CORINFO_CLASS_HANDLE argClass; CorInfoType corType; + switch (intrinsic) { case NI_SSE42_Crc32: diff --git a/src/jit/hwintrinsicxarch.h b/src/jit/hwintrinsicxarch.h index 30a4d9bddb..d4469e7a2e 100644 --- a/src/jit/hwintrinsicxarch.h +++ b/src/jit/hwintrinsicxarch.h @@ -178,7 +178,11 @@ struct HWIntrinsicInfo static instruction lookupIns(NamedIntrinsic id, var_types type) { - assert((type >= TYP_BYTE) && (type <= TYP_DOUBLE)); + if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) + { + assert(!"Unexpected type"); + return INS_invalid; + } return lookup(id).ins[type - TYP_BYTE]; } diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 9a1f223d8f..bcb5bef51f 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2492,15 +2492,32 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge case HW_Category_SimpleSIMD: { - // These intrinsics only expect 16 or 32-byte nodes for containment - assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32)); - assert(supportsSIMDScalarLoads == false); + switch (containingIntrinsicId) + { + case NI_SSE41_ConvertToVector128Int16: + case NI_SSE41_ConvertToVector128Int32: + case NI_SSE41_ConvertToVector128Int64: + case NI_AVX2_ConvertToVector256Int16: + case NI_AVX2_ConvertToVector256Int32: + case NI_AVX2_ConvertToVector256Int64: + { + supportsGeneralLoads = (!node->OperIsHWIntrinsic()); + break; + } - supportsAlignedSIMDLoads = - !comp->canUseVexEncoding() && 
(containingIntrinsicId != NI_SSE2_ConvertToVector128Double); - supportsUnalignedSIMDLoads = !supportsAlignedSIMDLoads; - supportsGeneralLoads = supportsUnalignedSIMDLoads; + default: + { + // These intrinsics only expect 16 or 32-byte nodes for containment + assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32)); + supportsAlignedSIMDLoads = + !comp->canUseVexEncoding() && (containingIntrinsicId != NI_SSE2_ConvertToVector128Double); + supportsUnalignedSIMDLoads = !supportsAlignedSIMDLoads; + supportsGeneralLoads = supportsUnalignedSIMDLoads; + break; + } + } + assert(supportsSIMDScalarLoads == false); break; } @@ -2908,6 +2925,22 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_SSE41_ConvertToVector128Int16: + case NI_SSE41_ConvertToVector128Int32: + case NI_SSE41_ConvertToVector128Int64: + case NI_AVX2_ConvertToVector256Int16: + case NI_AVX2_ConvertToVector256Int32: + case NI_AVX2_ConvertToVector256Int64: + { + if (!varTypeIsSIMD(op1->gtType)) + { + GenTree** pAddr = &node->gtOp1; + ContainCheckHWIntrinsicAddr(node, pAddr); + return; + } + break; + } + default: { break; |