summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/jit/codegenarm64.cpp106
-rw-r--r--src/jit/lowerarmarch.cpp12
-rw-r--r--src/jit/lsraarm64.cpp34
3 files changed, 121 insertions, 31 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index da9c7bfe44..e63d961763 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -4578,13 +4578,9 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
// - the source of SIMD type (op1)
// - the index of the value to be returned.
genConsumeOperands(simdNode);
- regNumber srcReg = op1->gtRegNum;
- // TODO-ARM64-CQ Optimize SIMDIntrinsicGetItem
- // Optimize the case of op1 is in memory and trying to access ith element.
- assert(op1->isUsedFromReg());
-
- emitAttr baseTypeSize = emitTypeSize(baseType);
+ emitAttr baseTypeSize = emitTypeSize(baseType);
+ unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
if (op2->IsCnsIntOrI())
{
@@ -4592,34 +4588,102 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
ssize_t index = op2->gtIntCon.gtIconVal;
+ // We only need to generate code for the get if the index is valid
+ // If the index is invalid, previously generated for the range check will throw
if (getEmitter()->isValidVectorIndex(emitTypeSize(simdType), baseTypeSize, index))
{
- // Only generate code for the get if the index is valid
- // Otherwise generated code will throw
- getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
+ if (op1->isContained())
+ {
+ int offset = (int)index * genTypeSize(baseType);
+ instruction ins = ins_Load(baseType);
+ baseTypeSize = varTypeIsFloating(baseType)
+ ? baseTypeSize
+ : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize);
+
+ assert(!op1->isUsedFromReg());
+
+ if (op1->OperIsLocal())
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+
+ getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset);
+ }
+ else
+ {
+ assert(op1->OperGet() == GT_IND);
+
+ GenTree* addr = op1->AsIndir()->Addr();
+ assert(!addr->isContained());
+ regNumber baseReg = addr->gtRegNum;
+
+ // ldr targetReg, [baseReg, #offset]
+ getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset);
+ }
+ }
+ else
+ {
+ assert(op1->isUsedFromReg());
+ regNumber srcReg = op1->gtRegNum;
+
+ // mov targetReg, srcReg[#index]
+ getEmitter()->emitIns_R_R_I(INS_mov, baseTypeSize, targetReg, srcReg, index);
+ }
}
}
else
{
- unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
- noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
+ assert(!op2->isContained());
+ regNumber baseReg = REG_NA;
regNumber indexReg = op2->gtRegNum;
- regNumber tmpReg = simdNode->ExtractTempReg();
- assert(genIsValidIntReg(tmpReg));
- assert(tmpReg != indexReg);
+ if (op1->isContained())
+ {
+ // Optimize the case of op1 is in memory and trying to access ith element.
+ assert(!op1->isUsedFromReg());
+ if (op1->OperIsLocal())
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+
+ baseReg = simdNode->ExtractTempReg();
+
+ // Load the address of varNum
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
+ }
+ else
+ {
+ // Require GT_IND addr to be not contained.
+ assert(op1->OperGet() == GT_IND);
+
+ GenTree* addr = op1->AsIndir()->Addr();
+ assert(!addr->isContained());
- unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
+ baseReg = addr->gtRegNum;
+ }
+ }
+ else
+ {
+ assert(op1->isUsedFromReg());
+ regNumber srcReg = op1->gtRegNum;
- // Load the address of simdInitTempVarNum
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, tmpReg, simdInitTempVarNum, 0);
+ unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
+ noway_assert(compiler->lvaSIMDInitTempVarNum != BAD_VAR_NUM);
+
+ baseReg = simdNode->ExtractTempReg();
+
+ // Load the address of simdInitTempVarNum
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
+
+ // Store the vector to simdInitTempVarNum
+ getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, baseReg);
+ }
- // Store the vector to simdInitTempVarNum
- getEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), srcReg, tmpReg);
+ assert(genIsValidIntReg(indexReg));
+ assert(genIsValidIntReg(baseReg));
+ assert(baseReg != indexReg);
- // Load item at simdInitTempVarNum[index]
- getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, tmpReg, indexReg, INS_OPTS_LSL,
+ // Load item at baseReg[index]
+ getEmitter()->emitIns_R_R_R_Ext(ins_Load(baseType), baseTypeSize, targetReg, baseReg, indexReg, INS_OPTS_LSL,
baseTypeScale);
}
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
index 231650180a..5aa3ff1e6b 100644
--- a/src/jit/lowerarmarch.cpp
+++ b/src/jit/lowerarmarch.cpp
@@ -785,12 +785,11 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
case SIMDIntrinsicGetItem:
{
- // TODO-ARM64-CQ Support containing op1 memory ops
-
// This implements get_Item method. The sources are:
// - the source SIMD struct
// - index (which element to get)
// The result is baseType of SIMD struct.
+ op1 = simdNode->gtOp.gtOp1;
op2 = simdNode->gtOp.gtOp2;
// If the index is a constant, mark it as contained.
@@ -798,6 +797,15 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
{
MakeSrcContained(simdNode, op2);
}
+
+ if (IsContainableMemoryOp(op1))
+ {
+ MakeSrcContained(simdNode, op1);
+ if (op1->OperGet() == GT_IND)
+ {
+ op1->AsIndir()->Addr()->ClearContained();
+ }
+ }
break;
}
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 22fb56e7a3..7c9b60174c 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -815,20 +815,38 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
break;
case SIMDIntrinsicGetItem:
+ op1 = simdTree->gtGetOp1();
+ op2 = simdTree->gtGetOp2();
+
// We have an object and an item, which may be contained.
- info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
+ info->srcCount = (op2->isContained() ? 1 : 2);
- if (!simdTree->gtGetOp2()->IsCnsIntOrI())
+ if (op1->isContained())
{
- // If the index is not a constant, we will need a general purpose register
- info->internalIntCount = 1;
+ // Although GT_IND of TYP_SIMD12 reserves an internal register for reading 4 and 8 bytes from memory
+ // and assembling them into target reg, it is not required in this case.
+ op1->gtLsraInfo.internalIntCount = 0;
+ op1->gtLsraInfo.internalFloatCount = 0;
+ info->srcCount -= 1;
+ info->srcCount += GetOperandSourceCount(op1);
+ }
- // If the index is not a constant, we will use the SIMD temp location to store the vector.
- compiler->getSIMDInitTempVarNum();
+ if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
+ {
+ // If the index is not a constant and not contained or is a local
+ // we will need a general purpose register to calculate the address
+ info->internalIntCount = 1;
// internal register must not clobber input index
- simdTree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
+ op2->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ }
+
+ if (!op2->IsCnsIntOrI() && (!op1->isContained()))
+ {
+ // If vector is not already in memory (contained) and the index is not a constant,
+ // we will use the SIMD temp location to store the vector.
+ compiler->getSIMDInitTempVarNum();
}
break;