summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHyeongseok Oh <hseok82.oh@samsung.com>2017-05-23 01:42:10 +0900
committerBruce Forstall <brucefo@microsoft.com>2017-05-22 09:42:10 -0700
commit4349824ed5da261f408042a6a023e080ff3e71db (patch)
treefa1c58b5ffcf6fda6a1ddbe46b62961fe614c1d5
parent5493b8439dca2ada6e912716e035654b16685bb1 (diff)
downloadcoreclr-4349824ed5da261f408042a6a023e080ff3e71db.tar.gz
coreclr-4349824ed5da261f408042a6a023e080ff3e71db.tar.bz2
coreclr-4349824ed5da261f408042a6a023e080ff3e71db.zip
[ARM32/RyuJIT] Enable passing struct argument that use stack only (#11541)
* [ARM32/RyuJIT] Enable passing struct argument that use stack only Enable passing struct argument when it uses stack only. Cannot pass splitted struct argument that uses stack and register(s) yet. * Remove redundant GC type assignment and fix internal register count - Remove redundant GC type assignment in genPutArgStk - Fix internal register count for ARM32: 2 -> 1
-rw-r--r--src/jit/codegenarmarch.cpp85
-rw-r--r--src/jit/lsraarm.cpp1
-rw-r--r--src/jit/lsraarmarch.cpp5
3 files changed, 68 insertions, 23 deletions
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index db02d8c7ae..4b262dd5e3 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -583,18 +583,17 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
- NYI_ARM("genPutArgStk: GT_OBJ or GT_LCL_VAR source of struct type");
-
-#ifdef _TARGET_ARM64_
-
var_types targetType = source->TypeGet();
noway_assert(varTypeIsStruct(targetType));
- // We will copy this struct to the stack, possibly using a ldp instruction
- // Setup loReg and hiReg from the internal registers that we reserved in lower.
+ // We will copy this struct to the stack, possibly using a ldp/ldr instruction
+ // in ARM64/ARM
+ // Setup loReg (and hiReg) from the internal registers that we reserved in lower.
//
- regNumber loReg = treeNode->ExtractTempReg();
- regNumber hiReg = treeNode->GetSingleTempReg();
+ regNumber loReg = treeNode->ExtractTempReg();
+#ifdef _TARGET_ARM64_
+ regNumber hiReg = treeNode->GetSingleTempReg();
+#endif // _TARGET_ARM64_
regNumber addrReg = REG_NA;
GenTreeLclVarCommon* varNode = nullptr;
@@ -630,8 +629,10 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
// the xor ensures that only one of the two is setup, not both
assert((varNode != nullptr) ^ (addrNode != nullptr));
- BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
- unsigned gcPtrCount; // The count of GC pointers in the struct
+ BYTE gcPtrArray[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
+ BYTE* gcPtrs = gcPtrArray;
+
+ unsigned gcPtrCount; // The count of GC pointers in the struct
int structSize;
bool isHfa;
@@ -652,10 +653,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
// as that is how much stack is allocated for this LclVar
- isHfa = varDsc->lvIsHfa();
+ isHfa = varDsc->lvIsHfa();
+#ifdef _TARGET_ARM64_
gcPtrCount = varDsc->lvStructGcCount;
for (unsigned i = 0; i < gcPtrCount; ++i)
gcPtrs[i] = varDsc->lvGcLayout[i];
+#else // _TARGET_ARM_
+ gcPtrs = treeNode->gtGcPtrs;
+ gcPtrCount = treeNode->gtNumSlots;
+#endif // _TARGET_ARM_
}
else // addrNode is used
{
@@ -665,6 +671,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
genConsumeAddress(addrNode);
addrReg = addrNode->gtRegNum;
+#ifdef _TARGET_ARM64_
// If addrReg equal to loReg, swap(loReg, hiReg)
// This reduces code complexity by only supporting one addrReg overwrite case
if (loReg == addrReg)
@@ -672,6 +679,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
loReg = hiReg;
hiReg = addrReg;
}
+#endif // _TARGET_ARM64_
CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
@@ -686,21 +694,24 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
noway_assert(gcPtrCount == 0);
}
+#ifdef _TARGET_ARM64_
else
{
noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
}
noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
-
- // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop
- // ldp x2, x3, [x0]
- // stp x2, x3, [sp, #16]
+#endif // _TARGET_ARM64_
int remainingSize = structSize;
unsigned structOffset = 0;
unsigned nextIndex = 0;
+#ifdef _TARGET_ARM64_
+ // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop
+ // ldp x2, x3, [x0]
+ // stp x2, x3, [sp, #16]
+
while (remainingSize >= 2 * TARGET_POINTER_SIZE)
{
var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
@@ -733,6 +744,39 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
structOffset += (2 * TARGET_POINTER_SIZE);
nextIndex += 2;
}
+#else // _TARGET_ARM_
+ // For a >= 4 byte structSize we will generate a ldr and str instruction each loop
+ // ldr r2, [r0]
+ // str r2, [sp, #16]
+ while (remainingSize >= TARGET_POINTER_SIZE)
+ {
+ var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);
+
+ if (varNode != nullptr)
+ {
+ // Load from our varNumImp source
+ emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNumInp, 0);
+ }
+ else
+ {
+ // check for case of destroying the addrRegister while we still need it
+ assert(loReg != addrReg);
+ noway_assert(remainingSize == TARGET_POINTER_SIZE);
+
+ // Load from our address expression source
+ emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), loReg, addrReg, structOffset);
+ }
+
+ // Emit str instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(INS_str, emitTypeSize(type), loReg, varNumOut, argOffsetOut);
+ argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
+
+ remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
+ structOffset += TARGET_POINTER_SIZE;
+ nextIndex += 1;
+ }
+#endif // _TARGET_ARM_
// For a 12-byte structSize we will we will generate two load instructions
// ldr x2, [x0]
@@ -740,13 +784,12 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
// str x2, [sp, #16]
// str w3, [sp, #24]
- var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
- emitAttr nextAttr = emitTypeSize(nextType);
-
while (remainingSize > 0)
{
if (remainingSize >= TARGET_POINTER_SIZE)
{
+ var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ emitAttr nextAttr = emitTypeSize(nextType);
remainingSize -= TARGET_POINTER_SIZE;
if (varNode != nullptr)
@@ -768,8 +811,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
structOffset += TARGET_POINTER_SIZE;
nextIndex++;
- nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
- nextAttr = emitTypeSize(nextType);
}
else // (remainingSize < TARGET_POINTER_SIZE)
{
@@ -780,7 +821,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
assert(varNode == nullptr);
// the left over size is smaller than a pointer and thus can never be a GC type
- assert(varTypeIsGC(nextType) == false);
+ assert(varTypeIsGC(compiler->getJitGCType(gcPtrs[nextIndex])) == false);
var_types loadType = TYP_UINT;
if (loadSize == 1)
@@ -810,8 +851,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
}
}
-
-#endif // _TARGET_ARM64_
}
}
}
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index 325abb6ce2..0e033cb661 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -740,6 +740,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_PINVOKE_PROLOG:
case GT_JCC:
case GT_MEMORYBARRIER:
+ case GT_OBJ:
info->dstCount = tree->IsValue() ? 1 : 0;
if (kind & (GTK_CONST | GTK_LEAF))
{
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index f661babc5b..06a44c75d7 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -654,8 +654,13 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
}
else
{
+#ifdef _TARGET_ARM64_
// We could use a ldp/stp sequence so we need two internal registers
argNode->gtLsraInfo.internalIntCount = 2;
+#else // _TARGET_ARM_
+ // We could use a ldr/str sequence so we need a internal register
+ argNode->gtLsraInfo.internalIntCount = 1;
+#endif // _TARGET_ARM_
if (putArgChild->OperGet() == GT_OBJ)
{