diff options
author | Hyeongseok Oh <hseok82.oh@samsung.com> | 2017-05-23 01:42:10 +0900 |
---|---|---|
committer | Bruce Forstall <brucefo@microsoft.com> | 2017-05-22 09:42:10 -0700 |
commit | 4349824ed5da261f408042a6a023e080ff3e71db (patch) | |
tree | fa1c58b5ffcf6fda6a1ddbe46b62961fe614c1d5 | |
parent | 5493b8439dca2ada6e912716e035654b16685bb1 (diff) | |
download | coreclr-4349824ed5da261f408042a6a023e080ff3e71db.tar.gz coreclr-4349824ed5da261f408042a6a023e080ff3e71db.tar.bz2 coreclr-4349824ed5da261f408042a6a023e080ff3e71db.zip |
[ARM32/RyuJIT] Enable passing struct argument that use stack only (#11541)
* [ARM32/RyuJIT] Enable passing struct argument that use stack only
Enable passing a struct argument when it uses the stack only.
A split struct argument that uses both the stack and register(s) cannot be passed yet.
* Remove redundant GC type assignment and fix internal register count
- Remove redundant GC type assignment in genPutArgStk
- Fix internal register count for ARM32: 2 -> 1
-rw-r--r-- | src/jit/codegenarmarch.cpp | 85 | ||||
-rw-r--r-- | src/jit/lsraarm.cpp | 1 | ||||
-rw-r--r-- | src/jit/lsraarmarch.cpp | 5 |
3 files changed, 68 insertions, 23 deletions
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index db02d8c7ae..4b262dd5e3 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -583,18 +583,17 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); - NYI_ARM("genPutArgStk: GT_OBJ or GT_LCL_VAR source of struct type"); - -#ifdef _TARGET_ARM64_ - var_types targetType = source->TypeGet(); noway_assert(varTypeIsStruct(targetType)); - // We will copy this struct to the stack, possibly using a ldp instruction - // Setup loReg and hiReg from the internal registers that we reserved in lower. + // We will copy this struct to the stack, possibly using a ldp/ldr instruction + // in ARM64/ARM + // Setup loReg (and hiReg) from the internal registers that we reserved in lower. // - regNumber loReg = treeNode->ExtractTempReg(); - regNumber hiReg = treeNode->GetSingleTempReg(); + regNumber loReg = treeNode->ExtractTempReg(); +#ifdef _TARGET_ARM64_ + regNumber hiReg = treeNode->GetSingleTempReg(); +#endif // _TARGET_ARM64_ regNumber addrReg = REG_NA; GenTreeLclVarCommon* varNode = nullptr; @@ -630,8 +629,10 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // the xor ensures that only one of the two is setup, not both assert((varNode != nullptr) ^ (addrNode != nullptr)); - BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0 - unsigned gcPtrCount; // The count of GC pointers in the struct + BYTE gcPtrArray[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0 + BYTE* gcPtrs = gcPtrArray; + + unsigned gcPtrCount; // The count of GC pointers in the struct int structSize; bool isHfa; @@ -652,10 +653,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine // as that is how much stack is allocated for this LclVar - isHfa = varDsc->lvIsHfa(); + isHfa = varDsc->lvIsHfa(); +#ifdef _TARGET_ARM64_ gcPtrCount = 
varDsc->lvStructGcCount; for (unsigned i = 0; i < gcPtrCount; ++i) gcPtrs[i] = varDsc->lvGcLayout[i]; +#else // _TARGET_ARM_ + gcPtrs = treeNode->gtGcPtrs; + gcPtrCount = treeNode->gtNumSlots; +#endif // _TARGET_ARM_ } else // addrNode is used { @@ -665,6 +671,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) genConsumeAddress(addrNode); addrReg = addrNode->gtRegNum; +#ifdef _TARGET_ARM64_ // If addrReg equal to loReg, swap(loReg, hiReg) // This reduces code complexity by only supporting one addrReg overwrite case if (loReg == addrReg) @@ -672,6 +679,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) loReg = hiReg; hiReg = addrReg; } +#endif // _TARGET_ARM64_ CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass; @@ -686,21 +694,24 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { noway_assert(gcPtrCount == 0); } +#ifdef _TARGET_ARM64_ else { noway_assert(structSize <= 2 * TARGET_POINTER_SIZE); } noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); - - // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop - // ldp x2, x3, [x0] - // stp x2, x3, [sp, #16] +#endif // _TARGET_ARM64_ int remainingSize = structSize; unsigned structOffset = 0; unsigned nextIndex = 0; +#ifdef _TARGET_ARM64_ + // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop + // ldp x2, x3, [x0] + // stp x2, x3, [sp, #16] + while (remainingSize >= 2 * TARGET_POINTER_SIZE) { var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]); @@ -733,6 +744,39 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) structOffset += (2 * TARGET_POINTER_SIZE); nextIndex += 2; } +#else // _TARGET_ARM_ + // For a >= 4 byte structSize we will generate a ldr and str instruction each loop + // ldr r2, [r0] + // str r2, [sp, #16] + while (remainingSize >= TARGET_POINTER_SIZE) + { + var_types type = compiler->getJitGCType(gcPtrs[nextIndex]); + + if (varNode != nullptr) + { + // Load from our varNumImp source + 
emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNumInp, 0); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(loReg != addrReg); + noway_assert(remainingSize == TARGET_POINTER_SIZE); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), loReg, addrReg, structOffset); + } + + // Emit str instruction to store the register into the outgoing argument area + emit->emitIns_S_R(INS_str, emitTypeSize(type), loReg, varNumOut, argOffsetOut); + argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + + remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct + structOffset += TARGET_POINTER_SIZE; + nextIndex += 1; + } +#endif // _TARGET_ARM_ // For a 12-byte structSize we will we will generate two load instructions // ldr x2, [x0] @@ -740,13 +784,12 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // str x2, [sp, #16] // str w3, [sp, #24] - var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]); - emitAttr nextAttr = emitTypeSize(nextType); - while (remainingSize > 0) { if (remainingSize >= TARGET_POINTER_SIZE) { + var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]); + emitAttr nextAttr = emitTypeSize(nextType); remainingSize -= TARGET_POINTER_SIZE; if (varNode != nullptr) @@ -768,8 +811,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) structOffset += TARGET_POINTER_SIZE; nextIndex++; - nextType = compiler->getJitGCType(gcPtrs[nextIndex]); - nextAttr = emitTypeSize(nextType); } else // (remainingSize < TARGET_POINTER_SIZE) { @@ -780,7 +821,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) assert(varNode == nullptr); // the left over size is smaller than a pointer and thus can never be a GC type - assert(varTypeIsGC(nextType) == false); + assert(varTypeIsGC(compiler->getJitGCType(gcPtrs[nextIndex])) == false); 
var_types loadType = TYP_UINT; if (loadSize == 1) @@ -810,8 +851,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area } } - -#endif // _TARGET_ARM64_ } } } diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index 325abb6ce2..0e033cb661 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -740,6 +740,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_PINVOKE_PROLOG: case GT_JCC: case GT_MEMORYBARRIER: + case GT_OBJ: info->dstCount = tree->IsValue() ? 1 : 0; if (kind & (GTK_CONST | GTK_LEAF)) { diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index f661babc5b..06a44c75d7 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -654,8 +654,13 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr } else { +#ifdef _TARGET_ARM64_ // We could use a ldp/stp sequence so we need two internal registers argNode->gtLsraInfo.internalIntCount = 2; +#else // _TARGET_ARM_ + // We could use a ldr/str sequence so we need a internal register + argNode->gtLsraInfo.internalIntCount = 1; +#endif // _TARGET_ARM_ if (putArgChild->OperGet() == GT_OBJ) { |