From 41c02957420988f42275c789e06de32a3a3913b9 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Tue, 13 Mar 2018 19:56:34 +0100
Subject: Fix shuffling thunk for Unix AMD64 (#16904)

The shuffling thunk was generated incorrectly for some edge cases when a
struct was passed in a register or a pair of registers in the destination,
but on the stack in the source.

This change implements a new algorithm that ensures that argument slots are
never overwritten before their current value is moved out. It also adds an
extensive regression test that checks various interesting combinations of
arguments that were causing issues before.
---
 src/vm/comdelegate.cpp        | 148 +++++++++++++++++++++++++-----------------
 src/vm/i386/stublinkerx86.cpp |  56 +++++++++++-----
 2 files changed, 131 insertions(+), 73 deletions(-)

diff --git a/src/vm/comdelegate.cpp b/src/vm/comdelegate.cpp
index bd3a46cbd3..173a8fe6d1 100644
--- a/src/vm/comdelegate.cpp
+++ b/src/vm/comdelegate.cpp
@@ -214,6 +214,32 @@ public:
 #endif
 
+#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+// Return an index of argument slot. First indices are reserved for general purpose registers,
+// the following ones for float registers and then the rest for stack slots.
+// This index is independent of how many registers are actually used to pass arguments.
+int GetNormalizedArgumentSlotIndex(UINT16 offset)
+{
+    int index;
+
+    if (offset & ShuffleEntry::FPREGMASK)
+    {
+        index = NUM_ARGUMENT_REGISTERS + (offset & ShuffleEntry::OFSREGMASK);
+    }
+    else if (offset & ShuffleEntry::REGMASK)
+    {
+        index = offset & ShuffleEntry::OFSREGMASK;
+    }
+    else
+    {
+        // stack slot
+        index = NUM_ARGUMENT_REGISTERS + NUM_FLOAT_ARGUMENT_REGISTERS + (offset & ShuffleEntry::OFSMASK);
+    }
+
+    return index;
+}
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+
 VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<ShuffleEntry> * pShuffleEntryArray)
 {
     STANDARD_VM_CONTRACT;
@@ -352,6 +378,10 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray
             pShuffleEntryArray->Append(entry);
     }
@@ -392,77 +421,80 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray
             pShuffleEntryArray->Append(entry);
     }
-#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-    // The shuffle entries are produced in two passes on Unix AMD64. The first pass generates shuffle entries for
-    // all cases except of shuffling struct argument from stack to registers, which is performed in the second pass
-    // The reason is that if such structure argument contained floating point field and it was followed by a
-    // floating point argument, generating code for transferring the structure from stack into registers would
-    // overwrite the xmm register of the floating point argument before it could actually be shuffled.
-    // For example, consider this case:
-    // struct S { int x; float y; };
-    // void fn(long a, long b, long c, long d, long e, S f, float g);
-    // src: rdi = this, rsi = a, rdx = b, rcx = c, r8 = d, r9 = e, stack: f, xmm0 = g
-    // dst: rdi = a, rsi = b, rdx = c, rcx = d, r8 = e, r9 = S.x, xmm0 = s.y, xmm1 = g
-    for (int pass = 0; pass < 2; pass++)
-#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
+    // Iterate all the regular arguments. mapping source registers and stack locations to the corresponding
+    // destination locations.
+    while ((ofsSrc = sArgPlacerSrc.GetNextOffset()) != TransitionBlock::InvalidOffset)
     {
-        // Iterate all the regular arguments. mapping source registers and stack locations to the corresponding
-        // destination locations.
-        while ((ofsSrc = sArgPlacerSrc.GetNextOffset()) != TransitionBlock::InvalidOffset)
+        ofsDst = sArgPlacerDst.GetNextOffset();
+
+        // Find the argument location mapping for both source and destination signature. A single argument can
+        // occupy a floating point register, a general purpose register, a pair of registers of any kind or
+        // a stack slot.
+        sArgPlacerSrc.GetArgLoc(ofsSrc, &sArgSrc);
+        sArgPlacerDst.GetArgLoc(ofsDst, &sArgDst);
+
+        ShuffleIterator iteratorSrc(&sArgSrc);
+        ShuffleIterator iteratorDst(&sArgDst);
+
+        // Shuffle each slot in the argument (register or stack slot) from source to destination.
+        while (iteratorSrc.HasNextOfs())
         {
-            ofsDst = sArgPlacerDst.GetNextOffset();
+            // Locate the next slot to shuffle in the source and destination and encode the transfer into a
+            // shuffle entry.
+            entry.srcofs = iteratorSrc.GetNextOfs();
+            entry.dstofs = iteratorDst.GetNextOfs();
+
+            // Only emit this entry if it's not a no-op (i.e. the source and destination locations are
+            // different).
+            if (entry.srcofs != entry.dstofs)
+                pShuffleEntryArray->Append(entry);
+        }
+
+        // We should have run out of slots to shuffle in the destination at the same time as the source.
+        _ASSERTE(!iteratorDst.HasNextOfs());
+    }
 
 #if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-        bool shuffleStructFromStackToRegs = (ofsSrc != TransitionBlock::StructInRegsOffset) && (ofsDst == TransitionBlock::StructInRegsOffset);
-        if (((pass == 0) && shuffleStructFromStackToRegs) ||
-            ((pass == 1) && !shuffleStructFromStackToRegs))
-        {
-            continue;
-        }
-#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
-        // Find the argument location mapping for both source and destination signature. A single argument can
-        // occupy a floating point register (in which case we don't need to do anything, they're not shuffled)
-        // or some combination of general registers and the stack.
-        sArgPlacerSrc.GetArgLoc(ofsSrc, &sArgSrc);
-        sArgPlacerDst.GetArgLoc(ofsDst, &sArgDst);
+    // The Unix AMD64 ABI can cause a struct to be passed on stack for the source and in registers for the destination.
+    // That can cause some arguments that are passed on stack for the destination to be passed in registers in the source.
+    // An extreme example of that is e.g.:
+    // void fn(int, int, int, int, int, struct {int, double}, double, double, double, double, double, double, double, double, double, double)
+    // For this signature, the shuffle needs to move slots as follows (please note the "forward" movement of xmm registers):
+    // RDI->RSI, RDX->RCX, R8->RDX, R9->R8, stack[0]->R9, xmm0->xmm1, xmm1->xmm2, ... xmm6->xmm7, xmm7->stack[0], stack[1]->xmm0, stack[2]->stack[1], stack[3]->stack[2]
+    // To prevent overwriting of slots before they are moved, we need to sort the move operations.
 
-        ShuffleIterator iteratorSrc(&sArgSrc);
-        ShuffleIterator iteratorDst(&sArgDst);
+    NewArrayHolder<bool> filledSlots = new bool[argSlots];
 
-        // Shuffle each slot in the argument (register or stack slot) from source to destination.
-        while (iteratorSrc.HasNextOfs())
-        {
-            // Locate the next slot to shuffle in the source and destination and encode the transfer into a
-            // shuffle entry.
-            entry.srcofs = iteratorSrc.GetNextOfs();
-            entry.dstofs = iteratorDst.GetNextOfs();
-
-            // Only emit this entry if it's not a no-op (i.e. the source and destination locations are
-            // different).
-            if (entry.srcofs != entry.dstofs)
-                pShuffleEntryArray->Append(entry);
-        }
+    bool reordered;
+    do
+    {
+        reordered = false;
 
-        // We should have run out of slots to shuffle in the destination at the same time as the source.
-        _ASSERTE(!iteratorDst.HasNextOfs());
+        for (int i = 0; i < argSlots; i++)
+        {
+            filledSlots[i] = false;
         }
 
-#if defined(UNIX_AMD64_ABI) && defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-        if (pass == 0)
+        for (int i = 0; i < pShuffleEntryArray->GetCount(); i++)
         {
-            // Reset the iterator for the 2nd pass
-            sSigSrc.Reset();
-            sSigDst.Reset();
+            entry = (*pShuffleEntryArray)[i];
 
-            sArgPlacerSrc = ArgIterator(&sSigSrc);
-            sArgPlacerDst = ArgIterator(&sSigDst);
-
-            if (sSigDst.HasThis())
+            // If the slot that we are moving the argument to was filled in already, we need to move this entry in front
+            // of the entry that filled it in.
+            if (filledSlots[GetNormalizedArgumentSlotIndex(entry.srcofs)])
             {
-                sArgPlacerSrc.GetNextOffset();
+                int j;
+                for (j = i; (*pShuffleEntryArray)[j].dstofs != entry.srcofs; j--)
+                    (*pShuffleEntryArray)[j] = (*pShuffleEntryArray)[j - 1];
+
+                (*pShuffleEntryArray)[j] = entry;
+                reordered = true;
             }
+
+            filledSlots[GetNormalizedArgumentSlotIndex(entry.dstofs)] = true;
         }
-#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
     }
+    while (reordered);
+#endif // UNIX_AMD64_ABI && FEATURE_UNIX_AMD64_STRUCT_PASSING
 
     entry.srcofs = ShuffleEntry::SENTINEL;
     entry.dstofs = 0;
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
index 28a012e178..ff3ec48d60 100644
--- a/src/vm/i386/stublinkerx86.cpp
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -3834,28 +3834,56 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
     {
         if (pEntry->srcofs & ShuffleEntry::REGMASK)
         {
-            // If source is present in register then destination must also be a register
-            _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
-            // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
-            _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
-
-            int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+            // Source in a general purpose or float register, destination in the same kind of a register or on stack
             int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
 
-            if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
+            if (pEntry->dstofs & ShuffleEntry::REGMASK)
             {
-                // movdqa dstReg, srcReg
-                X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
+                // Source in register, destination in register
+
+                // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
+                _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
+                int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+
+                if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
+                {
+                    // movdqa dstReg, srcReg
+                    X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
+                }
+                else
+                {
+                    // mov dstReg, srcReg
+                    X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
+                }
             }
             else
             {
-                // mov dstReg, srcReg
-                X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
+                // Source in register, destination on stack
+                int dstOffset = (pEntry->dstofs + 1) * sizeof(void*);
+
+                if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
+                {
+                    if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
+                    {
+                        // movss [rax + dst], srcReg
+                        X64EmitMovSSToMem((X86Reg)(kXMM0 + srcRegIndex), SCRATCH_REGISTER_X86REG, dstOffset);
+                    }
+                    else
+                    {
+                        // movsd [rax + dst], srcReg
+                        X64EmitMovSDToMem((X86Reg)(kXMM0 + srcRegIndex), SCRATCH_REGISTER_X86REG, dstOffset);
+                    }
+                }
+                else
+                {
+                    // mov [rax + dst], srcReg
+                    X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, dstOffset, c_argRegs[srcRegIndex]);
+                }
             }
         }
         else if (pEntry->dstofs & ShuffleEntry::REGMASK)
         {
-            // source must be on the stack
+            // Source on stack, destination in register
             _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
 
             int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
@@ -3882,10 +3910,8 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
         }
         else
        {
-            // source must be on the stack
+            // Source on stack, destination on stack
             _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
-
-            // dest must be on the stack
             _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK));
 
             // mov r10, [rax + src]
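
Illustration (not part of the commit, and not the CoreCLR sources): the reordering pass introduced in comdelegate.cpp can be read in isolation as the standalone sketch below. Move, ReorderMoves and the plain integer slot numbers are assumed names for this example; the slot numbers stand in for the normalized indices that GetNormalizedArgumentSlotIndex computes in the patch.

// Standalone sketch of the shuffle-entry reordering idea (simplified, assumed names).
// Each Move copies the value from slot "src" to slot "dst".
#include <cstddef>
#include <cstdio>
#include <vector>

struct Move
{
    int src;   // slot the value is read from
    int dst;   // slot the value is written to
};

// Repeatedly walk the move list; whenever a move reads a slot that an earlier move
// in the list has already overwritten, rotate that move in front of the move that
// overwrote it. Stop when a full pass makes no changes. Like the loop in the patch,
// this assumes an overwrite-free order exists; with a cyclic set of moves the outer
// loop would not terminate.
void ReorderMoves(std::vector<Move>& moves, int slotCount)
{
    bool reordered;
    do
    {
        reordered = false;
        std::vector<bool> filled(slotCount, false);   // slots written by earlier moves

        for (std::size_t i = 0; i < moves.size(); i++)
        {
            Move entry = moves[i];

            if (filled[entry.src])
            {
                // Shift the intervening moves down one position and re-insert this
                // move just before the move that filled its source slot.
                std::size_t j;
                for (j = i; moves[j].dst != entry.src; j--)
                    moves[j] = moves[j - 1];

                moves[j] = entry;
                reordered = true;
            }

            filled[entry.dst] = true;
        }
    }
    while (reordered);
}

int main()
{
    // The second move reads slot 1, which the first move overwrites, so it has to
    // be hoisted in front of the first move.
    std::vector<Move> moves = { {0, 1}, {1, 2}, {4, 0} };
    ReorderMoves(moves, 6);

    for (const Move& m : moves)
        std::printf("%d -> %d\n", m.src, m.dst);   // 1 -> 2, 0 -> 1, 4 -> 0
    return 0;
}

Running the example reorders {0->1, 1->2, 4->0} into {1->2, 0->1, 4->0}, so slot 1 is read before it is overwritten, which is the property the shuffle thunk needs from the sorted shuffle entry array.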