summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/vm/comdelegate.cpp123
-rw-r--r--src/vm/comdelegate.h1
-rw-r--r--src/vm/i386/stublinkerx86.cpp75
-rw-r--r--src/vm/i386/stublinkerx86.h3
4 files changed, 182 insertions, 20 deletions
diff --git a/src/vm/comdelegate.cpp b/src/vm/comdelegate.cpp
index 7ee85b6..9d92a7c 100644
--- a/src/vm/comdelegate.cpp
+++ b/src/vm/comdelegate.cpp
@@ -237,6 +237,22 @@ int GetNormalizedArgumentSlotIndex(UINT16 offset)
return index;
}
+
+// Node of a directed graph where nodes represent registers / stack slots
+// and edges represent moves of data.
+struct ShuffleGraphNode
+{
+ static const UINT16 NoNode = 0xffff;
+ // Previous node (represents source of data for the register / stack of the current node)
+ UINT16 prev;
+ // Offset of the register / stack slot
+ UINT16 ofs;
+ // Set to true for nodes that are source of data for a destination node
+ UINT8 isSource;
+ // Nodes that are marked are either already processed or don't participate in the shuffling
+ UINT8 isMarked;
+};
+
#endif // UNIX_AMD64_ABI
VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<ShuffleEntry> * pShuffleEntryArray)
@@ -458,6 +474,7 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S
_ASSERTE(!iteratorDst.HasNextOfs());
}
+
#if defined(UNIX_AMD64_ABI)
// The Unix AMD64 ABI can cause a struct to be passed on stack for the source and in registers for the destination.
// That can cause some arguments that are passed on stack for the destination to be passed in registers in the source.
@@ -465,39 +482,107 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArray<S
// void fn(int, int, int, int, int, struct {int, double}, double, double, double, double, double, double, double, double, double, double)
// For this signature, the shuffle needs to move slots as follows (please note the "forward" movement of xmm registers):
// RDI->RSI, RDX->RCX, R8->RDX, R9->R8, stack[0]->R9, xmm0->xmm1, xmm1->xmm2, ... xmm6->xmm7, xmm7->stack[0], stack[1]->xmm0, stack[2]->stack[1], stack[3]->stack[2]
- // To prevent overwriting of slots before they are moved, we need to sort the move operations.
+ // To prevent overwriting of slots before they are moved, we need to perform the shuffling in correct order
+
+ NewArrayHolder<ShuffleGraphNode> pGraphNodes = new ShuffleGraphNode[argSlots];
+
+ // Initialize the graph array
+ for (unsigned int i = 0; i < argSlots; i++)
+ {
+ pGraphNodes[i].prev = ShuffleGraphNode::NoNode;
+ pGraphNodes[i].isMarked = true;
+ pGraphNodes[i].isSource = false;
+ }
+
+ // Build the directed graph representing register and stack slot shuffling.
+ // The links are directed from destination to source.
+ // During the build also set isSource flag for nodes that are sources of data.
+ // The ones that don't have the isSource flag set are beginnings of non-cyclic
+ // segments of the graph.
+ for (unsigned int i = 0; i < pShuffleEntryArray->GetCount(); i++)
+ {
+ ShuffleEntry entry = (*pShuffleEntryArray)[i];
+
+ int srcIndex = GetNormalizedArgumentSlotIndex(entry.srcofs);
+ int dstIndex = GetNormalizedArgumentSlotIndex(entry.dstofs);
+
+ // Unmark the node to indicate that it was not processed yet
+ pGraphNodes[srcIndex].isMarked = false;
+ // The node contains a register / stack slot that is a source from which we move data to a destination one
+ pGraphNodes[srcIndex].isSource = true;
+ pGraphNodes[srcIndex].ofs = entry.srcofs;
+
+ // Unmark the node to indicate that it was not processed yet
+ pGraphNodes[dstIndex].isMarked = false;
+ // Link to the previous node in the graph (source of data for the current node)
+ pGraphNodes[dstIndex].prev = srcIndex;
+ pGraphNodes[dstIndex].ofs = entry.dstofs;
+ }
- NewArrayHolder<bool> filledSlots = new bool[argSlots];
+ // Now that we've built the graph, clear the array, we will regenerate it from the graph ensuring a proper order of shuffling
+ pShuffleEntryArray->Clear();
- bool reordered;
- do
+ // Add all non-cyclic subgraphs to the target shuffle array and mark their nodes as visited
+ for (unsigned int startIndex = 0; startIndex < argSlots; startIndex++)
{
- reordered = false;
+ unsigned int index = startIndex;
- for (int i = 0; i < argSlots; i++)
+ if (!pGraphNodes[index].isMarked && !pGraphNodes[index].isSource)
{
- filledSlots[i] = false;
+ // This node is not a source, that means it is an end of shuffle chain
+ // Generate shuffle array entries for all nodes in the chain in a correct
+ // order.
+ UINT16 dstOfs = ShuffleEntry::SENTINEL;
+
+ do
+ {
+ pGraphNodes[index].isMarked = true;
+ if (dstOfs != ShuffleEntry::SENTINEL)
+ {
+ entry.srcofs = pGraphNodes[index].ofs;
+ entry.dstofs = dstOfs;
+ pShuffleEntryArray->Append(entry);
+ }
+
+ dstOfs = pGraphNodes[index].ofs;
+ index = pGraphNodes[index].prev;
+ }
+ while (index != ShuffleGraphNode::NoNode);
}
- for (unsigned int i = 0; i < pShuffleEntryArray->GetCount(); i++)
+ }
+
+ // Process all cycles in the graph
+ for (unsigned int startIndex = 0; startIndex < argSlots; startIndex++)
+ {
+ unsigned int index = startIndex;
+
+ if (!pGraphNodes[index].isMarked)
{
- entry = (*pShuffleEntryArray)[i];
+ // This node is part of a new cycle as all non-cyclic parts of the graphs were already visited
- // If the slot that we are moving the argument to was filled in already, we need to move this entry in front
- // of the entry that filled it in.
- if (filledSlots[GetNormalizedArgumentSlotIndex(entry.srcofs)])
+ // Move the first node register / stack slot to a helper reg
+ UINT16 dstOfs = ShuffleEntry::HELPERREG;
+
+ do
{
- unsigned int j;
- for (j = i; (*pShuffleEntryArray)[j].dstofs != entry.srcofs; j--)
- (*pShuffleEntryArray)[j] = (*pShuffleEntryArray)[j - 1];
+ pGraphNodes[index].isMarked = true;
- (*pShuffleEntryArray)[j] = entry;
- reordered = true;
+ entry.srcofs = pGraphNodes[index].ofs;
+ entry.dstofs = dstOfs;
+ pShuffleEntryArray->Append(entry);
+
+ dstOfs = pGraphNodes[index].ofs;
+ index = pGraphNodes[index].prev;
}
+ while (index != startIndex);
- filledSlots[GetNormalizedArgumentSlotIndex(entry.dstofs)] = true;
+ // Move helper reg to the last node register / stack slot
+ entry.srcofs = ShuffleEntry::HELPERREG;
+ entry.dstofs = dstOfs;
+ pShuffleEntryArray->Append(entry);
}
}
- while (reordered);
+
#endif // UNIX_AMD64_ABI
entry.srcofs = ShuffleEntry::SENTINEL;
diff --git a/src/vm/comdelegate.h b/src/vm/comdelegate.h
index 3d5ec8e..330f1f8 100644
--- a/src/vm/comdelegate.h
+++ b/src/vm/comdelegate.h
@@ -194,6 +194,7 @@ struct ShuffleEntry
OFSMASK = 0x7fff, // Mask to get stack offset
OFSREGMASK = 0x1fff, // Mask to get register index
SENTINEL = 0xffff, // Indicates end of shuffle array
+ HELPERREG = 0xcfff, // Use a helper register as source or destination (used to handle cycles in the shuffling)
};
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp
index 7020e40..983fc3f 100644
--- a/src/vm/i386/stublinkerx86.cpp
+++ b/src/vm/i386/stublinkerx86.cpp
@@ -1739,6 +1739,49 @@ VOID StubLinkerCPU::X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs
X64EmitMovXmmWorker(0xF3, 0x11, Xmmreg, baseReg, ofs);
}
+VOID StubLinkerCPU::X64EmitMovqRegXmm(X86Reg reg, X86Reg Xmmreg)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovqWorker(0x7e, Xmmreg, reg);
+}
+
+VOID StubLinkerCPU::X64EmitMovqXmmReg(X86Reg Xmmreg, X86Reg reg)
+{
+ STANDARD_VM_CONTRACT;
+ X64EmitMovqWorker(0x6e, Xmmreg, reg);
+}
+
+//-----------------------------------------------------------------------------
+// Helper method for emitting movq between xmm and general purpose reqister
+//-----------------------------------------------------------------------------
+VOID StubLinkerCPU::X64EmitMovqWorker(BYTE opcode, X86Reg Xmmreg, X86Reg reg)
+{
+ BYTE codeBuffer[10];
+ unsigned int nBytes = 0;
+ codeBuffer[nBytes++] = 0x66;
+ BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ if (reg >= kR8)
+ {
+ rex |= REX_MODRM_RM_EXT;
+ reg = X86RegFromAMD64Reg(reg);
+ }
+ if (Xmmreg >= kXMM8)
+ {
+ rex |= REX_MODRM_REG_EXT;
+ Xmmreg = X86RegFromAMD64Reg(Xmmreg);
+ }
+ codeBuffer[nBytes++] = rex;
+ codeBuffer[nBytes++] = 0x0f;
+ codeBuffer[nBytes++] = opcode;
+ BYTE modrm = static_cast<BYTE>((Xmmreg << 3) | reg);
+ codeBuffer[nBytes++] = 0xC0|modrm;
+
+ _ASSERTE(nBytes <= _countof(codeBuffer));
+
+ // Lastly, emit the encoded bytes
+ EmitBytes(codeBuffer, nBytes);
+}
+
//---------------------------------------------------------------
// Helper method for emitting of XMM from/to memory moves
//---------------------------------------------------------------
@@ -3800,7 +3843,37 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
#ifdef UNIX_AMD64_ABI
for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++)
{
- if (pEntry->srcofs & ShuffleEntry::REGMASK)
+ if (pEntry->srcofs == ShuffleEntry::HELPERREG)
+ {
+ if (pEntry->dstofs & ShuffleEntry::REGMASK)
+ {
+ // movq dstReg, xmm8
+ int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
+ X64EmitMovqRegXmm(c_argRegs[dstRegIndex], (X86Reg)kXMM8);
+ }
+ else
+ {
+ // movsd [rax + dst], xmm8
+ int dstOffset = (pEntry->dstofs + 1) * sizeof(void*);
+ X64EmitMovSDToMem((X86Reg)kXMM8, SCRATCH_REGISTER_X86REG, dstOffset);
+ }
+ }
+ else if (pEntry->dstofs == ShuffleEntry::HELPERREG)
+ {
+ if (pEntry->srcofs & ShuffleEntry::REGMASK)
+ {
+ // movq xmm8, srcReg
+ int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
+ X64EmitMovqXmmReg((X86Reg)kXMM8, c_argRegs[srcRegIndex]);
+ }
+ else
+ {
+ // movsd xmm8, [rax + src]
+ int srcOffset = (pEntry->srcofs + 1) * sizeof(void*);
+ X64EmitMovSDFromMem((X86Reg)(kXMM8), SCRATCH_REGISTER_X86REG, srcOffset);
+ }
+ }
+ else if (pEntry->srcofs & ShuffleEntry::REGMASK)
{
// Source in a general purpose or float register, destination in the same kind of a register or on stack
int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
diff --git a/src/vm/i386/stublinkerx86.h b/src/vm/i386/stublinkerx86.h
index 3d79b51..49731a8 100644
--- a/src/vm/i386/stublinkerx86.h
+++ b/src/vm/i386/stublinkerx86.h
@@ -200,8 +200,11 @@ class StubLinkerCPU : public StubLinker
VOID X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
VOID X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
VOID X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovqRegXmm(X86Reg reg, X86Reg Xmmreg);
+ VOID X64EmitMovqXmmReg(X86Reg Xmmreg, X86Reg reg);
VOID X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs = 0);
+ VOID X64EmitMovqWorker(BYTE opcode, X86Reg Xmmreg, X86Reg reg);
#endif
VOID X86EmitZeroOutReg(X86Reg reg);