summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
Diffstat (limited to 'src/jit')
-rwxr-xr-x[-rw-r--r--]src/jit/codegen.h3
-rwxr-xr-x[-rw-r--r--]src/jit/codegencommon.cpp27
-rwxr-xr-x[-rw-r--r--]src/jit/codegenxarch.cpp70
-rwxr-xr-x[-rw-r--r--]src/jit/ee_il_dll.cpp0
-rwxr-xr-x[-rw-r--r--]src/jit/lower.cpp16
5 files changed, 110 insertions, 6 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index 2d7bc65597..ce08d9fa65 100644..100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -281,6 +281,9 @@ protected:
RegState *regState);
void genEnregisterIncomingStackArgs();
void genCheckUseBlockInit();
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ void genClearStackVec3ArgUpperBits();
+#endif //FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
#if defined(_TARGET_ARM64_)
bool genInstrWithConstant(instruction ins,
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 0ce079c6d3..42a404afbe 100644..100755
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -3931,9 +3931,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
// RyuJit backend is making another implicit assumption that Vector3 type args when passed in
// registers or on stack, the upper most 4-bytes will be zero.
//
- // TODO-64bit: assumptions 1 and 2 hold within RyuJIT generated code. It is not clear whether
- // these assumptions hold when a Vector3 type arg is passed by native code. Example: PInvoke
- // returning Vector3 type value or RPInvoke passing Vector3 type args.
+ // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
+ // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
+ // read/written. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local
+ // and passes it as the retBuf arg, and Callee method writes only 12 bytes to retBuf. For this
+ // reason, there is no need to clear upper 4-bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and Caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4-bytes of Vector3
+ // type args in prolog and of the Vector3 type return value of a call.
+
if (varDsc->lvType == TYP_SIMD12)
{
regType = TYP_DOUBLE;
@@ -8519,6 +8530,16 @@ void CodeGen::genFnProlog()
genPrologPadForReJit();
getEmitter()->emitMarkPrologEnd();
}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // The unused bits of Vector3 arguments must be cleared
+ // since the native compiler doesn't initialize the upper bits to zeros.
+ //
+ // TODO-Cleanup: This logic can be implemented in
+ // genFnPrologCalleeRegArgs() for argument registers and
+ // genEnregisterIncomingStackArgs() for stack arguments.
+ genClearStackVec3ArgUpperBits();
+#endif //FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
/*-----------------------------------------------------------------------------
* Take care of register arguments first
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index b35268e225..25c9f7ce4f 100644..100755
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -3680,6 +3680,61 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base
genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
instGen(INS_r_movsb);
}
+
+//------------------------------------------------------------------------
+// genClearStackVec3ArgUpperBits: zero the unused upper 4 bytes of incoming
+// Vector3 (TYP_SIMD12) arguments in the prolog.
+//
+// Despite its name this handles by-value Vector3 arguments both on the stack
+// and in registers. The native compiler doesn't clear the upper bits and
+// there is no way to know if the caller is native or not, so the bits are
+// always cleared.
+#ifdef FEATURE_SIMD
+void CodeGen::genClearStackVec3ArgUpperBits()
+{
+#ifdef DEBUG
+    if (verbose)
+    {
+        printf("*************** In genClearStackVec3ArgUpperBits()\n");
+    }
+#endif
+
+    assert(compiler->compGeneratingProlog);
+
+    for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
+    {
+        LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+        assert(varDsc->lvIsParam);
+
+        // Only Vector3 (simd12) arguments need fixing up.
+        if (varDsc->lvType != TYP_SIMD12)
+        {
+            continue;
+        }
+
+        if (!varDsc->lvIsRegArg)
+        {
+            // Stack argument: store a 4-byte zero just past the three floats,
+            // i.e. mov dword ptr [V_ARG_BASE+0xC], 0
+            getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
+        }
+        else
+        {
+            // Assume that for x64 linux, an argument is fully in registers
+            // or fully on stack.
+            // NOTE(review): presumably this is the register carrying the third float - confirm.
+            regNumber argReg = varDsc->GetOtherArgReg();
+
+            // Shift left then right by 12 bytes (96 bits) so that only the low
+            // 32 bits of argReg survive and everything above them is zero.
+            // argReg = argReg << 96
+            getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+            // argReg = argReg >> 96
+            getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+        }
+    }
+}
+#endif // FEATURE_SIMD
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// Generate code for CpObj nodes which copy structs that have interleaved
@@ -5948,6 +6003,21 @@ void CodeGen::genCallInstruction(GenTreePtr node)
inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
}
}
+
+#ifdef FEATURE_SIMD
+ // A Vector3 return value is stored in xmm0 and xmm1.
+ // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
+ // the native compiler doesn't guarantee it.
+ if (returnType == TYP_SIMD12)
+ {
+ returnReg = retTypeDesc->GetABIReturnReg(1);
+ // Clear the upper 32 bits by two shift instructions.
+ // retReg = retReg << 96
+ // retReg = retReg >> 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ }
+#endif // FEATURE_SIMD
}
else
{
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index b97c10cca4..b97c10cca4 100644..100755
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 1df4f3bbcd..8238ef446f 100644..100755
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -694,9 +694,19 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data)
// RyuJit backend is making another implicit assumption that Vector3 type args when passed in
// registers or on stack, the upper most 4-bytes will be zero.
//
- // TODO-64bit: assumptions 1 and 2 hold within RyuJIT generated code. It is not clear whether
- // these assumptions hold when a Vector3 type arg is passed by native code. Example: PInvoke
- // returning Vector3 type value or RPInvoke passing Vector3 type args.
+ // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
+ // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
+ // read/written. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local
+ // and passes it as the retBuf arg, and Callee method writes only 12 bytes to retBuf. For this
+ // reason, there is no need to clear upper 4-bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and Caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4-bytes of Vector3
+ // type args in prolog and of the Vector3 type return value of a call.
(*ppTree)->gtType = TYP_SIMD16;
#else
NYI("Lowering of TYP_SIMD12 locals");