summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
authorSejong OH <sejooh@microsoft.com>2016-05-12 17:06:45 -0700
committerSejong OH <sejooh@microsoft.com>2016-05-19 12:53:26 -0700
commiteb4c7a280a00b344a9bb4d35172d0a0936ec5bdd (patch)
treefc05fd45912477b5c7ea7262d999e80794326a63 /src/jit
parentc9dff80a222f30d1ff74e9ad1d5b8cdcb953897a (diff)
downloadcoreclr-eb4c7a280a00b344a9bb4d35172d0a0936ec5bdd.tar.gz
coreclr-eb4c7a280a00b344a9bb4d35172d0a0936ec5bdd.tar.bz2
coreclr-eb4c7a280a00b344a9bb4d35172d0a0936ec5bdd.zip
Initialize unused upper bits of Vector3 arguments and return registers for
Linux JIT requires the unused upper bits to be cleared, but the native compiler doesn't clear the upper bits. This change clears those bits for Reverse PInvoke arguments and PInvoke return.
Diffstat (limited to 'src/jit')
-rwxr-xr-x[-rw-r--r--]src/jit/codegen.h3
-rwxr-xr-x[-rw-r--r--]src/jit/codegencommon.cpp27
-rwxr-xr-x[-rw-r--r--]src/jit/codegenxarch.cpp70
-rwxr-xr-x[-rw-r--r--]src/jit/ee_il_dll.cpp0
-rwxr-xr-x[-rw-r--r--]src/jit/lower.cpp16
5 files changed, 110 insertions, 6 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index 2d7bc65597..ce08d9fa65 100644..100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -281,6 +281,9 @@ protected:
RegState *regState);
void genEnregisterIncomingStackArgs();
void genCheckUseBlockInit();
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ void genClearStackVec3ArgUpperBits();
+#endif //FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
#if defined(_TARGET_ARM64_)
bool genInstrWithConstant(instruction ins,
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 0ce079c6d3..42a404afbe 100644..100755
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -3931,9 +3931,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg,
// RyuJit backend is making another implicit assumption that Vector3 type args when passed in
// registers or on stack, the upper most 4-bytes will be zero.
//
- // TODO-64bit: assumptions 1 and 2 hold within RyuJIT generated code. It is not clear whether
- // these assumptions hold when a Vector3 type arg is passed by native code. Example: PInvoke
- // returning Vector3 type value or RPInvoke passing Vector3 type args.
+ // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
+ // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
+ // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
+ // passes it as the retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
+ // there is no need to clear upper 4-bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and Caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear upper 4-bytes of Vector3
+ // type args in prolog and Vector3 type return value of a call.
+
if (varDsc->lvType == TYP_SIMD12)
{
regType = TYP_DOUBLE;
@@ -8519,6 +8530,16 @@ void CodeGen::genFnProlog()
genPrologPadForReJit();
getEmitter()->emitMarkPrologEnd();
}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // The unused bits of Vector3 arguments must be cleared
+ // since the native compiler doesn't initialize the upper bits to zero.
+ //
+ // TODO-Cleanup: This logic can be implemented in
+ // genFnPrologCalleeRegArgs() for argument registers and
+ // genEnregisterIncomingStackArgs() for stack arguments.
+ genClearStackVec3ArgUpperBits();
+#endif //FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
/*-----------------------------------------------------------------------------
* Take care of register arguments first
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index b35268e225..25c9f7ce4f 100644..100755
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -3680,6 +3680,61 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base
genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
instGen(INS_r_movsb);
}
+
+//------------------------------------------------------------------------
+// genClearStackVec3ArgUpperBits: Zero the unused upper 32 bits of every Vector3
+// (TYP_SIMD12) parameter in the prolog, whether it arrives on the stack or in a
+// register. Native callers don't clear those bits and the callee cannot tell
+// whether its caller is native, so they are always cleared here.
+#ifdef FEATURE_SIMD
+void CodeGen::genClearStackVec3ArgUpperBits()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genClearStackVec3ArgUpperBits()\n");
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ unsigned varNum = 0; // NOTE(review): unused; shadowed by the loop variable declared below.
+
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
+ {
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ assert(varDsc->lvIsParam);
+
+ // Only TYP_SIMD12 (Vector3) parameters need their upper bits cleared.
+ if (varDsc->lvType != TYP_SIMD12)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ // Stack arg: store a 4-byte zero at offset 12: mov dword ptr [V_ARG_BASE+0xC], 0
+ getEmitter()->emitIns_S_I(
+ ins_Store(TYP_INT),
+ EA_4BYTE,
+ varNum,
+ genTypeSize(TYP_FLOAT) * 3,
+ 0);
+
+ }
+ else
+ {
+ // Assume that for x64 linux, an argument is fully in registers
+ // or fully on stack.
+ regNumber argReg = varDsc->GetOtherArgReg();
+
+ // Shift left then right by 12 bytes (96 bits) so only the low 32 bits of argReg survive.
+ // argReg = argReg << 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ // argReg = argReg >> 96
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ }
+ }
+}
+#endif // FEATURE_SIMD
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
// Generate code for CpObj nodes which copy structs that have interleaved
@@ -5948,6 +6003,21 @@ void CodeGen::genCallInstruction(GenTreePtr node)
inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
}
}
+
+#ifdef FEATURE_SIMD
+ // A Vector3 return value is stored in xmm0 and xmm1.
+ // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
+ // the native compiler doesn't guarantee it.
+ if (returnType == TYP_SIMD12)
+ {
+ returnReg = retTypeDesc->GetABIReturnReg(1);
+ // Clear the upper 32 bits by two shift instructions.
+ // retReg = retReg << 96
+ // retReg = retReg >> 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ }
+#endif // FEATURE_SIMD
}
else
{
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index b97c10cca4..b97c10cca4 100644..100755
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 1df4f3bbcd..8238ef446f 100644..100755
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -694,9 +694,19 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data)
// RyuJit backend is making another implicit assumption that Vector3 type args when passed in
// registers or on stack, the upper most 4-bytes will be zero.
//
- // TODO-64bit: assumptions 1 and 2 hold within RyuJIT generated code. It is not clear whether
- // these assumptions hold when a Vector3 type arg is passed by native code. Example: PInvoke
- // returning Vector3 type value or RPInvoke passing Vector3 type args.
+ // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
+ // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
+ // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
+ // passes it as the retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
+ // there is no need to clear upper 4-bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and Caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear upper 4-bytes of Vector3
+ // type args in prolog and Vector3 type return value of a call.
(*ppTree)->gtType = TYP_SIMD16;
#else
NYI("Lowering of TYP_SIMD12 locals");