diff options
author | Pat Gavlin <pagavlin@microsoft.com> | 2016-09-09 12:26:44 -0700 |
---|---|---|
committer | Pat Gavlin <pagavlin@microsoft.com> | 2016-09-09 12:26:44 -0700 |
commit | 7dc24537a059284db2ba55f77458e6e4c78c7f43 (patch) | |
tree | 1d691628a33104c9b8996469a30b4de50539506b /src/jit | |
parent | 0224d864af72cc2fa414bf7daedeb6d559cb1140 (diff) | |
download | coreclr-7dc24537a059284db2ba55f77458e6e4c78c7f43.tar.gz coreclr-7dc24537a059284db2ba55f77458e6e4c78c7f43.tar.bz2 coreclr-7dc24537a059284db2ba55f77458e6e4c78c7f43.zip |
Do not use movsq on x86.
This instruction is only available when targeting amd64.
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegenxarch.cpp | 36 | ||||
-rw-r--r-- | src/jit/instr.h | 12 | ||||
-rw-r--r-- | src/jit/instrsxarch.h | 16 |
3 files changed, 38 insertions, 26 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 3f0534840b..17967cbba7 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -3972,7 +3972,7 @@ void CodeGen::genClearStackVec3ArgUpperBits() // Generate code for CpObj nodes which copy structs that have interleaved // GC pointers. -// This will generate a sequence of movsq instructions for the cases of non-gc members +// This will generate a sequence of movs{d,q} instructions for the cases of non-gc members // and calls to the BY_REF_ASSIGN helper otherwise. void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { @@ -4009,7 +4009,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG - bool isRepMovsqUsed = false; + bool isRepMovsPtrUsed = false; assert(!dstAddr->isContained()); @@ -4066,14 +4066,14 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { #ifdef DEBUG // If the destination of the CpObj is on the stack - // make sure we allocated RCX to emit rep movsq. + // make sure we allocated RCX to emit rep movs{d,q}. regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); assert(tmpReg == REG_RCX); - isRepMovsqUsed = true; + isRepMovsPtrUsed = true; #endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); - instGen(INS_r_movsq); + instGen(INS_r_movs_ptr); } else { @@ -4081,7 +4081,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) // emit a rep movsq instruction. while (slots > 0) { - instGen(INS_movsq); + instGen(INS_movs_ptr); slots--; } } @@ -4097,7 +4097,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) switch (gcPtrs[i]) { case TYPE_GC_NONE: - // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // Let's see if we can use rep movs{d,q} instead of a sequence of movs{d,q} instructions // to save cycles and code size. 
{ unsigned nonGcSlotCount = 0; @@ -4109,12 +4109,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); // If we have a very small contiguous non-gc region, it's better just to - // emit a sequence of movsq instructions + // emit a sequence of movs{d,q} instructions if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) { while (nonGcSlotCount > 0) { - instGen(INS_movsq); + instGen(INS_movs_ptr); nonGcSlotCount--; } } @@ -4122,13 +4122,13 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { #ifdef DEBUG // Otherwise, we can save code-size and improve CQ by emitting - // rep movsq + // rep movs{d,q} regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); assert(tmpReg == REG_RCX); - isRepMovsqUsed = true; + isRepMovsPtrUsed = true; #endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); - instGen(INS_r_movsq); + instGen(INS_r_movs_ptr); } } break; @@ -8816,7 +8816,7 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) switch (gcPtrs[i]) { case TYPE_GC_NONE: - // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // Let's see if we can use rep movs{d,q} instead of a sequence of movs{d,q} instructions // to save cycles and code size. 
{ unsigned nonGcSlotCount = 0; @@ -8828,13 +8828,13 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); // If we have a very small contiguous non-gc region, it's better just to - // emit a sequence of movsq instructions + // emit a sequence of movs{d,q} instructions if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) { copiedSlots += nonGcSlotCount; while (nonGcSlotCount > 0) { - instGen(INS_movsq); + instGen(INS_movs_ptr); nonGcSlotCount--; } } @@ -8842,7 +8842,7 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) { getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); copiedSlots += nonGcSlotCount; - instGen(INS_r_movsq); + instGen(INS_r_movs_ptr); } } break; @@ -8851,10 +8851,10 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it { // We have a GC (byref or ref) pointer - // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction, + // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movs{d,q} instruction, // but the logic for emitting a GC info record is not available (it is internal for the emitter // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do - // instGen(INS_movsq); and emission of gc info. + // instGen(INS_movs{d,q}); and emission of gc info. 
var_types memType; if (gcPtrs[i] == TYPE_GC_REF) diff --git a/src/jit/instr.h b/src/jit/instr.h index c38f8d2073..04256842c7 100644 --- a/src/jit/instr.h +++ b/src/jit/instr.h @@ -56,6 +56,18 @@ DECLARE_TYPED_ENUM(instruction,unsigned) } END_DECLARE_TYPED_ENUM(instruction,unsigned) +#if defined(_TARGET_XARCH_) +#if defined(_TARGET_X86_) +#define INS_r_movs_ptr INS_r_movsd +#define INS_movs_ptr INS_movsd +#elif defined(_TARGET_AMD64_) +#define INS_r_movs_ptr INS_r_movsq +#define INS_movs_ptr INS_movsq +#else +#error Unsupported xarch target +#endif +#endif + /*****************************************************************************/ enum insUpdateModes diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 436563babf..4831ef95cf 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -367,25 +367,25 @@ INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0) INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3) INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548) -#endif // !LEGACY_BACKEND +#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4) INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548) -#endif // !LEGACY_BACKEND +#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3) INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48) -#endif // !LEGACY_BACKEND +#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA) INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB) -#ifndef LEGACY_BACKEND 
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48) -#endif // !LEGACY_BACKEND +#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC) INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090) |