diff options
Diffstat (limited to 'src/jit/codegenarm64.cpp')
-rw-r--r-- | src/jit/codegenarm64.cpp | 1773 |
1 files changed, 756 insertions, 1017 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 7de19f9043..0aa14210bb 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -1366,18 +1366,59 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, } else { - getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff)); - getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL); + // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword + // There are three forms + // movk which loads into any halfword preserving the remaining halfwords + // movz which loads into any halfword zeroing the remaining halfwords + // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register + // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords + // with ones + + // Determine whether movn or movz will require the fewest instructions to populate the immediate + int preferMovn = 0; + + for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16) + { + if (uint16_t(imm >> i) == 0xffff) + ++preferMovn; // a single movk 0xffff could be skipped if movn was used + else if (uint16_t(imm >> i) == 0x0000) + --preferMovn; // a single movk 0 could be skipped if movz was used + } + + // Select the first instruction. Any additional instruction will use movk + instruction ins = (preferMovn > 0) ? INS_movn : INS_movz; - if ((size == EA_8BYTE) && - ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them + // Initial movz or movn will fill the remaining bytes with the skipVal + // This can allow skipping filling a halfword + uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0; + + unsigned bits = (size == EA_8BYTE) ? 
64 : 32; + + // Iterate over imm examining 16 bits at a time + for (unsigned i = 0; i < bits; i += 16) { - getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL); - if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zero-ed them + uint16_t imm16 = uint16_t(imm >> i); + + if (imm16 != skipVal) { - getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL); + if (ins == INS_movn) + { + // For the movn case, we need to bitwise invert the immediate. This is because + // (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000) + imm16 = ~imm16; + } + + getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL); + + // Once the initial movz/movn is emitted the remaining instructions will all use movk + ins = INS_movk; } } + + // We must emit a movn or movz or we have not done anything + // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and + // should not be in this else condition + assert(ins == INS_movk); } // The caller may have requested that the flags be set on this mov (rarely/never) if (flags == INS_FLAGS_SET) @@ -1503,18 +1544,13 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } + + genProduceReg(treeNode); #else // !0 NYI("genCodeForMulHi"); #endif // !0 } -// generate code for a DIV or MOD operation -// -void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) -{ - // unused on ARM64 -} - // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. void CodeGen::genCodeForBinary(GenTree* treeNode) @@ -1541,6 +1577,177 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) } //------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. 
+// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + if (isRegCandidate && !(tree->gtFlags & GTF_VAR_DEATH)) + { + assert((tree->InReg()) || (tree->gtFlags & GTF_SPILLED)); + } + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. + + if (!tree->InReg() && !(tree->gtFlags & GTF_SPILLED)) + { + assert(!isRegCandidate); + + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + noway_assert(targetType != TYP_STRUCT); + + // record the offset + unsigned offset = tree->gtLclOffs; + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(!tree->InReg()); + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTreePtr data = tree->gtOp1->gtEffectiveVal(); + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_ZR; + } + else + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + assert(dataReg != REG_NA); + + instruction ins = ins_Store(targetType); + + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); + + genUpdateLife(tree); + + varDsc->lvRegNum = REG_STK; +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. +// +// Arguments: +// tree - the GT_STORE_LCL_VAR node +// +void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + unsigned varNum = tree->gtLclNum; + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. + assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); + + GenTreePtr data = tree->gtOp1->gtEffectiveVal(); + + // var = call, where call returns a multi-reg return value + // case is handled separately. 
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) + { + genMultiRegCallStoreToLocal(tree); + } + else + { + genConsumeRegs(data); + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_ZR; + } + else + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + assert(dataReg != REG_NA); + + if (targetReg == REG_NA) // store into stack based LclVar + { + inst_set_SV_var(tree); + + instruction ins = ins_Store(targetType); + emitAttr attr = emitTypeSize(targetType); + + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); + + genUpdateLife(tree); + + varDsc->lvRegNum = REG_STK; + } + else // store into register (i.e move into register) + { + if (dataReg != targetReg) + { + // Assign into targetReg when dataReg (from op1) is not the same register + inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); + } + genProduceReg(tree); + } + } +} + +//------------------------------------------------------------------------ // isStructReturn: Returns whether the 'treeNode' is returning a struct. // // Arguments: @@ -1771,6 +1978,11 @@ void CodeGen::genReturn(GenTreePtr treeNode) GenTreePtr op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); + // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return + // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the + // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined". 
+ assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT)); + #ifdef DEBUG if (targetType == TYP_VOID) { @@ -1840,985 +2052,6 @@ void CodeGen::genReturn(GenTreePtr treeNode) #endif } -/***************************************************************************** - * - * Generate code for a single node in the tree. - * Preconditions: All operands have been evaluated - * - */ -void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) -{ - regNumber targetReg = treeNode->gtRegNum; - var_types targetType = treeNode->TypeGet(); - emitter* emit = getEmitter(); - -#ifdef DEBUG - // Validate that all the operands for the current node are consumed in order. - // This is important because LSRA ensures that any necessary copies will be - // handled correctly. - lastConsumedNode = nullptr; - if (compiler->verbose) - { - unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio - compiler->gtDispLIRNode(treeNode, "Generating: "); - } -#endif // DEBUG - - // Is this a node whose value is already in a register? LSRA denotes this by - // setting the GTF_REUSE_REG_VAL flag. - if (treeNode->IsReuseRegVal()) - { - // For now, this is only used for constant nodes. - assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); - JITDUMP(" TreeNode is marked ReuseReg\n"); - return; - } - - // contained nodes are part of their parents for codegen purposes - // ex : immediates, most LEAs - if (treeNode->isContained()) - { - return; - } - - switch (treeNode->gtOper) - { - case GT_START_NONGC: - getEmitter()->emitDisableGC(); - break; - - case GT_PROF_HOOK: - // We should be seeing this only if profiler hook is needed - noway_assert(compiler->compIsProfilerHookNeeded()); - -#ifdef PROFILING_SUPPORTED - // Right now this node is used only for tail calls. In future if - // we intend to use it for Enter or Leave hooks, add a data member - // to this node indicating the kind of profiler hook. 
For example, - // helper number can be used. - genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); -#endif // PROFILING_SUPPORTED - break; - - case GT_LCLHEAP: - genLclHeap(treeNode); - break; - - case GT_CNS_INT: - case GT_CNS_DBL: - genSetRegToConst(targetReg, targetType, treeNode); - genProduceReg(treeNode); - break; - - case GT_NOT: - assert(!varTypeIsFloating(targetType)); - - __fallthrough; - - case GT_NEG: - { - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); - - // The arithmetic node must be sitting in a register (since it's not contained) - assert(!treeNode->isContained()); - // The dst can only be a register. - assert(targetReg != REG_NA); - - GenTreePtr operand = treeNode->gtGetOp1(); - assert(!operand->isContained()); - // The src must be a register. - regNumber operandReg = genConsumeReg(operand); - - getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg); - } - genProduceReg(treeNode); - break; - - case GT_DIV: - case GT_UDIV: - genConsumeOperands(treeNode->AsOp()); - - if (varTypeIsFloating(targetType)) - { - // Floating point divide never raises an exception - genCodeForBinary(treeNode); - } - else // an integer divide operation - { - GenTreePtr divisorOp = treeNode->gtGetOp2(); - emitAttr size = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet()))); - - if (divisorOp->IsIntegralConst(0)) - { - // We unconditionally throw a divide by zero exception - genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); - - // We still need to call genProduceReg - genProduceReg(treeNode); - } - else // the divisor is not the constant zero - { - regNumber divisorReg = divisorOp->gtRegNum; - - // Generate the require runtime checks for GT_DIV or GT_UDIV - if (treeNode->gtOper == GT_DIV) - { - BasicBlock* sdivLabel = genCreateTempLabel(); - - // Two possible exceptions: - // (AnyVal / 0) => DivideByZeroException - // (MinInt / -1) => ArithmeticException - // - bool checkDividend = true; - - // Do we have an immediate for 
the 'divisorOp'? - // - if (divisorOp->IsCnsIntOrI()) - { - GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon(); - ssize_t intConstValue = intConstTree->IconValue(); - assert(intConstValue != 0); // already checked above by IsIntegralConst(0)) - if (intConstValue != -1) - { - checkDividend = false; // We statically know that the dividend is not -1 - } - } - else // insert check for divison by zero - { - // Check if the divisor is zero throw a DivideByZeroException - emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); - emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); - genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); - } - - if (checkDividend) - { - // Check if the divisor is not -1 branch to 'sdivLabel' - emit->emitIns_R_I(INS_cmp, size, divisorReg, -1); - - emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); - inst_JMP(jmpNotEqual, sdivLabel); - // If control flow continues past here the 'divisorReg' is known to be -1 - - regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum; - // At this point the divisor is known to be -1 - // - // Issue the 'adds zr, dividendReg, dividendReg' instruction - // this will set both the Z and V flags only when dividendReg is MinInt - // - emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg); - inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear - genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw - // ArithmeticException - - genDefineTempLabel(sdivLabel); - } - genCodeForBinary(treeNode); // Generate the sdiv instruction - } - else // (treeNode->gtOper == GT_UDIV) - { - // Only one possible exception - // (AnyVal / 0) => DivideByZeroException - // - // Note that division by the constant 0 was already checked for above by the - // op2->IsIntegralConst(0) check - // - if (!divisorOp->IsCnsIntOrI()) - { - // divisorOp is not a constant, so it could be zero - // - emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); - emitJumpKind jmpEqual = 
genJumpKindForOper(GT_EQ, CK_SIGNED); - genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); - } - genCodeForBinary(treeNode); - } - } - } - break; - - case GT_OR: - case GT_XOR: - case GT_AND: - assert(varTypeIsIntegralOrI(treeNode)); - __fallthrough; - case GT_ADD: - case GT_SUB: - case GT_MUL: - genConsumeOperands(treeNode->AsOp()); - genCodeForBinary(treeNode); - break; - - case GT_LSH: - case GT_RSH: - case GT_RSZ: - case GT_ROR: - genCodeForShift(treeNode); - // genCodeForShift() calls genProduceReg() - break; - - case GT_CAST: - if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) - { - // Casts float/double <--> double/float - genFloatToFloatCast(treeNode); - } - else if (varTypeIsFloating(treeNode->gtOp.gtOp1)) - { - // Casts float/double --> int32/int64 - genFloatToIntCast(treeNode); - } - else if (varTypeIsFloating(targetType)) - { - // Casts int32/uint32/int64/uint64 --> float/double - genIntToFloatCast(treeNode); - } - else - { - // Casts int <--> int - genIntToIntCast(treeNode); - } - // The per-case functions call genProduceReg() - break; - - case GT_LCL_FLD_ADDR: - case GT_LCL_VAR_ADDR: - // Address of a local var. This by itself should never be allocated a register. - // If it is worth storing the address in a register then it should be cse'ed into - // a temp and that would be allocated a register. 
- noway_assert(targetType == TYP_BYREF); - noway_assert(!treeNode->InReg()); - - inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); - genProduceReg(treeNode); - break; - - case GT_LCL_FLD: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - assert(varNode->gtLclNum < compiler->lvaCount); - unsigned varNum = varNode->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - if (targetType == TYP_STRUCT) - { - NYI("GT_LCL_FLD with TYP_STRUCT"); - } - emitAttr size = emitTypeSize(targetType); - - noway_assert(targetType != TYP_STRUCT); - noway_assert(targetReg != REG_NA); - - unsigned offset = treeNode->gtLclFld.gtLclOffs; - - if (varTypeIsFloating(targetType)) - { - if (treeNode->InReg()) - { - NYI("GT_LCL_FLD with register to register Floating point move"); - } - else - { - emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offset); - } - } - else - { - size = EA_SET_SIZE(size, EA_8BYTE); - emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offset); - } - genProduceReg(treeNode); - } - break; - - case GT_LCL_VAR: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - bool isRegCandidate = varDsc->lvIsRegCandidate(); - - // lcl_vars are not defs - assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); - - if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) - { - assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); - } - - // If this is a register candidate that has been spilled, genConsumeReg() will - // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
- - if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) - { - assert(!isRegCandidate); - - // targetType must be a normal scalar type and not a TYP_STRUCT - assert(targetType != TYP_STRUCT); - - instruction ins = ins_Load(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - emit->emitIns_R_S(ins, attr, targetReg, varNum, 0); - genProduceReg(treeNode); - } - } - break; - - case GT_STORE_LCL_FLD: - { - noway_assert(targetType != TYP_STRUCT); - - // record the offset - unsigned offset = treeNode->gtLclFld.gtLclOffs; - - // We must have a stack store with GT_STORE_LCL_FLD - noway_assert(!treeNode->InReg()); - noway_assert(targetReg == REG_NA); - - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - // Ensure that lclVar nodes are typed correctly. - assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - - GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - genConsumeRegs(data); - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - dataReg = REG_ZR; - } - else - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - assert(dataReg != REG_NA); - - instruction ins = ins_Store(targetType); - - emitAttr attr = emitTypeSize(targetType); - - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); - - genUpdateLife(varNode); - - varDsc->lvRegNum = REG_STK; - } - break; - - case GT_STORE_LCL_VAR: - { - GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon(); - - unsigned varNum = varNode->gtLclNum; - assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - unsigned offset = 0; - - // Ensure that lclVar nodes are typed correctly. 
- assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); - - GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal(); - - // var = call, where call returns a multi-reg return value - // case is handled separately. - if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) - { - genMultiRegCallStoreToLocal(treeNode); - } - else - { - genConsumeRegs(data); - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - dataReg = REG_ZR; - } - else - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - assert(dataReg != REG_NA); - - if (targetReg == REG_NA) // store into stack based LclVar - { - inst_set_SV_var(varNode); - - instruction ins = ins_Store(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); - - genUpdateLife(varNode); - - varDsc->lvRegNum = REG_STK; - } - else // store into register (i.e move into register) - { - if (dataReg != targetReg) - { - // Assign into targetReg when dataReg (from op1) is not the same register - inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); - } - genProduceReg(treeNode); - } - } - } - break; - - case GT_RETFILT: - // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in - // the return register, if it's not already there. The processing is the same as GT_RETURN. - if (targetType != TYP_VOID) - { - // For filters, the IL spec says the result is type int32. Further, the only specified legal values - // are 0 or 1, with the use of other values "undefined". 
- assert(targetType == TYP_INT); - } - - __fallthrough; - - case GT_RETURN: - genReturn(treeNode); - break; - - case GT_LEA: - { - // if we are here, it is the case where there is an LEA that cannot - // be folded into a parent instruction - GenTreeAddrMode* lea = treeNode->AsAddrMode(); - genLeaInstruction(lea); - } - // genLeaInstruction calls genProduceReg() - break; - - case GT_IND: - genConsumeAddress(treeNode->AsIndir()->Addr()); - emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir()); - genProduceReg(treeNode); - break; - - case GT_MULHI: - genCodeForMulHi(treeNode->AsOp()); - genProduceReg(treeNode); - break; - - case GT_MOD: - case GT_UMOD: - // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph. - // - // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end. - noway_assert(!"Codegen for GT_MOD/GT_UMOD"); - break; - - case GT_INTRINSIC: - genIntrinsic(treeNode); - break; - -#ifdef FEATURE_SIMD - case GT_SIMD: - genSIMDIntrinsic(treeNode->AsSIMD()); - break; -#endif // FEATURE_SIMD - - case GT_CKFINITE: - genCkfinite(treeNode); - break; - - case GT_EQ: - case GT_NE: - case GT_LT: - case GT_LE: - case GT_GE: - case GT_GT: - { - // TODO-ARM64-CQ: Check if we can use the currently set flags. 
- // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register - // (signed < or >= where targetReg != REG_NA) - - GenTreeOp* tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1; - GenTreePtr op2 = tree->gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - - assert(!op1->isUsedFromMemory()); - assert(!op2->isUsedFromMemory()); - - genConsumeOperands(tree); - - emitAttr cmpSize = EA_UNKNOWN; - - if (varTypeIsFloating(op1Type)) - { - assert(varTypeIsFloating(op2Type)); - assert(!op1->isContained()); - assert(op1Type == op2Type); - cmpSize = EA_ATTR(genTypeSize(op1Type)); - - if (op2->IsIntegralConst(0)) - { - emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); - } - else - { - assert(!op2->isContained()); - emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); - } - } - else - { - assert(!varTypeIsFloating(op2Type)); - // We don't support swapping op1 and op2 to generate cmp reg, imm - assert(!op1->isContainedIntOrIImmed()); - - // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero - // extended as part of the instruction (using "CMP (extended register)"). - // We should use that if possible, swapping operands - // (and reversing the condition) if necessary. - unsigned op1Size = genTypeSize(op1Type); - unsigned op2Size = genTypeSize(op2Type); - - if ((op1Size < 4) || (op1Size < op2Size)) - { - // We need to sign/zero extend op1 up to 32 or 64 bits. - instruction ins = ins_Move_Extend(op1Type, true); - inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum); - } - - if (!op2->isContainedIntOrIImmed()) - { - if ((op2Size < 4) || (op2Size < op1Size)) - { - // We need to sign/zero extend op2 up to 32 or 64 bits. 
- instruction ins = ins_Move_Extend(op2Type, true); - inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum); - } - } - cmpSize = EA_4BYTE; - if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE)) - { - cmpSize = EA_8BYTE; - } - - if (op2->isContainedIntOrIImmed()) - { - GenTreeIntConCommon* intConst = op2->AsIntConCommon(); - emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue()); - } - else - { - emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum); - } - } - - // Are we evaluating this into a register? - if (targetReg != REG_NA) - { - genSetRegToCond(targetReg, tree); - genProduceReg(tree); - } - } - break; - - case GT_JTRUE: - genCodeForJumpTrue(treeNode); - break; - - case GT_RETURNTRAP: - { - // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC - // based on the contents of 'data' - - GenTree* data = treeNode->gtOp.gtOp1; - genConsumeRegs(data); - emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); - - BasicBlock* skipLabel = genCreateTempLabel(); - - emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); - inst_JMP(jmpEqual, skipLabel); - // emit the call to the EE-helper that stops for GC (or other reasons) - - genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); - genDefineTempLabel(skipLabel); - } - break; - - case GT_STOREIND: - { - GenTree* data = treeNode->gtOp.gtOp2; - GenTree* addr = treeNode->gtOp.gtOp1; - GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); - if (writeBarrierForm != GCInfo::WBF_NoBarrier) - { - // data and addr must be in registers. - // Consume both registers so that any copies of interfering - // registers are taken care of. - genConsumeOperands(treeNode->AsOp()); - -#if NOGC_WRITE_BARRIERS - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, - // as that is where 'addr' must go. 
- noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); - - // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); - } - - // 'data' goes into x15 (REG_WRITE_BARRIER) - if (data->gtRegNum != REG_WRITE_BARRIER) - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); - } -#else - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_ARG_0, - // as that is where 'addr' must go. - noway_assert(data->gtRegNum != REG_ARG_0); - - // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); - } - - // data goes in REG_ARG_1 - if (data->gtRegNum != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); - } -#endif // NOGC_WRITE_BARRIERS - - genGCWriteBarrier(treeNode, writeBarrierForm); - } - else // A normal store, not a WriteBarrier store - { - bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); - bool dataIsUnary = false; - GenTree* nonRMWsrc = nullptr; - // We must consume the operands in the proper execution order, - // so that liveness is updated appropriately. 
- if (!reverseOps) - { - genConsumeAddress(addr); - } - - if (!data->isContained()) - { - genConsumeRegs(data); - } - - if (reverseOps) - { - genConsumeAddress(addr); - } - - regNumber dataReg = REG_NA; - if (data->isContainedIntOrIImmed()) - { - assert(data->IsIntegralConst(0)); - dataReg = REG_ZR; - } - else // data is not contained, so evaluate it into a register - { - assert(!data->isContained()); - dataReg = data->gtRegNum; - } - - emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(treeNode), dataReg, treeNode->AsIndir()); - } - } - break; - - case GT_COPY: - // This is handled at the time we call genConsumeReg() on the GT_COPY - break; - - case GT_SWAP: - { - // Swap is only supported for lclVar operands that are enregistered - // We do not consume or produce any registers. Both operands remain enregistered. - // However, the gc-ness may change. - assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2)); - - GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon(); - LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); - var_types type1 = varDsc1->TypeGet(); - GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon(); - LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); - var_types type2 = varDsc2->TypeGet(); - - // We must have both int or both fp regs - assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); - - // FP swap is not yet implemented (and should have NYI'd in LSRA) - assert(!varTypeIsFloating(type1)); - - regNumber oldOp1Reg = lcl1->gtRegNum; - regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); - regNumber oldOp2Reg = lcl2->gtRegNum; - regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); - - // We don't call genUpdateVarReg because we don't have a tree node with the new register. 
- varDsc1->lvRegNum = oldOp2Reg; - varDsc2->lvRegNum = oldOp1Reg; - - // Do the xchg - emitAttr size = EA_PTRSIZE; - if (varTypeGCtype(type1) != varTypeGCtype(type2)) - { - // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. - // Otherwise it will leave them alone, which is correct if they have the same GC-ness. - size = EA_GCREF; - } - - NYI("register swap"); - // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); - - // Update the gcInfo. - // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) - gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); - gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); - - // gcMarkRegPtrVal will do the appropriate thing for non-gc types. - // It will also dump the updates. - gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); - gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); - } - break; - - case GT_LIST: - case GT_FIELD_LIST: - case GT_ARGPLACE: - // Nothing to do - break; - - case GT_PUTARG_STK: - genPutArgStk(treeNode->AsPutArgStk()); - break; - - case GT_PUTARG_REG: - assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by - // fgMorphMultiregStructArg - // We have a normal non-Struct targetType - { - GenTree* op1 = treeNode->gtOp.gtOp1; - // If child node is not already in the register we need, move it - genConsumeReg(op1); - if (targetReg != op1->gtRegNum) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); - } - } - genProduceReg(treeNode); - break; - - case GT_CALL: - genCallInstruction(treeNode->AsCall()); - break; - - case GT_JMP: - genJmpMethod(treeNode); - break; - - case GT_LOCKADD: - case GT_XCHG: - case GT_XADD: - genLockedInstructions(treeNode->AsOp()); - break; - - case GT_MEMORYBARRIER: - instGen_MemoryBarrier(); - break; - - case GT_CMPXCHG: - NYI("GT_CMPXCHG"); - break; - - case GT_RELOAD: - // do nothing - reload is just a marker. 
- // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child - // into the register specified in this node. - break; - - case GT_NOP: - break; - - case GT_NO_OP: - if (treeNode->gtFlags & GTF_NO_OP_NO) - { - noway_assert(!"GTF_NO_OP_NO should not be set"); - } - else - { - instGen(INS_nop); - } - break; - - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD - genRangeCheck(treeNode); - break; - - case GT_PHYSREG: - if (targetReg != treeNode->AsPhysReg()->gtSrcReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType); - - genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg); - } - genProduceReg(treeNode); - break; - - case GT_PHYSREGDST: - break; - - case GT_NULLCHECK: - { - assert(!treeNode->gtOp.gtOp1->isContained()); - regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1); - emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0); - } - break; - - case GT_CATCH_ARG: - - noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); - - /* Catch arguments get passed in a register. genCodeForBBlist() - would have marked it as holding a GC object, but not used. */ - - noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); - genConsumeReg(treeNode); - break; - - case GT_PINVOKE_PROLOG: - noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); - - // the runtime side requires the codegen here to be consistent - emit->emitDisableRandomNops(); - break; - - case GT_LABEL: - genPendingCallLabel = genCreateTempLabel(); - treeNode->gtLabel.gtLabBB = genPendingCallLabel; - - // For long address (default): `adrp + add` will be emitted. - // For short address (proven later): `adr` will be emitted. 
- emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg); - break; - - case GT_STORE_OBJ: - if (treeNode->OperIsCopyBlkOp()) - { - assert(treeNode->AsObj()->gtGcPtrCount != 0); - genCodeForCpObj(treeNode->AsObj()); - break; - } - __fallthrough; - - case GT_STORE_DYN_BLK: - case GT_STORE_BLK: - { - GenTreeBlk* blkOp = treeNode->AsBlk(); - if (blkOp->gtBlkOpGcUnsafe) - { - getEmitter()->emitDisableGC(); - } - bool isCopyBlk = blkOp->OperIsCopyBlkOp(); - - switch (blkOp->gtBlkOpKind) - { - case GenTreeBlk::BlkOpKindHelper: - if (isCopyBlk) - { - genCodeForCpBlk(blkOp); - } - else - { - genCodeForInitBlk(blkOp); - } - break; - case GenTreeBlk::BlkOpKindUnroll: - if (isCopyBlk) - { - genCodeForCpBlkUnroll(blkOp); - } - else - { - genCodeForInitBlkUnroll(blkOp); - } - break; - default: - unreached(); - } - if (blkOp->gtBlkOpGcUnsafe) - { - getEmitter()->emitEnableGC(); - } - } - break; - - case GT_JMPTABLE: - genJumpTable(treeNode); - break; - - case GT_SWITCH_TABLE: - genTableBasedSwitch(treeNode); - break; - - case GT_ARR_INDEX: - genCodeForArrIndex(treeNode->AsArrIndex()); - break; - - case GT_ARR_OFFSET: - genCodeForArrOffset(treeNode->AsArrOffs()); - break; - - case GT_CLS_VAR_ADDR: - NYI("GT_CLS_VAR_ADDR"); - break; - - case GT_IL_OFFSET: - // Do nothing; these nodes are simply markers for debug info. - break; - - default: - { -#ifdef DEBUG - char message[256]; - _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n", - GenTree::NodeName(treeNode->OperGet())); -#endif - assert(!"Unknown node in codegen"); - } - break; - } -} - /*********************************************************************************************** * Generate code for localloc */ @@ -3158,6 +2391,154 @@ BAILOUT: genProduceReg(tree); } +//------------------------------------------------------------------------ +// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. 
+// +// Arguments: +// tree - the GT_NEG or GT_NOT node +// +void CodeGen::genCodeForNegNot(GenTree* tree) +{ + assert(tree->OperIs(GT_NEG, GT_NOT)); + + var_types targetType = tree->TypeGet(); + + assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); + + regNumber targetReg = tree->gtRegNum; + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTreePtr operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + getEmitter()->emitIns_R_R(ins, emitTypeSize(tree), targetReg, operandReg); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD: +// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph; +// (2) float/double MOD is morphed into a helper call by front-end.
+// +// Arguments: +// tree - the GT_DIV or GT_UDIV node +// +void CodeGen::genCodeForDivMod(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_DIV, GT_UDIV)); + + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + genConsumeOperands(tree); + + if (varTypeIsFloating(targetType)) + { + // Floating point divide never raises an exception + genCodeForBinary(tree); + } + else // an integer divide operation + { + GenTreePtr divisorOp = tree->gtGetOp2(); + emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet()))); + + if (divisorOp->IsIntegralConst(0)) + { + // We unconditionally throw a divide by zero exception + genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); + + // We still need to call genProduceReg + genProduceReg(tree); + } + else // the divisor is not the constant zero + { + regNumber divisorReg = divisorOp->gtRegNum; + + // Generate the required runtime checks for GT_DIV or GT_UDIV + if (tree->gtOper == GT_DIV) + { + BasicBlock* sdivLabel = genCreateTempLabel(); + + // Two possible exceptions: + // (AnyVal / 0) => DivideByZeroException + // (MinInt / -1) => ArithmeticException + // + bool checkDividend = true; + + // Do we have an immediate for the 'divisorOp'?
+ // + if (divisorOp->IsCnsIntOrI()) + { + GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon(); + ssize_t intConstValue = intConstTree->IconValue(); + assert(intConstValue != 0); // already checked above by IsIntegralConst(0) + if (intConstValue != -1) + { + checkDividend = false; // We statically know that the divisor is not -1 + } + } + else // insert check for division by zero + { + // If the divisor is zero, throw a DivideByZeroException + emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); + } + + if (checkDividend) + { + // If the divisor is not -1, branch to 'sdivLabel' + emit->emitIns_R_I(INS_cmp, size, divisorReg, -1); + + emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); + inst_JMP(jmpNotEqual, sdivLabel); + // If control flow continues past here the 'divisorReg' is known to be -1 + + regNumber dividendReg = tree->gtGetOp1()->gtRegNum; + // At this point the divisor is known to be -1 + // + // Issue the 'adds zr, dividendReg, dividendReg' instruction + // this will set both the Z and V flags only when dividendReg is MinInt + // + emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg); + inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear + genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set throw + // ArithmeticException + + genDefineTempLabel(sdivLabel); + } + genCodeForBinary(tree); // Generate the sdiv instruction + } + else // (tree->gtOper == GT_UDIV) + { + // Only one possible exception + // (AnyVal / 0) => DivideByZeroException + // + // Note that division by the constant 0 was already checked for above by the + // op2->IsIntegralConst(0) check + // + if (!divisorOp->IsCnsIntOrI()) + { + // divisorOp is not a constant, so it could be zero + // + emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ,
CK_SIGNED); + genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); + } + genCodeForBinary(tree); + } + } + } +} + // Generate code for InitBlk by performing a loop unroll // Preconditions: // a) Both the size and fill byte value are integer constants. @@ -3182,6 +2563,12 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) genConsumeOperands(initBlkNode); + if (initBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile initBlockUnroll operation + instGen_MemoryBarrier(); + } + regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum; assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR)); @@ -3257,9 +2644,7 @@ void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* b if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; - // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S) - emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst, base->gtLclVarCommon.gtLclNum, offset); - emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES); + emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset); } else { @@ -3298,9 +2683,7 @@ void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; - // TODO-ARM64-CQ: Implement support for using a stp instruction with a varNum (see emitIns_S_R) - emit->emitIns_S_R(INS_str, EA_8BYTE, src, base->gtLclVarCommon.gtLclNum, offset); - emit->emitIns_S_R(INS_str, EA_8BYTE, src2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES); + emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset); } else { @@ -3324,6 +2707,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) emitter* emit = getEmitter(); + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory
barrier before & after a volatile CpBlkUnroll operation + instGen_MemoryBarrier(); + } + if (source->gtOper == GT_IND) { srcAddr = source->gtGetOp1(); @@ -3402,6 +2791,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset); } } + + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before & after a volatile CpBlkUnroll operation + instGen_MemoryBarrier(); + } } // Generate code for CpObj nodes wich copy structs that have interleaved @@ -3461,30 +2856,60 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); - // Temp register used to perform the sequence of loads and stores. - regNumber tmpReg = cpObjNode->GetSingleTempReg(); + unsigned slots = cpObjNode->gtSlots; + + // Temp register(s) used to perform the sequence of loads and stores. + regNumber tmpReg = cpObjNode->ExtractTempReg(); + regNumber tmpReg2 = REG_NA; + assert(genIsValidIntReg(tmpReg)); + assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); + assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); - unsigned slots = cpObjNode->gtSlots; - emitter* emit = getEmitter(); + if (slots > 1) + { + tmpReg2 = cpObjNode->GetSingleTempReg(); + assert(tmpReg2 != tmpReg); + assert(genIsValidIntReg(tmpReg2)); + assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); + assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); + } + + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before & after a volatile CpObj operation + instGen_MemoryBarrier(); + } + + emitter* emit = getEmitter(); BYTE* gcPtrs = cpObjNode->gtGcPtrs; // If we can prove it's on the stack we don't need to use the write barrier. if (dstOnStack) { - // TODO-ARM64-CQ: Consider using LDP/STP to save codesize. 
- for (unsigned i = 0; i < slots; ++i) + unsigned i = 0; + // Check if two or more remaining slots and use a ldp/stp sequence + while (i < slots - 1) { - emitAttr attr = EA_8BYTE; - if (gcPtrs[i] == GCT_GCREF) - attr = EA_GCREF; - else if (gcPtrs[i] == GCT_BYREF) - attr = EA_BYREF; + emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); + emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1])); + + emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX, attr1); + emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX, attr1); + i += 2; + } + + // Use a ldr/str sequence for the last remainder + if (i < slots) + { + emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); - emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); - emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); } } @@ -3498,11 +2923,22 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) switch (gcPtrs[i]) { case TYPE_GC_NONE: - // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contigous NON-GC slots. 
- emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); - emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); + // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp + if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE)) + { + emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); + emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); + ++i; // extra increment of i, since we are copying two items + } + else + { + emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + } break; default: @@ -3517,6 +2953,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) assert(gcPtrCount == 0); } + if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before & after a volatile CpObj operation + instGen_MemoryBarrier(); + } + // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. // While we normally update GC info prior to the last instruction that uses them, // these actually live into the helper call. @@ -4069,6 +3511,194 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) genProduceReg(lea); } +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. 
+// +// Arguments: +// tree - the GT_RETURNTRAP node +// +void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) +{ + assert(tree->OperGet() == GT_RETURNTRAP); + + // This is a conditional call to CORINFO_HELP_STOP_FOR_GC: + // the helper call is skipped when 'data' compares equal to zero + + GenTree* data = tree->gtOp1; + genConsumeRegs(data); + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); + + BasicBlock* skipLabel = genCreateTempLabel(); + + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, skipLabel); + // emit the call to the EE-helper that stops for GC (or other reasons) + + genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); + genDefineTempLabel(skipLabel); +} + +//------------------------------------------------------------------------ +// genCodeForStoreInd: Produce code for a GT_STOREIND node. +// +// Arguments: +// tree - the GT_STOREIND node +// +void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +{ + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + var_types targetType = tree->TypeGet(); + emitter* emit = getEmitter(); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. + // Consume both registers so that any copies of interfering + // registers are taken care of. + genConsumeOperands(tree); + +#if NOGC_WRITE_BARRIERS + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, + // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); + + // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) + if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) + { + inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); + } + + // 'data' goes into x15 (REG_WRITE_BARRIER) + if (data->gtRegNum != REG_WRITE_BARRIER) + { + inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); + } +#else + // At this point, we should not have any interference. + // That is, 'data' must not be in REG_ARG_0, + // as that is where 'addr' must go. + noway_assert(data->gtRegNum != REG_ARG_0); + + // addr goes in REG_ARG_0 + if (addr->gtRegNum != REG_ARG_0) + { + inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); + } + + // data goes in REG_ARG_1 + if (data->gtRegNum != REG_ARG_1) + { + inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); + } +#endif // NOGC_WRITE_BARRIERS + + genGCWriteBarrier(tree, writeBarrierForm); + } + else // A normal store, not a WriteBarrier store + { + bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0); + bool dataIsUnary = false; // NOTE(review): not referenced again in this function - candidate for removal + GenTree* nonRMWsrc = nullptr; // NOTE(review): not referenced again in this function - candidate for removal + // We must consume the operands in the proper execution order, + // so that liveness is updated appropriately.
+ if (!reverseOps) + { + genConsumeAddress(addr); + } + + if (!data->isContained()) + { + genConsumeRegs(data); + } + + if (reverseOps) + { + genConsumeAddress(addr); + } + + regNumber dataReg = REG_NA; + if (data->isContainedIntOrIImmed()) + { + assert(data->IsIntegralConst(0)); + dataReg = REG_ZR; + } + else // data is not contained, so use the register assigned to it + { + assert(!data->isContained()); + dataReg = data->gtRegNum; + } + + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // issue a full memory barrier before a volatile StInd + instGen_MemoryBarrier(); + } + + emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(tree), dataReg, tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForSwap: Produce code for a GT_SWAP node. +// +// Arguments: +// tree - the GT_SWAP node +// +void CodeGen::genCodeForSwap(GenTreeOp* tree) +{ + // Swap is only supported for lclVar operands that are enregistered + // We do not consume or produce any registers. Both operands remain enregistered. + // However, the gc-ness may change. + assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2)); + + GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon(); + LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); + var_types type1 = varDsc1->TypeGet(); + GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon(); + LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); + var_types type2 = varDsc2->TypeGet(); + + // We must have both int or both fp regs + assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); + + // FP swap is not yet implemented (and should have NYI'd in LSRA) + assert(!varTypeIsFloating(type1)); + + regNumber oldOp1Reg = lcl1->gtRegNum; + regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); + regNumber oldOp2Reg = lcl2->gtRegNum; + regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); + + // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg; + varDsc2->lvRegNum = oldOp1Reg; + + // Do the xchg + emitAttr size = EA_PTRSIZE; + if (varTypeGCtype(type1) != varTypeGCtype(type2)) + { + // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. + // Otherwise it will leave them alone, which is correct if they have the same GC-ness. + size = EA_GCREF; + } + + NYI("register swap"); + // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); + + // Update the gcInfo. + // Manually remove these regs from the gc sets (mostly to avoid confusing duplicative dump output) + gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); + + // gcMarkRegPtrVal will do the appropriate thing for non-gc types. + // It will also dump the updates. + gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); + gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); +} + //------------------------------------------------------------------------------------------- // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value // corresponding to a binary Relational operator result. @@ -4335,6 +3965,104 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) genProduceReg(treeNode); } +//------------------------------------------------------------------------ +// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node. +// +// Arguments: +// tree - the relational operator node to generate code for +// +void CodeGen::genCodeForCompare(GenTreeOp* tree) +{ + regNumber targetReg = tree->gtRegNum; + emitter* emit = getEmitter(); + + // TODO-ARM64-CQ: Check if we can use the currently set flags.
+ // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register + // (signed < or >= where targetReg != REG_NA) + + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_UNKNOWN; + + if (varTypeIsFloating(op1Type)) + { + assert(varTypeIsFloating(op2Type)); + assert(!op1->isContained()); + assert(op1Type == op2Type); + cmpSize = EA_ATTR(genTypeSize(op1Type)); + + if (op2->IsIntegralConst(0)) // NOTE(review): op2 is asserted floating-point above; confirm IsIntegralConst(0) can match an FP zero here + { + emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); + } + else + { + assert(!op2->isContained()); + emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); + } + } + else + { + assert(!varTypeIsFloating(op2Type)); + // We don't support swapping op1 and op2 to generate cmp reg, imm + assert(!op1->isContainedIntOrIImmed()); + + // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero + // extended as part of the instruction (using "CMP (extended register)"). + // We should use that if possible, swapping operands + // (and reversing the condition) if necessary. + unsigned op1Size = genTypeSize(op1Type); + unsigned op2Size = genTypeSize(op2Type); // NOTE(review): these sizes are later compared directly against EA_8BYTE - confirm the values coincide + + if ((op1Size < 4) || (op1Size < op2Size)) + { + // We need to sign/zero extend op1 up to 32 or 64 bits. + instruction ins = ins_Move_Extend(op1Type, true); + inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum); + } + + if (!op2->isContainedIntOrIImmed()) + { + if ((op2Size < 4) || (op2Size < op1Size)) + { + // We need to sign/zero extend op2 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op2Type, true); + inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum); + } + } + cmpSize = EA_4BYTE; + if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE)) + { + cmpSize = EA_8BYTE; + } + + if (op2->isContainedIntOrIImmed()) + { + GenTreeIntConCommon* intConst = op2->AsIntConCommon(); + emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue()); + } + else + { + emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum); + } + } + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } +} + int CodeGenInterface::genSPtoFPdelta() { int delta; @@ -4552,6 +4280,17 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); + // ldar/stlr Rt, [reg] + theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8); + theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10); + theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11); + theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12); + + theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8); + theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13); + theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14); + theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15); + #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS |