// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX Arm64 Code Generator XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" #ifdef _MSC_VER #pragma hdrstop #endif #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator #ifdef _TARGET_ARM64_ #include "emit.h" #include "codegen.h" #include "lower.h" #include "gcinfo.h" #include "gcinfoencoder.h" /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX Prolog / Epilog XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ //------------------------------------------------------------------------ // genInstrWithConstant: we will typically generate one instruction // // ins reg1, reg2, imm // // However the imm might not fit as a directly encodable immediate, // when it doesn't fit we generate extra instruction(s) that sets up // the 'regTmp' with the proper immediate value. 
// // mov regTmp, imm // ins reg1, reg2, regTmp // // Arguments: // ins - instruction // attr - operation size and GC attribute // reg1, reg2 - first and second register operands // imm - immediate value (third operand when it fits) // tmpReg - temp register to use when the 'imm' doesn't fit // inUnwindRegion - true if we are in a prolog/epilog region with unwind codes // // Return Value: // returns true if the immediate was too large and tmpReg was used and modified. // bool CodeGen::genInstrWithConstant(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, regNumber tmpReg, bool inUnwindRegion /* = false */) { bool immFitsInIns = false; emitAttr size = EA_SIZE(attr); // reg1 is usually a dest register // reg2 is always source register assert(tmpReg != reg2); // regTmp can not match any source register switch (ins) { case INS_add: case INS_sub: if (imm < 0) { imm = -imm; ins = (ins == INS_add) ? INS_sub : INS_add; } immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size); break; case INS_strb: case INS_strh: case INS_str: // reg1 is a source register for store instructions assert(tmpReg != reg1); // regTmp can not match any source register immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size); break; case INS_ldrsb: case INS_ldrsh: case INS_ldrsw: case INS_ldrb: case INS_ldrh: case INS_ldr: immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size); break; default: assert(!"Unexpected instruction in genInstrWithConstant"); break; } if (immFitsInIns) { // generate a single instruction that encodes the immediate directly getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); } else { // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit assert(tmpReg != REG_NA); // generate two or more instructions // first we load the immediate into tmpReg instGen_Set_Reg_To_Imm(size, tmpReg, imm); regTracker.rsTrackRegTrash(tmpReg); // when we are in an unwind code region // we record the 
extra instructions using unwindPadding() if (inUnwindRegion) { compiler->unwindPadding(); } // generate the instruction using a three register encoding with the immediate in tmpReg getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg); } return immFitsInIns; } //------------------------------------------------------------------------ // genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog // or the epilog. The unwind codes for the generated instructions are produced. An available temporary // register is required to be specified, in case the constant is too large to encode in an "add" // instruction (or "sub" instruction if we choose to use one), such that we need to load the constant // into a register first, before using it. // // Arguments: // spDelta - the value to add to SP (can be negative) // tmpReg - an available temporary register // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { // Even though INS_add is specified here, the encoder will choose either // an INS_add or an INS_sub and encode the immediate as a positive value // if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true)) { if (pTmpRegIsZero != nullptr) { *pTmpRegIsZero = false; } } // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive // value. 
ssize_t spDeltaAbs = abs(spDelta); unsigned unwindSpDelta = (unsigned)spDeltaAbs; assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned compiler->unwindAllocStack(unwindSpDelta); } //------------------------------------------------------------------------ // genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet // prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. // The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that // instruction. // // Arguments: // reg1 - First register of pair to save. // reg2 - Second register of pair to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or // zero). // lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This // allows us to emit the "save_next" unwind code. // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. void CodeGen::genPrologSaveRegPair(regNumber reg1, regNumber reg2, int spOffset, int spDelta, bool lastSavedWasPreviousPair, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD bool needToSaveRegs = true; if (spDelta != 0) { if ((spOffset == 0) && (spDelta >= -512)) { // We can use pre-indexed addressing. // stp REG, REG + 1, [SP, #spDelta]! // 64-bit STP offset range: -512 to 504, multiple of 8. 
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); needToSaveRegs = false; } else // (spDelta < -512)) { // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the // non-zero offset. // generate sub SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } } if (needToSaveRegs) { // stp REG, REG + 1, [SP, #offset] // 64-bit STP offset range: -512 to 504, multiple of 8. assert(spOffset <= 504); getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); if (lastSavedWasPreviousPair) { // This works as long as we've only been saving pairs, in order, and we've saved the previous one just // before this one. compiler->unwindSaveNext(); } else { compiler->unwindSaveRegPair(reg1, reg2, spOffset); } } } //------------------------------------------------------------------------ // genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or // floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), // then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not // below it, which we currently don't support. This restriction could be loosened if the callers change to handle it // (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the // STR instruction, and that spOffset will be in the legal range for that instruction. // // Arguments: // reg1 - Register to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or // zero). // tmpReg - An available temporary register. Needed for the case of large frames. 
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned if (spDelta != 0) { // generate sub SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } // str REG, [SP, #offset] // 64-bit STR offset range: 0 to 32760, multiple of 8. getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); } //------------------------------------------------------------------------ // genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that // instruction. // // Arguments: // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. // spOffset - The offset from SP to load reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or // zero). // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. 
void CodeGen::genEpilogRestoreRegPair( regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned if (spDelta != 0) { if ((spOffset == 0) && (spDelta <= 504)) { // Fold the SP change into this instruction. // ldp reg1, reg2, [SP], #spDelta getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); } else // (spDelta > 504)) { // Can't fold in the SP change; need to use a separate ADD instruction. // ldp reg1, reg2, [SP, #offset] getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); compiler->unwindSaveRegPair(reg1, reg2, spOffset); // generate add SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } } else { // ldp reg1, reg2, [SP, #offset] getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); compiler->unwindSaveRegPair(reg1, reg2, spOffset); } } //------------------------------------------------------------------------ // genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. // // Arguments: // reg1 - Register to restore. // spOffset - The offset from SP to restore reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or // zero). // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. 
void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned // ldr reg1, [SP, #offset] getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); if (spDelta != 0) { // generate add SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } } //------------------------------------------------------------------------ // genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame // in the function or funclet prolog. The save set does not contain FP, since that is // guaranteed to be saved separately, so we can set up chaining. We can only use the instructions // that are allowed by the unwind codes. Integer registers are stored at lower addresses, // FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that // there is enough space on the frame to store these registers, and that the store instructions // we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to // use. Note that the save set can contain LR if this is a frame without a frame pointer, in // which case LR is saved along with the other callee-saved registers. The caller can tell us // to fold in a stack pointer adjustment, which we will do with the first instruction. Note that // the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the // stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved // registers, though, we will have an empty aligment slot somewhere. It turns out we will put // it below (at a lower address) the callee-saved registers, as that is currently how we // do frame layout. 
This means that the first stack offset will be 8 and the stack pointer // adjustment must be done by a SUB, and not folded in to a pre-indexed store. // // Arguments: // regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing. // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that // if non-zero spDelta, then this is the offset of the first save *after* that // SP adjustment. // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or // zero). // // Return Value: // None. void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) { assert(spDelta <= 0); unsigned regsToSaveCount = genCountBits(regsToSaveMask); if (regsToSaveCount == 0) { if (spDelta != 0) { // Currently this is the case for varargs only // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. genStackPointerAdjustment(spDelta, REG_NA, nullptr); } return; } assert((spDelta % 16) == 0); assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in // RBM_CALLEE_SAVED. regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP. 
unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt); unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat); bool isPairSave = false; #ifdef DEBUG bool isRegsToSaveCountOdd = ((intRegsToSaveCount + floatRegsToSaveCount) % 2 != 0); #endif // Save the integer registers bool lastSavedWasPair = false; while (maskSaveRegsInt != RBM_NONE) { // If this is the first store that needs to change SP (spDelta != 0), // then the offset must be 8 to account for alignment for the odd count // or it must be 0 for the even count. assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) || (!isRegsToSaveCountOdd && spOffset == 0)); isPairSave = (intRegsToSaveCount >= 2); regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt); regNumber reg1 = genRegNumFromMask(reg1Mask); maskSaveRegsInt &= ~reg1Mask; intRegsToSaveCount -= 1; if (isPairSave) { // We can use a STP instruction. regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt); regNumber reg2 = genRegNumFromMask(reg2Mask); assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR)); maskSaveRegsInt &= ~reg2Mask; intRegsToSaveCount -= 1; genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr); // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating // this epilog, to get the codes to match. Turn this off until that is better understood. // lastSavedWasPair = true; spOffset += 2 * REGSIZE_BYTES; } else { // No register pair; we use a STR instruction. genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr); lastSavedWasPair = false; spOffset += REGSIZE_BYTES; } spDelta = 0; // We've now changed SP already, if necessary; don't do it again. 
} assert(intRegsToSaveCount == 0); // Save the floating-point/SIMD registers lastSavedWasPair = false; while (maskSaveRegsFloat != RBM_NONE) { // If this is the first store that needs to change SP (spDelta != 0), // then the offset must be 8 to account for alignment for the odd count // or it must be 0 for the even count. assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) || (!isRegsToSaveCountOdd && spOffset == 0)); isPairSave = (floatRegsToSaveCount >= 2); regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat); regNumber reg1 = genRegNumFromMask(reg1Mask); maskSaveRegsFloat &= ~reg1Mask; floatRegsToSaveCount -= 1; if (isPairSave) { // We can use a STP instruction. regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat); regNumber reg2 = genRegNumFromMask(reg2Mask); assert(reg2 == REG_NEXT(reg1)); maskSaveRegsFloat &= ~reg2Mask; floatRegsToSaveCount -= 1; genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr); // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating // this epilog, to get the codes to match. Turn this off until that is better understood. // lastSavedWasPair = true; spOffset += 2 * FPSAVE_REGSIZE_BYTES; } else { // No register pair; we use a STR instruction. genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr); lastSavedWasPair = false; spOffset += FPSAVE_REGSIZE_BYTES; } spDelta = 0; // We've now changed SP already, if necessary; don't do it again. } assert(floatRegsToSaveCount == 0); } //------------------------------------------------------------------------ // genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame // in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp(). // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. 
// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or // zero). // // Here's an example restore sequence: // ldp x27, x28, [sp,#96] // ldp x25, x26, [sp,#80] // ldp x23, x24, [sp,#64] // ldp x21, x22, [sp,#48] // ldp x19, x20, [sp,#32] // // For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and // the last restore adjusts SP by the specified amount. For example: // ldp x27, x28, [sp,#64] // ldp x25, x26, [sp,#48] // ldp x23, x24, [sp,#32] // ldp x21, x22, [sp,#16] // ldp x19, x20, [sp], #80 // // Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when // generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. // // Return Value: // None. void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) { assert(spDelta >= 0); unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); if (regsToRestoreCount == 0) { if (spDelta != 0) { // Currently this is the case for varargs only // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. genStackPointerAdjustment(spDelta, REG_NA, nullptr); } return; } assert((spDelta % 16) == 0); assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED. regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We // predecrement to find the offset to // load from. 
unsigned floatRegsToRestoreCount = genCountBits(maskRestoreRegsFloat); unsigned intRegsToRestoreCount = genCountBits(maskRestoreRegsInt); int stackDelta = 0; bool isPairRestore = false; bool thisIsTheLastRestoreInstruction = false; #ifdef DEBUG bool isRegsToRestoreCountOdd = ((floatRegsToRestoreCount + intRegsToRestoreCount) % 2 != 0); #endif // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of // callee-saved registers properly. // Restore the floating-point/SIMD registers while (maskRestoreRegsFloat != RBM_NONE) { thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount <= 2) && (maskRestoreRegsInt == RBM_NONE); isPairRestore = (floatRegsToRestoreCount % 2) == 0; // Update stack delta only if it is the last restore (the first save). if (thisIsTheLastRestoreInstruction) { assert(stackDelta == 0); stackDelta = spDelta; } // Update stack offset. if (isPairRestore) { spOffset -= 2 * FPSAVE_REGSIZE_BYTES; } else { spOffset -= FPSAVE_REGSIZE_BYTES; } // If this is the last restore (the first save) that needs to change SP (stackDelta != 0), // then the offset must be 8 to account for alignment for the odd count // or it must be 0 for the even count. 
assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == FPSAVE_REGSIZE_BYTES) || (!isRegsToRestoreCountOdd && spOffset == 0)); regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsFloat); regNumber reg2 = genRegNumFromMask(reg2Mask); maskRestoreRegsFloat &= ~reg2Mask; floatRegsToRestoreCount -= 1; if (isPairRestore) { regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat); regNumber reg1 = genRegNumFromMask(reg1Mask); maskRestoreRegsFloat &= ~reg1Mask; floatRegsToRestoreCount -= 1; genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP0, nullptr); } else { genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP0, nullptr); } } assert(floatRegsToRestoreCount == 0); // Restore the integer registers while (maskRestoreRegsInt != RBM_NONE) { thisIsTheLastRestoreInstruction = (intRegsToRestoreCount <= 2); isPairRestore = (intRegsToRestoreCount % 2) == 0; // Update stack delta only if it is the last restore (the first save). if (thisIsTheLastRestoreInstruction) { assert(stackDelta == 0); stackDelta = spDelta; } // Update stack offset. spOffset -= REGSIZE_BYTES; if (isPairRestore) { spOffset -= REGSIZE_BYTES; } // If this is the last restore (the first save) that needs to change SP (stackDelta != 0), // then the offset must be 8 to account for alignment for the odd count // or it must be 0 for the even count. 
assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == REGSIZE_BYTES) || (!isRegsToRestoreCountOdd && spOffset == 0)); regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsInt); regNumber reg2 = genRegNumFromMask(reg2Mask); maskRestoreRegsInt &= ~reg2Mask; intRegsToRestoreCount -= 1; if (isPairRestore) { regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt); regNumber reg1 = genRegNumFromMask(reg1Mask); maskRestoreRegsInt &= ~reg1Mask; intRegsToRestoreCount -= 1; genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP0, nullptr); } else { genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP0, nullptr); } } assert(intRegsToRestoreCount == 0); } // clang-format off /***************************************************************************** * * Generates code for an EH funclet prolog. * * Funclets have the following incoming arguments: * * catch: x0 = the exception object that was caught (see GT_CATCH_ARG) * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function * finally/fault: none * * Funclets set the following registers on exit: * * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET) * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) * finally/fault: none * * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size, * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16): * * Frame type 1: * For #outsz == 0 and #framesz <= 512: * stp fp,lr,[sp,-#framesz]! 
; establish the frame, save FP/LR * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V * * Frame type 2: * For #outsz != 0 and #framesz <= 512: * sub sp,sp,#framesz ; establish the frame * stp fp,lr,[sp,#outsz] ; save FP/LR. * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V * * Frame type 3: * For #framesz > 512: * stp fp,lr,[sp,- (#framesz - #outsz)]! 
; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * sub sp,sp,#outsz ; create space for outgoing argument space * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space). * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V * * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes: * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call). 
* In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym, * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the * outgoing argument space. Both changes to SP might need to add alignment padding. * * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP * as in the main function. * * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. * * if (this is a filter funclet) * { * // x1 on entry to a filter funclet is CallerSP of the containing function: * // either the main function, or the funclet for a handler that this filter is dynamically nested within. * // Note that a filter can be dynamically nested within a funclet even if it is not statically within * // a funclet. Consider: * // * // try { * // try { * // throw new Exception(); * // } catch(Exception) { * // throw new Exception(); // The exception thrown here ... * // } * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack * // } filter-handler { * // } * // * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always * // create a main function PSP for any function with a filter. 
* * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer * } * else * { * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. * * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch. * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP * } * * An example epilog sequence is then: * * add sp,sp,#outsz ; if any outgoing argument space * ... ; restore callee-saved registers * ldp x19,x20,[sp,#xxx] * ldp fp,lr,[sp],#framesz * ret lr * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. 
* |-----------------------| * | Outgoing arg space | // multiple of 8 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V */ // clang-format on void CodeGen::genFuncletProlog(BasicBlock* block) { #ifdef DEBUG if (verbose) printf("*************** In genFuncletProlog()\n"); #endif assert(block != NULL); assert(block->bbFlags & BBF_FUNCLET_BEG); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); gcInfo.gcResetForBB(); compiler->unwindBegProlog(); regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. assert((maskSaveRegsInt & RBM_LR) != 0); assert((maskSaveRegsInt & RBM_FP) != 0); bool isFilter = (block->bbCatchTyp == BBCT_FILTER); regMaskTP maskArgRegsLiveIn; if (isFilter) { maskArgRegsLiveIn = RBM_R0 | RBM_R1; } else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) { maskArgRegsLiveIn = RBM_NONE; } else { maskArgRegsLiveIn = RBM_R0; } int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta; if (genFuncletInfo.fiFrameType == 1) { getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); assert(genFuncletInfo.fiSpDelta2 == 0); assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0); } else if (genFuncletInfo.fiFrameType == 2) { // fiFrameType==2 constraints: assert(genFuncletInfo.fiSpDelta1 < 0); assert(genFuncletInfo.fiSpDelta1 >= -512); // generate sub SP,SP,imm genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr); assert(genFuncletInfo.fiSpDelta2 == 0); getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); 
compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); } else { assert(genFuncletInfo.fiFrameType == 3); getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet. } maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0); if (genFuncletInfo.fiFrameType == 3) { // Note that genFuncletInfo.fiSpDelta2 is always a negative value assert(genFuncletInfo.fiSpDelta2 < 0); // generate sub SP,SP,imm genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr); } // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); // If there is no PSPSym (CoreRT ABI), we are done. 
if (compiler->lvaPSPSym == BAD_VAR_NUM) { return; } if (isFilter) { // This is the first block of a filter // Note that register x1 = CallerSP of the containing function // X1 is overwritten by the first Load (new callerSP) // X2 is scratch when we have a large constant offset // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false); regTracker.rsTrackRegTrash(REG_R1); // Store the PSP value (aka CallerSP) genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); // re-establish the frame pointer genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); } else // This is a non-filter funclet { // X3 is scratch, X2 can also become scratch // compute the CallerSP, given the frame pointer. x3 is scratch. genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); regTracker.rsTrackRegTrash(REG_R3); genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); } } /***************************************************************************** * * Generates code for an EH funclet epilog. */ void CodeGen::genFuncletEpilog() { #ifdef DEBUG if (verbose) printf("*************** In genFuncletEpilog()\n"); #endif ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); bool unwindStarted = false; if (!unwindStarted) { // We can delay this until we know we'll generate an unwindable instruction, if necessary. 
// Open the epilog unwind region; it is closed by unwindEndEpilog() at the end of this method.
        compiler->unwindBegEpilog();
        unwindStarted = true;
    }

    regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
    regMaskTP maskRestoreRegsInt   = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;

    // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
    assert((maskRestoreRegsInt & RBM_LR) != 0);
    assert((maskRestoreRegsInt & RBM_FP) != 0);

    maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end

    int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;

    if (genFuncletInfo.fiFrameType == 3)
    {
        // Note that genFuncletInfo.fiSpDelta2 is always a negative value
        assert(genFuncletInfo.fiSpDelta2 < 0);

        // generate add SP,SP,imm
        genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);

        // SP has just moved up by -fiSpDelta2, so SP-relative offsets shrink by the same amount.
        lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
    }

    // Restore everything except FP/LR (those were masked out above and are reloaded last).
    regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
    genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);

    if (genFuncletInfo.fiFrameType == 1)
    {
        // Post-indexed ldp: reload FP/LR and pop the whole frame in one instruction.
        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
                                      INS_OPTS_POST_INDEX);
        // The unwind code is recorded with the (negative) fiSpDelta1, not the negated value used by the ldp.
        compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);

        assert(genFuncletInfo.fiSpDelta2 == 0);
        assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
    }
    else if (genFuncletInfo.fiFrameType == 2)
    {
        // Reload FP/LR from their slot first, then release the frame with a separate add.
        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
                                      genFuncletInfo.fiSP_to_FPLR_save_delta);
        compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);

        // fiFrameType==2 constraints:
        assert(genFuncletInfo.fiSpDelta1 < 0);
        assert(genFuncletInfo.fiSpDelta1 >= -512);

        // generate add SP,SP,imm
        genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);

        assert(genFuncletInfo.fiSpDelta2 == 0);
    }
    else
    {
        assert(genFuncletInfo.fiFrameType == 3);

        // The fiSpDelta2 part of the frame was already released above; this pops the fiSpDelta1 part.
        getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
                                      INS_OPTS_POST_INDEX);
        compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
    }

    inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
    compiler->unwindReturn(REG_LR);

    compiler->unwindEndEpilog();
}

/*****************************************************************************
 *
 *  Capture the information used to generate the funclet prologs and epilogs.
 *  Note that all funclet prologs are identical, and all funclet epilogs are
 *  identical (per type: filters are identical, and non-filters are identical).
 *  Thus, we compute the data used for these just once.
 *
 *  See genFuncletProlog() for more information about the prolog/epilog sequences.
 */

void CodeGen::genCaptureFuncletPrologEpilogInfo()
{
    // Nothing to capture when the method has no funclets.
    if (!compiler->ehAnyFunclets())
        return;

    assert(isFramePointerUsed());
    assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
                                                                          // finalized

    genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();

    regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
    assert((rsMaskSaveRegs & RBM_LR) != 0);
    assert((rsMaskSaveRegs & RBM_FP) != 0);

    // The PSP slot occupies one register-sized slot, and only when the method has a PSPSym.
    unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;

    unsigned saveRegsCount       = genCountBits(rsMaskSaveRegs);
    unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
    if (compiler->info.compIsVarArgs)
    {
        // For varargs we always save all of the integer register arguments
        // so that they are contiguous with the incoming stack arguments.
saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; } unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN); assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); int SP_to_FPLR_save_delta; int SP_to_PSP_slot_delta; int CallerSP_to_PSP_slot_delta; if (maxFuncletFrameSizeAligned <= 512) { unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN); assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize; SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad; CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES); if (compiler->lvaOutgoingArgSpaceSize == 0) { genFuncletInfo.fiFrameType = 1; } else { genFuncletInfo.fiFrameType = 2; } genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; genFuncletInfo.fiSpDelta2 = 0; assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned); } else { unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); SP_to_FPLR_save_delta = outgoingArgSpaceAligned; SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad; CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - 
saveRegsPlusPSPAlignmentPad); genFuncletInfo.fiFrameType = 3; genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned; genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned; assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned); } /* Now save it for future use */ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES; genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; #ifdef DEBUG if (verbose) { printf("\n"); printf("Funclet prolog / epilog info\n"); printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta); printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta); printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta); printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta); printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2); if (compiler->lvaPSPSym != BAD_VAR_NUM) { if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging { printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); } } } assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0); assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0); assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0); assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0); if (compiler->lvaPSPSym != BAD_VAR_NUM) { 
assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and // funclet! } #endif // DEBUG } /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX End Prolog / Epilog XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) { // Generate a call to the finally, like this: // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used // bl finally-funclet // b finally-return // Only for non-retless finally calls // The 'b' can be a NOP if we're going to the next block. if (compiler->lvaPSPSym != BAD_VAR_NUM) { getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); } else { getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE); } getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest); if (block->bbFlags & BBF_RETLESS_CALL) { // We have a retless call, and the last instruction generated was a call. // If the next block is in a different EH region (or is the end of the code // block), then we need to generate a breakpoint here (since it will never // get executed) to get proper unwind behavior. if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { instGen(INS_bkpt); // This should never get executed } } else { // Because of the way the flowgraph is connected, the liveness info for this one instruction // after the call is not (can not be) correct in cases where a variable has a last use in the // handler. So turn off GC reporting for this single instruction. getEmitter()->emitDisableGC(); // Now go to where the finally funclet needs to return to. 
// block->bbNext is the block paired with this call; its bbJumpDest is the continuation address.
        if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
        {
            // Fall-through.
            // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
            // to the next instruction? This would depend on stack walking from within the finally
            // handler working without this instruction being in this special EH region.
            instGen(INS_nop);
        }
        else
        {
            inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
        }

        getEmitter()->emitEnableGC();
    }

    // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
    // jump target using bbJumpDest - that is already used to point
    // to the finally block. So just skip past the BBJ_ALWAYS unless the
    // block is RETLESS.
    if (!(block->bbFlags & BBF_RETLESS_CALL))
    {
        assert(block->isBBCallAlwaysPair());
        block = block->bbNext;
    }
    // Returns 'block' itself for a retless call, otherwise the paired block that follows it.
    return block;
}

//------------------------------------------------------------------------
// genEHCatchRet: Loads the address of block->bbJumpDest into REG_INTRET.
//
// Arguments:
//    block - block whose bbJumpDest is the label to materialize
//
void CodeGen::genEHCatchRet(BasicBlock* block)
{
    // For long address (default): `adrp + add` will be emitted.
    // For short address (proven later): `adr` will be emitted.
    getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
}

//------------------------------------------------------------------------
// instGen_Set_Reg_To_Imm: move an immediate value into an integer register
//
// Arguments:
//    size  - operand size; may carry a reloc flag, which is stripped unless compReloc is set
//    reg   - destination register; must not be a floating-point register
//    imm   - the immediate value to materialize
//    flags - when INS_FLAGS_SET, a trailing 'tst' is emitted (non-zero, non-reloc path only)
//
// Notes:
//    Strategy, in order: adrp-based sequence for relocatable values, the zero helper
//    for 0, a single 'mov' when the immediate is encodable, and otherwise one
//    movz/movn followed by a movk per remaining 16-bit chunk.
//    On every path the register tracker is told that 'reg' now holds 'imm'.
//
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
    // reg cannot be a FP register
    assert(!genIsValidFloatReg(reg));
    if (!compiler->opts.compReloc)
    {
        size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
    }

    if (EA_IS_RELOC(size))
    {
        // This emits a pair of adrp/add (two instructions) with fix-ups.
        getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm);
    }
    else if (imm == 0)
    {
        instGen_Set_Reg_To_Zero(size, reg, flags);
    }
    else
    {
        if (emitter::emitIns_valid_imm_for_mov(imm, size))
        {
            getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
        }
        else
        {
            // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword
            // There are three forms
            //    movk which loads into any halfword preserving the remaining halfwords
            //    movz which loads into any halfword zeroing the remaining halfwords
            //    movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting the register
            // In some cases it is preferable to use movn, because it has the side effect of filling the other halfwords
            // with ones

            // Determine whether movn or movz will require the fewest instructions to populate the immediate
            // (net count: +1 for every all-ones halfword, -1 for every all-zero halfword)
            int preferMovn = 0;

            for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
            {
                if (uint16_t(imm >> i) == 0xffff)
                    ++preferMovn; // a single movk 0xffff could be skipped if movn was used
                else if (uint16_t(imm >> i) == 0x0000)
                    --preferMovn; // a single movk 0 could be skipped if movz was used
            }

            // Select the first instruction.  Any additional instruction will use movk
            instruction ins = (preferMovn > 0) ? INS_movn : INS_movz;

            // Initial movz or movn will fill the remaining bytes with the skipVal
            // This can allow skipping filling a halfword
            uint16_t skipVal = (preferMovn > 0) ? 0xffff : 0;

            unsigned bits = (size == EA_8BYTE) ? 64 : 32;

            // Iterate over imm examining 16 bits at a time
            for (unsigned i = 0; i < bits; i += 16)
            {
                uint16_t imm16 = uint16_t(imm >> i);

                if (imm16 != skipVal)
                {
                    if (ins == INS_movn)
                    {
                        // For the movn case, we need to bitwise invert the immediate.  This is because
                        //   (movn x0, ~imm16) === (movz x0, imm16; or x0, x0, #0xffff`ffff`ffff`0000)
                        imm16 = ~imm16;
                    }

                    // 'i' doubles as the left-shift amount that positions this halfword.
                    getEmitter()->emitIns_R_I_I(ins, size, reg, imm16, i, INS_OPTS_LSL);

                    // Once the initial movz/movn is emitted the remaining instructions will all use movk
                    ins = INS_movk;
                }
            }

            // We must emit a movn or movz or we have not done anything
            // The cases which hit this assert should be (emitIns_valid_imm_for_mov() == true) and
            // should not be in this else condition
            assert(ins == INS_movk);
        }
        // The caller may have requested that the flags be set on this mov (rarely/never)
        //
        if (flags == INS_FLAGS_SET)
        {
            getEmitter()->emitIns_R_I(INS_tst, size, reg, 0);
        }
    }

    regTracker.rsTrackRegIntCns(reg, imm);
}

/***********************************************************************************
 *
 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
 * genProduceReg() on the target register.
*/ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree) { switch (tree->gtOper) { case GT_CNS_INT: { // relocatable values tend to come down as a CNS_INT of native int type // so the line between these two opcodes is kind of blurry GenTreeIntConCommon* con = tree->AsIntConCommon(); ssize_t cnsVal = con->IconValue(); bool needReloc = compiler->opts.compReloc && tree->IsIconHandle(); if (needReloc) { instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); regTracker.rsTrackRegTrash(targetReg); } else { genSetRegToIcon(targetReg, cnsVal, targetType); } } break; case GT_CNS_DBL: { emitter* emit = getEmitter(); emitAttr size = emitActualTypeSize(tree); GenTreeDblCon* dblConst = tree->AsDblCon(); double constValue = dblConst->gtDblCon.gtDconVal; // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0) if (*(__int64*)&constValue == 0) { // A faster/smaller way to generate 0.0 // We will just zero out the entire vector register for both float and double emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B); } else if (emitter::emitIns_valid_imm_for_fmov(constValue)) { // We can load the FP constant using the fmov FP-immediate for this constValue emit->emitIns_R_F(INS_fmov, size, targetReg, constValue); } else { // Get a temp integer register to compute long address. regNumber addrReg = tree->GetSingleTempReg(); // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst); // For long address (default): `adrp + ldr + fmov` will be emitted. // For short address (proven later), `ldr` will be emitted. 
emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0); } } break; default: unreached(); } } // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { assert(!treeNode->gtOverflowEx()); genConsumeOperands(treeNode); regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter* emit = getEmitter(); emitAttr attr = emitActualTypeSize(treeNode); unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); GenTreePtr op1 = treeNode->gtGetOp1(); GenTreePtr op2 = treeNode->gtGetOp2(); assert(!varTypeIsFloating(targetType)); // The arithmetic node must be sitting in a register (since it's not contained) assert(targetReg != REG_NA); if (EA_SIZE(attr) == EA_8BYTE) { instruction ins = isUnsigned ? INS_umulh : INS_smulh; regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2); assert(r == targetReg); } else { assert(EA_SIZE(attr) == EA_4BYTE); instruction ins = isUnsigned ? INS_umull : INS_smull; regNumber r = emit->emitInsTernary(ins, EA_4BYTE, treeNode, op1, op2); emit->emitIns_R_R_I(isUnsigned ? INS_lsr : INS_asr, EA_8BYTE, targetReg, targetReg, 32); } genProduceReg(treeNode); } // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. 
void CodeGen::genCodeForBinary(GenTree* treeNode) { const genTreeOps oper = treeNode->OperGet(); regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter* emit = getEmitter(); assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND || oper == GT_OR || oper == GT_XOR); GenTreePtr op1 = treeNode->gtGetOp1(); GenTreePtr op2 = treeNode->gtGetOp2(); instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0) { switch (oper) { case GT_ADD: ins = INS_adds; break; case GT_SUB: ins = INS_subs; break; case GT_AND: ins = INS_ands; break; default: noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set"); } } // The arithmetic node must be sitting in a register (since it's not contained) assert(targetReg != REG_NA); regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); assert(r == targetReg); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genCodeForLclVar: Produce code for a GT_LCL_VAR node. // // Arguments: // tree - the GT_LCL_VAR node // void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) { var_types targetType = tree->TypeGet(); emitter* emit = getEmitter(); unsigned varNum = tree->gtLclNum; assert(varNum < compiler->lvaCount); LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); bool isRegCandidate = varDsc->lvIsRegCandidate(); // lcl_vars are not defs assert((tree->gtFlags & GTF_VAR_DEF) == 0); // If this is a register candidate that has been spilled, genConsumeReg() will // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) { // targetType must be a normal scalar type and not a TYP_STRUCT assert(targetType != TYP_STRUCT); instruction ins = ins_Load(targetType); emitAttr attr = emitTypeSize(targetType); attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0); genProduceReg(tree); } } //------------------------------------------------------------------------ // genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. // // Arguments: // tree - the GT_STORE_LCL_FLD node // void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) { var_types targetType = tree->TypeGet(); regNumber targetReg = tree->gtRegNum; emitter* emit = getEmitter(); noway_assert(targetType != TYP_STRUCT); // record the offset unsigned offset = tree->gtLclOffs; // We must have a stack store with GT_STORE_LCL_FLD noway_assert(targetReg == REG_NA); unsigned varNum = tree->gtLclNum; assert(varNum < compiler->lvaCount); LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); // Ensure that lclVar nodes are typed correctly. assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); GenTreePtr data = tree->gtOp1; genConsumeRegs(data); regNumber dataReg = REG_NA; if (data->isContainedIntOrIImmed()) { assert(data->IsIntegralConst(0)); dataReg = REG_ZR; } else { assert(!data->isContained()); dataReg = data->gtRegNum; } assert(dataReg != REG_NA); instruction ins = ins_Store(targetType); emitAttr attr = emitTypeSize(targetType); attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); genUpdateLife(tree); varDsc->lvRegNum = REG_STK; } //------------------------------------------------------------------------ // genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node. 
// // Arguments: // tree - the GT_STORE_LCL_VAR node // void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) { var_types targetType = tree->TypeGet(); regNumber targetReg = tree->gtRegNum; emitter* emit = getEmitter(); unsigned varNum = tree->gtLclNum; assert(varNum < compiler->lvaCount); LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); // Ensure that lclVar nodes are typed correctly. assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); GenTreePtr data = tree->gtOp1; // var = call, where call returns a multi-reg return value // case is handled separately. if (data->gtSkipReloadOrCopy()->IsMultiRegCall()) { genMultiRegCallStoreToLocal(tree); } else { genConsumeRegs(data); regNumber dataReg = REG_NA; if (data->isContainedIntOrIImmed()) { assert(data->IsIntegralConst(0)); dataReg = REG_ZR; } else { assert(!data->isContained()); dataReg = data->gtRegNum; } assert(dataReg != REG_NA); if (targetReg == REG_NA) // store into stack based LclVar { inst_set_SV_var(tree); instruction ins = ins_Store(targetType); emitAttr attr = emitTypeSize(targetType); attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); genUpdateLife(tree); varDsc->lvRegNum = REG_STK; } else // store into register (i.e move into register) { if (dataReg != targetReg) { // Assign into targetReg when dataReg (from op1) is not the same register inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType); } genProduceReg(tree); } } } //------------------------------------------------------------------------ // genReturn: Generates code for return statement. // In case of struct return, delegates to the genStructReturn method. // // Arguments: // treeNode - The GT_RETURN or GT_RETFILT tree node. 
// // Return Value: // None // void CodeGen::genReturn(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); GenTreePtr op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in the return // register, if it's not already there. The processing is the same as GT_RETURN. For filters, the IL spec says the // result is type int32. Further, the only legal values are 0 or 1; the use of other values is "undefined". assert(!treeNode->OperIs(GT_RETFILT) || (targetType == TYP_VOID) || (targetType == TYP_INT)); #ifdef DEBUG if (targetType == TYP_VOID) { assert(op1 == nullptr); } #endif if (isStructReturn(treeNode)) { genStructReturn(treeNode); } else if (targetType != TYP_VOID) { assert(op1 != nullptr); noway_assert(op1->gtRegNum != REG_NA); genConsumeReg(op1); regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; bool movRequired = (op1->gtRegNum != retReg); if (!movRequired) { if (op1->OperGet() == GT_LCL_VAR) { GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0)) { // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR unsigned lclNum = lcl->gtLclNum; LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); var_types op1Type = genActualType(op1->TypeGet()); var_types lclType = genActualType(varDsc->TypeGet()); if (genTypeSize(op1Type) < genTypeSize(lclType)) { movRequired = true; } } } } if (movRequired) { emitAttr attr = emitActualTypeSize(targetType); getEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->gtRegNum); } } #ifdef PROFILING_SUPPORTED // There will be a single return block while generating profiler ELT callbacks. 
// // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN: // In flowgraph and other places assert that the last node of a block marked as // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to // maintain such an invariant irrespective of whether profiler hook needed or not. // Also, there is not much to be gained by materializing it as an explicit node. if (compiler->compCurBB == compiler->genReturnBB) { genProfilingLeaveCallback(); } #endif } /*********************************************************************************************** * Generate code for localloc */ void CodeGen::genLclHeap(GenTreePtr tree) { assert(tree->OperGet() == GT_LCLHEAP); GenTreePtr size = tree->gtOp.gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); regNumber targetReg = tree->gtRegNum; regNumber regCnt = REG_NA; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; BasicBlock* loop = nullptr; unsigned stackAdjustment = 0; #ifdef DEBUG // Verify ESP if (compiler->opts.compStackCheckOnRet) { noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); BasicBlock* esp_check = genCreateTempLabel(); emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); inst_JMP(jmpEqual, esp_check); getEmitter()->emitIns(INS_bkpt); genDefineTempLabel(esp_check); } #endif noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack // Whether method has PSPSym. 
bool hasPspSym; #if FEATURE_EH_FUNCLETS hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM); #else hasPspSym = false; #endif // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI()) { // If size is a constant, then it must be contained. assert(size->isContained()); // If amount is zero then return null in targetReg amount = size->gtIntCon.gtIconVal; if (amount == 0) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); goto BAILOUT; } // 'amount' is the total numbe of bytes to localloc to properly STACK_ALIGN amount = AlignUp(amount, STACK_ALIGN); } else { // If 0 bail out by returning null in targetReg genConsumeRegAndCopy(size, targetReg); endLabel = genCreateTempLabel(); getEmitter()->emitIns_R_R(INS_tst, easz, targetReg, targetReg); emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); inst_JMP(jmpEqual, endLabel); // Compute the size of the block to allocate and perform alignment. // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. if (!hasPspSym && compiler->info.compInitMem) { assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { regCnt = tree->ExtractTempReg(); if (regCnt != targetReg) { inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); } } // Align to STACK_ALIGN // regCnt will be the total number of bytes to localloc inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); inst_RV_IV(INS_and, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); } stackAdjustment = 0; #if FEATURE_EH_FUNCLETS // If we have PSPsym, then need to re-locate it after localloc. 
if (hasPspSym) { stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym pspSymReg = tree->ExtractTempReg(); getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif #if FEATURE_FIXED_OUT_ARGS // If we have an outgoing arg area then we must adjust the SP by popping off the // outgoing arg area. We will restore it right before we return from this method. // // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following // are the cases that needs to be handled: // i) Method has PSPSym + out-going arg area. // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs). // Therefore, we will pop-off RSP upto out-going arg area before locallocating. // We need to add padding to ensure RSP is STACK_ALIGN'ed while re-locating PSPSym + arg area. // ii) Method has no PSPSym but out-going arg area. // Almost same case as above without the requirement to pad for the final RSP to be STACK_ALIGN'ed. // iii) Method has PSPSym but no out-going arg area. // Nothing to pop-off from the stack but needs to relocate PSPSym with SP padded. // iv) Method has neither PSPSym nor out-going arg area. // Nothing needs to popped off from stack nor relocated. if (compiler->lvaOutgoingArgSpaceSize > 0) { assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain // aligned inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); stackAdjustment += compiler->lvaOutgoingArgSpaceSize; } #endif if (size->IsCnsIntOrI()) { // We should reach here only for non-zero, constant size allocations. assert(amount > 0); // For small allocations we will generate up to four stp instructions size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT); if (cntStackAlignedWidthItems <= 4) { while (cntStackAlignedWidthItems != 0) { // We can use pre-indexed addressing. // stp ZR, ZR, [SP, #-16]! 
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); cntStackAlignedWidthItems -= 1; } goto ALLOC_DONE; } else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <= { // Since the size is a page or less, simply adjust the SP value // The SP might already be in the guard page, must touch it BEFORE // the alloc, not after. // ldr wz, [SP, #0] getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0); inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE); goto ALLOC_DONE; } // else, "mov regCnt, amount" // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); if (!hasPspSym && compiler->info.compInitMem) { assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { regCnt = tree->ExtractTempReg(); } genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG); } if (compiler->info.compInitMem) { BasicBlock* loop = genCreateTempLabel(); // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that RSP is always valid // by tickling the pages, we will just push 0's on the stack. // // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 // and localloc size is a multiple of STACK_ALIGN. // Loop: genDefineTempLabel(loop); // We can use pre-indexed addressing. // stp ZR, ZR, [SP, #-16]! getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); // If not done, loop // Note that regCnt is the number of bytes to stack allocate. // Therefore we need to subtract 16 from regcnt here. 
assert(genIsValidIntReg(regCnt)); inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type)); emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); inst_JMP(jmpNotEqual, loop); } else { // At this point 'regCnt' is set to the total number of bytes to locAlloc. // // We don't need to zero out the allocated memory. However, we do have // to tickle the pages to ensure that SP is always valid and is // in sync with the "stack guard page". Note that in the worst // case SP is on the last byte of the guard page. Thus you must // touch SP+0 first not SP+x01000. // // Another subtlety is that you don't want SP to be exactly on the // boundary of the guard page because PUSH is predecrement, thus // call setup would not touch the guard page but just beyond it // // Note that we go through a few hoops so that SP never points to // illegal pages at any time during the ticking process // // subs regCnt, SP, regCnt // regCnt now holds ultimate SP // jb Loop // result is smaller than orignial SP (no wrap around) // mov regCnt, #0 // Overflow, pick lowest possible value // // Loop: // ldr wzr, [SP + 0] // tickle the page - read from the page // sub regTmp, SP, GetOsPageSize() // decrement SP by GetOsPageSize() // cmp regTmp, regCnt // jb Done // mov SP, regTmp // j Loop // // Done: // mov SP, regCnt // // Setup the regTmp regNumber regTmp = tree->GetSingleTempReg(); BasicBlock* loop = genCreateTempLabel(); BasicBlock* done = genCreateTempLabel(); // subs regCnt, SP, regCnt // regCnt now holds ultimate SP getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); inst_JMP(EJ_vc, loop); // branch if the V flag is not set // Overflow, set regCnt to lowest possible value instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); genDefineTempLabel(loop); // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0); // decrement SP by GetOsPageSize() 
getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize()); getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt); emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED); inst_JMP(jmpLTU, done); // Update SP to be at the next page of stack that we will tickle getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); // Jump to loop and tickle new stack address inst_JMP(EJ_jmp, loop); // Done with stack tickle loop genDefineTempLabel(done); // Now just move the final value to SP getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt); } ALLOC_DONE: // Re-adjust SP to allocate PSPSym and out-going arg area if (stackAdjustment != 0) { assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned assert(stackAdjustment > 0); getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment); #if FEATURE_EH_FUNCLETS // Write PSPSym to its new location. if (hasPspSym) { assert(genIsValidIntReg(pspSymReg)); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif // Return the stackalloc'ed address in result register. // TargetReg = RSP + stackAdjustment. 
// getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment); } else // stackAdjustment == 0 { // Move the final value of SP to targetReg inst_RV_RV(INS_mov, targetReg, REG_SPBASE); } BAILOUT: if (endLabel != nullptr) genDefineTempLabel(endLabel); // Write the lvaLocAllocSPvar stack frame slot if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) { getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0); } #if STACK_PROBES if (compiler->opts.compNeedStackProbes) { genGenerateStackProbe(); } #endif #ifdef DEBUG // Update new ESP if (compiler->opts.compStackCheckOnRet) { noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0); } #endif genProduceReg(tree); } //------------------------------------------------------------------------ // genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node. // // Arguments: // tree - the node // void CodeGen::genCodeForNegNot(GenTree* tree) { assert(tree->OperIs(GT_NEG, GT_NOT)); var_types targetType = tree->TypeGet(); assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType)); regNumber targetReg = tree->gtRegNum; instruction ins = genGetInsForOper(tree->OperGet(), targetType); // The arithmetic node must be sitting in a register (since it's not contained) assert(!tree->isContained()); // The dst can only be a register. assert(targetReg != REG_NA); GenTreePtr operand = tree->gtGetOp1(); assert(!operand->isContained()); // The src must be a register. 
regNumber operandReg = genConsumeReg(operand); getEmitter()->emitIns_R_R(ins, emitActualTypeSize(tree), targetReg, operandReg); genProduceReg(tree); } //------------------------------------------------------------------------ // genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD: // (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph; // (2) float/double MOD is morphed into a helper call by front-end. // // Arguments: // tree - the node // void CodeGen::genCodeForDivMod(GenTreeOp* tree) { assert(tree->OperIs(GT_DIV, GT_UDIV)); var_types targetType = tree->TypeGet(); emitter* emit = getEmitter(); genConsumeOperands(tree); if (varTypeIsFloating(targetType)) { // Floating point divide never raises an exception genCodeForBinary(tree); } else // an integer divide operation { GenTreePtr divisorOp = tree->gtGetOp2(); emitAttr size = EA_ATTR(genTypeSize(genActualType(tree->TypeGet()))); if (divisorOp->IsIntegralConst(0)) { // We unconditionally throw a divide by zero exception genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO); // We still need to call genProduceReg genProduceReg(tree); } else // the divisor is not the constant zero { regNumber divisorReg = divisorOp->gtRegNum; // Generate the require runtime checks for GT_DIV or GT_UDIV if (tree->gtOper == GT_DIV) { BasicBlock* sdivLabel = genCreateTempLabel(); // Two possible exceptions: // (AnyVal / 0) => DivideByZeroException // (MinInt / -1) => ArithmeticException // bool checkDividend = true; // Do we have an immediate for the 'divisorOp'? 
// if (divisorOp->IsCnsIntOrI()) { GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon(); ssize_t intConstValue = intConstTree->IconValue(); assert(intConstValue != 0); // already checked above by IsIntegralConst(0)) if (intConstValue != -1) { checkDividend = false; // We statically know that the dividend is not -1 } } else // insert check for divison by zero { // Check if the divisor is zero throw a DivideByZeroException emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); } if (checkDividend) { // Check if the divisor is not -1 branch to 'sdivLabel' emit->emitIns_R_I(INS_cmp, size, divisorReg, -1); emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); inst_JMP(jmpNotEqual, sdivLabel); // If control flow continues past here the 'divisorReg' is known to be -1 regNumber dividendReg = tree->gtGetOp1()->gtRegNum; // At this point the divisor is known to be -1 // // Issue the 'adds zr, dividendReg, dividendReg' instruction // this will set both the Z and V flags only when dividendReg is MinInt // emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg); inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flags is set throw // ArithmeticException genDefineTempLabel(sdivLabel); } genCodeForBinary(tree); // Generate the sdiv instruction } else // (tree->gtOper == GT_UDIV) { // Only one possible exception // (AnyVal / 0) => DivideByZeroException // // Note that division by the constant 0 was already checked for above by the // op2->IsIntegralConst(0) check // if (!divisorOp->IsCnsIntOrI()) { // divisorOp is not a constant, so it could be zero // emit->emitIns_R_I(INS_cmp, size, divisorReg, 0); emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO); } genCodeForBinary(tree); } } } } // Generate code for 
InitBlk by performing a loop unroll // Preconditions: // a) Both the size and fill byte value are integer constants. // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) { // Make sure we got the arguments of the initblk/initobj operation in the right registers unsigned size = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Addr(); GenTreePtr initVal = initBlkNode->Data(); if (initVal->OperIsInitVal()) { initVal = initVal->gtGetOp1(); } assert(dstAddr->isUsedFromReg()); assert(initVal->isUsedFromReg() && !initVal->IsIntegralConst(0) || initVal->IsIntegralConst(0)); assert(size != 0); assert(size <= INITBLK_UNROLL_LIMIT); emitter* emit = getEmitter(); genConsumeOperands(initBlkNode); if (initBlkNode->gtFlags & GTF_BLK_VOLATILE) { // issue a full memory barrier before a volatile initBlockUnroll operation instGen_MemoryBarrier(); } regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum; assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR)); unsigned offset = 0; // Perform an unroll using stp. if (size >= 2 * REGSIZE_BYTES) { // Determine how many 16 byte slots size_t slots = size / (2 * REGSIZE_BYTES); while (slots-- > 0) { emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, valReg, valReg, dstAddr->gtRegNum, offset); offset += (2 * REGSIZE_BYTES); } } // Fill the remainder (15 bytes or less) if there's any. 
if ((size & 0xf) != 0) { if ((size & 8) != 0) { emit->emitIns_R_R_I(INS_str, EA_8BYTE, valReg, dstAddr->gtRegNum, offset); offset += 8; } if ((size & 4) != 0) { emit->emitIns_R_R_I(INS_str, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); offset += 4; } if ((size & 2) != 0) { emit->emitIns_R_R_I(INS_strh, EA_2BYTE, valReg, dstAddr->gtRegNum, offset); offset += 2; } if ((size & 1) != 0) { emit->emitIns_R_R_I(INS_strb, EA_1BYTE, valReg, dstAddr->gtRegNum, offset); } } } // Generate code for a load pair from some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset) { emitter* emit = getEmitter(); if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; emit->emitIns_R_R_S_S(INS_ldp, EA_8BYTE, EA_8BYTE, dst, dst2, base->gtLclVarCommon.gtLclNum, offset); } else { emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, dst, dst2, base->gtRegNum, offset); } } // Generate code for a store pair to some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset) { emitter* emit = getEmitter(); if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, src, src2, base->gtLclVarCommon.gtLclNum, offset); } else { emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, src, src2, base->gtRegNum, offset); } } // Generate code for CpObj nodes wich copy structs that have interleaved // GC pointers. // For this case we'll generate a sequence of loads/stores in the case of struct // slots that don't contain GC pointers. 
The generated code will look like: // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] // // In the case of a GC-Pointer we'll call the ByRef write barrier helper // who happens to use the same registers as the previous call to maintain // the same register requirements and register killsets: // bl CORINFO_HELP_ASSIGN_BYREF // // So finally an example would look like this: // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] // bl CORINFO_HELP_ASSIGN_BYREF // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] // bl CORINFO_HELP_ASSIGN_BYREF // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { GenTreePtr dstAddr = cpObjNode->Addr(); GenTreePtr source = cpObjNode->Data(); var_types srcAddrType = TYP_BYREF; bool sourceIsLocal = false; assert(source->isContained()); if (source->gtOper == GT_IND) { GenTree* srcAddr = source->gtGetOp1(); assert(!srcAddr->isContained()); srcAddrType = srcAddr->TypeGet(); } else { noway_assert(source->IsLocal()); sourceIsLocal = true; } bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG assert(!dstAddr->isContained()); // This GenTree node has data about GC pointers, this means we're dealing // with CpObj. assert(cpObjNode->gtGcPtrCount > 0); #endif // DEBUG // Consume the operands and get them into the right registers. // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); unsigned slots = cpObjNode->gtSlots; // Temp register(s) used to perform the sequence of loads and stores. 
regNumber tmpReg = cpObjNode->ExtractTempReg(); regNumber tmpReg2 = REG_NA; assert(genIsValidIntReg(tmpReg)); assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF); assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF); if (slots > 1) { tmpReg2 = cpObjNode->GetSingleTempReg(); assert(tmpReg2 != tmpReg); assert(genIsValidIntReg(tmpReg2)); assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF); assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF); } if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) { // issue a full memory barrier before a volatile CpObj operation instGen_MemoryBarrier(); } emitter* emit = getEmitter(); BYTE* gcPtrs = cpObjNode->gtGcPtrs; // If we can prove it's on the stack we don't need to use the write barrier. if (dstOnStack) { unsigned i = 0; // Check if two or more remaining slots and use a ldp/stp sequence while (i < slots - 1) { emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); emitAttr attr1 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 1])); emit->emitIns_R_R_R_I(INS_ldp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX, attr1); emit->emitIns_R_R_R_I(INS_stp, attr0, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX, attr1); i += 2; } // Use a ldr/str sequence for the last remainder if (i < slots) { emitAttr attr0 = emitTypeSize(compiler->getJitGCType(gcPtrs[i + 0])); emit->emitIns_R_R_I(INS_ldr, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); emit->emitIns_R_R_I(INS_str, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); } } else { unsigned gcPtrCount = cpObjNode->gtGcPtrCount; unsigned i = 0; while (i < slots) { switch (gcPtrs[i]) { case TYPE_GC_NONE: // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE)) { emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE, 
INS_OPTS_POST_INDEX); emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); ++i; // extra increment of i, since we are copying two items } else { emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); } break; default: // In the case of a GC-Pointer we'll call the ByRef write barrier helper genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); gcPtrCount--; break; } ++i; } assert(gcPtrCount == 0); } if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) { // issue a INS_BARRIER_ISHLD after a volatile CpObj operation instGen_MemoryBarrier(INS_BARRIER_ISHLD); } // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. // While we normally update GC info prior to the last instruction that uses them, // these actually live into the helper call. 
gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); } // generate code do a switch statement based on a table of ip-relative offsets void CodeGen::genTableBasedSwitch(GenTree* treeNode) { genConsumeOperands(treeNode->AsOp()); regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; regNumber tmpReg = treeNode->GetSingleTempReg(); // load the ip-relative offset (which is relative to start of fgFirstBB) getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL); // add it to the absolute address of fgFirstBB compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg); // br baseReg getEmitter()->emitIns_R(INS_br, emitActualTypeSize(TYP_I_IMPL), baseReg); } // emits the table and an instruction to get the address of the first element void CodeGen::genJumpTable(GenTree* treeNode) { noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); assert(treeNode->OperGet() == GT_JMPTABLE); unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; unsigned jmpTabOffs; unsigned jmpTabBase; jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true); jmpTabOffs = 0; JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase); for (unsigned i = 0; i < jumpCount; i++) { BasicBlock* target = *jumpTable++; noway_assert(target->bbFlags & BBF_JMP_TARGET); JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum); getEmitter()->emitDataGenData(i, target); }; getEmitter()->emitDataGenEnd(); // Access to inline data is 'abstracted' by a special type of static member // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference // to constant data, not a real static field. 
getEmitter()->emitIns_R_C(INS_adr, emitActualTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA, compiler->eeFindJitDataOffs(jmpTabBase), 0); genProduceReg(treeNode); } // generate code for the locked operations: // GT_LOCKADD, GT_XCHG, GT_XADD void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { GenTree* data = treeNode->gtOp.gtOp2; GenTree* addr = treeNode->gtOp.gtOp1; regNumber targetReg = treeNode->gtRegNum; regNumber dataReg = data->gtRegNum; regNumber addrReg = addr->gtRegNum; regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT); regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; // Check allocator assumptions // // The register allocator should have extended the lifetimes of all input and internal registers so that // none interfere with the target. noway_assert(addrReg != targetReg); noway_assert(addrReg != loadReg); noway_assert(dataReg != loadReg); noway_assert(addrReg != storeDataReg); noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); assert(addr->isUsedFromReg()); noway_assert(exResultReg != REG_NA); noway_assert(exResultReg != targetReg); noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); // Store exclusive unpredictable cases must be avoided noway_assert(exResultReg != storeDataReg); noway_assert(exResultReg != addrReg); genConsumeAddress(addr); genConsumeRegs(data); // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers // die at the first instruction generated by the node. This is not the case for these atomics as the input // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until // we are finished generating the code for this node. 
gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); // TODO-ARM64-CQ Use ARMv8.1 atomics if available // https://github.com/dotnet/coreclr/issues/11881 // Emit code like this: // retry: // ldxr loadReg, [addrReg] // add storeDataReg, loadReg, dataReg # Only for GT_XADD & GT_LOCKADD // # GT_XCHG storeDataReg === dataReg // stxr exResult, storeDataReg, [addrReg] // cbnz exResult, retry BasicBlock* labelRetry = genCreateTempLabel(); genDefineTempLabel(labelRetry); // The following instruction includes a acquire half barrier // TODO-ARM64-CQ Evaluate whether this is necessary // https://github.com/dotnet/coreclr/issues/14346 getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), loadReg, addrReg); switch (treeNode->OperGet()) { case GT_XADD: case GT_LOCKADD: if (data->isContainedIntOrIImmed()) { // Even though INS_add is specified here, the encoder will choose either // an INS_add or an INS_sub and encode the immediate as a positive value genInstrWithConstant(INS_add, emitActualTypeSize(treeNode), storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), REG_NA); } else { getEmitter()->emitIns_R_R_R(INS_add, emitActualTypeSize(treeNode), storeDataReg, loadReg, dataReg); } break; case GT_XCHG: assert(!data->isContained()); storeDataReg = dataReg; break; default: unreached(); } // The following instruction includes a release half barrier // TODO-ARM64-CQ Evaluate whether this is necessary // https://github.com/dotnet/coreclr/issues/14346 getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, storeDataReg, addrReg); getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); if (treeNode->gtRegNum != REG_NA) { genProduceReg(treeNode); } } //------------------------------------------------------------------------ // genCodeForSwap: Produce code for a GT_CMPXCHG node. 
// // Arguments: // tree - the GT_CMPXCHG node // void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) { assert(treeNode->OperIs(GT_CMPXCHG)); GenTreePtr addr = treeNode->gtOpLocation; // arg1 GenTreePtr data = treeNode->gtOpValue; // arg2 GenTreePtr comparand = treeNode->gtOpComparand; // arg3 regNumber targetReg = treeNode->gtRegNum; regNumber dataReg = data->gtRegNum; regNumber addrReg = addr->gtRegNum; regNumber comparandReg = comparand->gtRegNum; regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); // Check allocator assumptions // // The register allocator should have extended the lifetimes of all input and internal registers so that // none interfere with the target. noway_assert(addrReg != targetReg); noway_assert(dataReg != targetReg); noway_assert(comparandReg != targetReg); noway_assert(addrReg != dataReg); noway_assert(targetReg != REG_NA); noway_assert(exResultReg != REG_NA); noway_assert(exResultReg != targetReg); assert(addr->isUsedFromReg()); assert(data->isUsedFromReg()); assert(!comparand->isUsedFromMemory()); // Store exclusive unpredictable cases must be avoided noway_assert(exResultReg != dataReg); noway_assert(exResultReg != addrReg); genConsumeAddress(addr); genConsumeRegs(data); genConsumeRegs(comparand); // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers // die at the first instruction generated by the node. This is not the case for these atomics as the input // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until // we are finished generating the code for this node. 
gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); // TODO-ARM64-CQ Use ARMv8.1 atomics if available // https://github.com/dotnet/coreclr/issues/11881 // Emit code like this: // retry: // ldxr targetReg, [addrReg] // cmp targetReg, comparandReg // bne compareFail // stxr exResult, dataReg, [addrReg] // cbnz exResult, retry // compareFail: BasicBlock* labelRetry = genCreateTempLabel(); BasicBlock* labelCompareFail = genCreateTempLabel(); genDefineTempLabel(labelRetry); // The following instruction includes a acquire half barrier // TODO-ARM64-CQ Evaluate whether this is necessary // https://github.com/dotnet/coreclr/issues/14346 getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg); if (comparand->isContainedIntOrIImmed()) { if (comparand->IsIntegralConst(0)) { getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); } else { getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparand->AsIntConCommon()->IconValue()); getEmitter()->emitIns_J(INS_bne, labelCompareFail); } } else { getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); getEmitter()->emitIns_J(INS_bne, labelCompareFail); } // The following instruction includes a release half barrier // TODO-ARM64-CQ Evaluate whether this is necessary // https://github.com/dotnet/coreclr/issues/14346 getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg); getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); genDefineTempLabel(labelCompareFail); gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); genProduceReg(treeNode); } instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins = INS_brk; if (varTypeIsFloating(type)) { switch (oper) { case GT_ADD: ins = INS_fadd; break; case GT_SUB: ins = INS_fsub; break; case GT_MUL: ins = INS_fmul; break; case GT_DIV: ins = INS_fdiv; break; case GT_NEG: ins = INS_fneg; break; 
default: NYI("Unhandled oper in genGetInsForOper() - float"); unreached(); break; } } else { switch (oper) { case GT_ADD: ins = INS_add; break; case GT_AND: ins = INS_and; break; case GT_DIV: ins = INS_sdiv; break; case GT_UDIV: ins = INS_udiv; break; case GT_MUL: ins = INS_mul; break; case GT_LSH: ins = INS_lsl; break; case GT_NEG: ins = INS_neg; break; case GT_NOT: ins = INS_mvn; break; case GT_OR: ins = INS_orr; break; case GT_ROR: ins = INS_ror; break; case GT_RSH: ins = INS_asr; break; case GT_RSZ: ins = INS_lsr; break; case GT_SUB: ins = INS_sub; break; case GT_XOR: ins = INS_eor; break; default: NYI("Unhandled oper in genGetInsForOper() - integer"); unreached(); break; } } return ins; } //------------------------------------------------------------------------ // genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. // // Arguments: // tree - the GT_RETURNTRAP node // void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) { assert(tree->OperGet() == GT_RETURNTRAP); // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' GenTree* data = tree->gtOp1; genConsumeRegs(data); getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); BasicBlock* skipLabel = genCreateTempLabel(); emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); inst_JMP(jmpEqual, skipLabel); // emit the call to the EE-helper that stops for GC (or other reasons) genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); genDefineTempLabel(skipLabel); } //------------------------------------------------------------------------ // genCodeForStoreInd: Produce code for a GT_STOREIND node. 
// // Arguments: // tree - the GT_STOREIND node // void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) { GenTree* data = tree->Data(); GenTree* addr = tree->Addr(); var_types targetType = tree->TypeGet(); emitter* emit = getEmitter(); emitAttr attr = emitTypeSize(tree); instruction ins = ins_Store(targetType); GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); if (writeBarrierForm != GCInfo::WBF_NoBarrier) { // data and addr must be in registers. // Consume both registers so that any copies of interfering // registers are taken care of. genConsumeOperands(tree); #if NOGC_WRITE_BARRIERS // At this point, we should not have any interference. // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, // as that is where 'addr' must go. noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) { inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); } // 'data' goes into x15 (REG_WRITE_BARRIER) if (data->gtRegNum != REG_WRITE_BARRIER) { inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); } #else // At this point, we should not have any interference. // That is, 'data' must not be in REG_ARG_0, // as that is where 'addr' must go. noway_assert(data->gtRegNum != REG_ARG_0); // addr goes in REG_ARG_0 if (addr->gtRegNum != REG_ARG_0) { inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); } // data goes in REG_ARG_1 if (data->gtRegNum != REG_ARG_1) { inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); } #endif // NOGC_WRITE_BARRIERS genGCWriteBarrier(tree, writeBarrierForm); } else // A normal store, not a WriteBarrier store { bool dataIsUnary = false; GenTree* nonRMWsrc = nullptr; // We must consume the operands in the proper execution order, // so that liveness is updated appropriately. 
genConsumeAddress(addr); if (!data->isContained()) { genConsumeRegs(data); } regNumber dataReg = REG_NA; if (data->isContainedIntOrIImmed()) { assert(data->IsIntegralConst(0)); dataReg = REG_ZR; } else // data is not contained, so evaluate it into a register { assert(!data->isContained()); dataReg = data->gtRegNum; } assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED)); if (tree->gtFlags & GTF_IND_VOLATILE) { bool useStoreRelease = genIsValidIntReg(dataReg) && !addr->isContained() && !(tree->gtFlags & GTF_IND_UNALIGNED); if (useStoreRelease) { switch (EA_SIZE(attr)) { case EA_1BYTE: assert(ins == INS_strb); ins = INS_stlrb; break; case EA_2BYTE: assert(ins == INS_strh); ins = INS_stlrh; break; case EA_4BYTE: case EA_8BYTE: assert(ins == INS_str); ins = INS_stlr; break; default: assert(false); // We should not get here } } else { // issue a full memory barrier before a volatile StInd instGen_MemoryBarrier(); } } emit->emitInsLoadStoreOp(ins, attr, dataReg, tree); } } //------------------------------------------------------------------------ // genCodeForSwap: Produce code for a GT_SWAP node. // // Arguments: // tree - the GT_SWAP node // void CodeGen::genCodeForSwap(GenTreeOp* tree) { // Swap is only supported for lclVar operands that are enregistered // We do not consume or produce any registers. Both operands remain enregistered. // However, the gc-ness may change. 
assert(genIsRegCandidateLocal(tree->gtOp1) && genIsRegCandidateLocal(tree->gtOp2)); GenTreeLclVarCommon* lcl1 = tree->gtOp1->AsLclVarCommon(); LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); var_types type1 = varDsc1->TypeGet(); GenTreeLclVarCommon* lcl2 = tree->gtOp2->AsLclVarCommon(); LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); var_types type2 = varDsc2->TypeGet(); // We must have both int or both fp regs assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); // FP swap is not yet implemented (and should have NYI'd in LSRA) assert(!varTypeIsFloating(type1)); regNumber oldOp1Reg = lcl1->gtRegNum; regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); regNumber oldOp2Reg = lcl2->gtRegNum; regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); // We don't call genUpdateVarReg because we don't have a tree node with the new register. varDsc1->lvRegNum = oldOp2Reg; varDsc2->lvRegNum = oldOp1Reg; // Do the xchg emitAttr size = EA_PTRSIZE; if (varTypeGCtype(type1) != varTypeGCtype(type2)) { // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. // Otherwise it will leave them alone, which is correct if they have the same GC-ness. size = EA_GCREF; } NYI("register swap"); // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); // Update the gcInfo. // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask); // gcMarkRegPtrVal will do the appropriate thing for non-gc types. // It will also dump the updates. gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); } //------------------------------------------------------------------------------------------- // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value // corresponding to a binary Relational operator result. 
// // Arguments: // dstReg - The target register to set to 1 or 0 // tree - The GenTree Relop node that was used to set the Condition codes // // Return Value: none // // Notes: // A full 64-bit value of either 1 or 0 is setup in the 'dstReg' //------------------------------------------------------------------------------------------- void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) { emitJumpKind jumpKind[2]; bool branchToTrueLabel[2]; genJumpKindsForTree(tree, jumpKind, branchToTrueLabel); assert(jumpKind[0] != EJ_NONE); // Set the reg according to the flags inst_SET(jumpKind[0], dstReg); // Do we need to use two operation to set the flags? // if (jumpKind[1] != EJ_NONE) { emitter* emit = getEmitter(); bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0); insCond secondCond; // The only ones that require two operations are the // floating point compare operations of BEQ or BNE.UN // if (tree->gtOper == GT_EQ) { // This must be an ordered comparison. assert(ordered); assert(jumpKind[1] == EJ_vs); // We complement this value secondCond = INS_COND_VC; // for the secondCond } else // gtOper == GT_NE { // This must be BNE.UN (unordered comparison) assert((tree->gtOper == GT_NE) && !ordered); assert(jumpKind[1] == EJ_lo); // We complement this value secondCond = INS_COND_HS; // for the secondCond } // The second instruction is a 'csinc' instruction that either selects the previous dstReg // or increments the ZR register, which produces a 1 result. emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond); } } //------------------------------------------------------------------------ // genIntToFloatCast: Generate code to cast an int/long to float/double // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // SrcType= int32/uint32/int64/uint64 and DstType=float/double. 
// void CodeGen::genIntToFloatCast(GenTreePtr treeNode) { // int type --> float/double conversions are always non-overflow ones assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidFloatReg(targetReg)); GenTreePtr op1 = treeNode->gtOp.gtOp1; assert(!op1->isContained()); // Cannot be contained assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg. var_types dstType = treeNode->CastToType(); var_types srcType = genActualType(op1->TypeGet()); assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); // force the srcType to unsigned if GT_UNSIGNED flag is set if (treeNode->gtFlags & GTF_UNSIGNED) { srcType = genUnsignedType(srcType); } // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf; insOpts cvtOption = INS_OPTS_NONE; // invalid value if (dstType == TYP_DOUBLE) { if (srcSize == EA_4BYTE) { cvtOption = INS_OPTS_4BYTE_TO_D; } else { assert(srcSize == EA_8BYTE); cvtOption = INS_OPTS_8BYTE_TO_D; } } else { assert(dstType == TYP_FLOAT); if (srcSize == EA_4BYTE) { cvtOption = INS_OPTS_4BYTE_TO_S; } else { assert(srcSize == EA_8BYTE); cvtOption = INS_OPTS_8BYTE_TO_S; } } genConsumeOperands(treeNode->AsOp()); getEmitter()->emitIns_R_R(ins, emitActualTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genFloatToIntCast: Generate code to cast float/double to int/long // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. 
// SrcType=float/double and DstType= int32/uint32/int64/uint64 // void CodeGen::genFloatToIntCast(GenTreePtr treeNode) { // we don't expect to see overflow detecting float/double --> int type conversions here // as they should have been converted into helper calls by front-end. assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. GenTreePtr op1 = treeNode->gtOp.gtOp1; assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg. var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE // For conversions to small types (byte/sbyte/int16/uint16) from float/double, // we expect the front-end or lowering phase to have generated two levels of cast. // emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); instruction ins = INS_fcvtzs; // default to sign converts insOpts cvtOption = INS_OPTS_NONE; // invalid value if (varTypeIsUnsigned(dstType)) { ins = INS_fcvtzu; // use unsigned converts } if (srcType == TYP_DOUBLE) { if (dstSize == EA_4BYTE) { cvtOption = INS_OPTS_D_TO_4BYTE; } else { assert(dstSize == EA_8BYTE); cvtOption = INS_OPTS_D_TO_8BYTE; } } else { assert(srcType == TYP_FLOAT); if (dstSize == EA_4BYTE) { cvtOption = INS_OPTS_S_TO_4BYTE; } else { assert(dstSize == EA_8BYTE); cvtOption = INS_OPTS_S_TO_8BYTE; } } genConsumeOperands(treeNode->AsOp()); getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genCkfinite: Generate code for ckfinite opcode. 
// // Arguments: // treeNode - The GT_CKFINITE node // // Return Value: // None. // // Assumptions: // GT_CKFINITE node has reserved an internal register. // void CodeGen::genCkfinite(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CKFINITE); GenTreePtr op1 = treeNode->gtOp.gtOp1; var_types targetType = treeNode->TypeGet(); int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent. int shiftAmount = targetType == TYP_FLOAT ? 20 : 52; emitter* emit = getEmitter(); // Extract exponent into a register. regNumber intReg = treeNode->GetSingleTempReg(); regNumber fpReg = genConsumeReg(op1); emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), intReg, fpReg); emit->emitIns_R_R_I(INS_lsr, emitActualTypeSize(targetType), intReg, intReg, shiftAmount); // Mask of exponent with all 1's and check if the exponent is all 1's emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask); emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask); // If exponent is all 1's, throw ArithmeticException emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN); // if it is a finite value copy it to targetReg if (treeNode->gtRegNum != fpReg) { emit->emitIns_R_R(ins_Copy(targetType), emitActualTypeSize(treeNode), treeNode->gtRegNum, fpReg); } genProduceReg(treeNode); } //------------------------------------------------------------------------ // genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT/GT_TEST_EQ/GT_TEST_NE node. 
// // Arguments: // tree - the node // void CodeGen::genCodeForCompare(GenTreeOp* tree) { regNumber targetReg = tree->gtRegNum; emitter* emit = getEmitter(); GenTreePtr op1 = tree->gtOp1; GenTreePtr op2 = tree->gtOp2; var_types op1Type = genActualType(op1->TypeGet()); var_types op2Type = genActualType(op2->TypeGet()); assert(!op1->isUsedFromMemory()); assert(!op2->isUsedFromMemory()); genConsumeOperands(tree); emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); assert(genTypeSize(op1Type) == genTypeSize(op2Type)); if (varTypeIsFloating(op1Type)) { assert(varTypeIsFloating(op2Type)); assert(!op1->isContained()); assert(op1Type == op2Type); if (op2->IsIntegralConst(0)) { assert(op2->isContained()); emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); } else { assert(!op2->isContained()); emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); } } else { assert(!varTypeIsFloating(op2Type)); // We don't support swapping op1 and op2 to generate cmp reg, imm assert(!op1->isContainedIntOrIImmed()); instruction ins = tree->OperIs(GT_TEST_EQ, GT_TEST_NE) ? INS_tst : INS_cmp; if (op2->isContainedIntOrIImmed()) { GenTreeIntConCommon* intConst = op2->AsIntConCommon(); emit->emitIns_R_I(ins, cmpSize, op1->gtRegNum, intConst->IconValue()); } else { emit->emitIns_R_R(ins, cmpSize, op1->gtRegNum, op2->gtRegNum); } } // Are we evaluating this into a register? if (targetReg != REG_NA) { genSetRegToCond(targetReg, tree); genProduceReg(tree); } } //------------------------------------------------------------------------ // genCodeForJumpCompare: Generates code for jmpCompare statement. // // A GT_JCMP node is created when a comparison and conditional branch // can be executed in a single instruction. // // Arm64 has a few instructions with this behavior. 
// - cbz/cbnz -- Compare and branch register zero/not zero // - tbz/tbnz -- Test and branch register bit zero/not zero // // The cbz/cbnz supports the normal +/- 1MB branch range for conditional branches // The tbz/tbnz supports a smaller +/- 32KB branch range // // A GT_JCMP cbz/cbnz node is created when there is a GT_EQ or GT_NE // integer/unsigned comparison against #0 which is used by a GT_JTRUE // condition jump node. // // A GT_JCMP tbz/tbnz node is created when there is a GT_TEST_EQ or GT_TEST_NE // integer/unsigned comparison against against a mask with a single bit set // which is used by a GT_JTRUE condition jump node. // // This node is repsonsible for consuming the register, and emitting the // appropriate fused compare/test and branch instruction // // Two flags guide code generation // GTF_JCMP_TST -- Set if this is a tbz/tbnz rather than cbz/cbnz // GTF_JCMP_EQ -- Set if this is cbz/tbz rather than cbnz/tbnz // // Arguments: // tree - The GT_JCMP tree node. // // Return Value: // None // void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) { assert(compiler->compCurBB->bbJumpKind == BBJ_COND); GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); assert(tree->OperIs(GT_JCMP)); assert(!varTypeIsFloating(tree)); assert(!op1->isUsedFromMemory()); assert(!op2->isUsedFromMemory()); assert(op2->IsCnsIntOrI()); assert(op2->isContained()); genConsumeOperands(tree); regNumber reg = op1->gtRegNum; emitAttr attr = emitActualTypeSize(op1->TypeGet()); if (tree->gtFlags & GTF_JCMP_TST) { ssize_t compareImm = op2->gtIntCon.IconValue(); assert(isPow2(compareImm)); instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz; int imm = genLog2((size_t)compareImm); getEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); } else { assert(op2->IsIntegralConst(0)); instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? 
INS_cbz : INS_cbnz; getEmitter()->emitIns_J_R(ins, attr, compiler->compCurBB->bbJumpDest, reg); } } int CodeGenInterface::genSPtoFPdelta() { int delta; // We place the saved frame pointer immediately above the outgoing argument space. delta = (int)compiler->lvaOutgoingArgSpaceSize; assert(delta >= 0); return delta; } //--------------------------------------------------------------------- // genTotalFrameSize - return the total size of the stack frame, including local size, // callee-saved register size, etc. // // Return value: // Total frame size // int CodeGenInterface::genTotalFrameSize() { // For varargs functions, we home all the incoming register arguments. They are not // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but // since we don't use "push" instructions to save them, we don't have to do the // save of these varargs register arguments as the first thing in the prolog. assert(!IsUninitialized(compiler->compCalleeRegsPushed)); int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) + compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; assert(totalFrameSize >= 0); return totalFrameSize; } //--------------------------------------------------------------------- // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. // This number is going to be negative, since the Caller-SP is at a higher // address than the frame pointer. // // There must be a frame pointer to call this function! int CodeGenInterface::genCallerSPtoFPdelta() { assert(isFramePointerUsed()); int callerSPtoFPdelta; callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); assert(callerSPtoFPdelta <= 0); return callerSPtoFPdelta; } //--------------------------------------------------------------------- // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. // // This number will be negative. 
int CodeGenInterface::genCallerSPtoInitialSPdelta()
{
    int callerSPtoSPdelta = 0;

    // Subtract the total frame size, so the result is zero or negative.
    callerSPtoSPdelta -= genTotalFrameSize();

    assert(callerSPtoSPdelta <= 0);
    return callerSPtoSPdelta;
}

/*****************************************************************************
 *  Emit a call to a helper function.
 *
 */
// Arguments:
//    helper        - the helper to call (converted to CorInfoHelpFunc for the lookups)
//    argSize       - forwarded unchanged to emitIns_Call
//    retSize       - forwarded unchanged to emitIns_Call
//    callTargetReg - register that holds the helper's address when the call has to be
//                    made indirectly; REG_NA selects REG_DEFAULT_HELPER_CALL_TARGET
//
// Notes:
//    When compGetHelperFtn returns a non-null address the call is emitted as
//    EC_FUNC_TOKEN using that address. When it returns null, the target is loaded
//    through 'pAddr' into the call target register and the call is emitted as
//    EC_INDIR_R. After the call, the helper's kill set is marked as trashed in
//    the register tracker.
//
void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
{
    void* addr  = nullptr;
    void* pAddr = nullptr;

    emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
    addr                           = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
    regNumber callTarget           = REG_NA;

    // NOTE(review): presumably a null 'addr' means only the indirection cell 'pAddr' is available -
    // confirm against compGetHelperFtn.
    if (addr == nullptr)
    {
        // This is call to a runtime helper.
        // adrp x, [reloc:rel page addr]
        // add x, x, [reloc:page offset]
        // ldr x, [x]
        // br x

        if (callTargetReg == REG_NA)
        {
            // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
            // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
            callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
        }

        regMaskTP callTargetMask = genRegMask(callTargetReg);
        regMaskTP callKillSet    = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);

        // assert that all registers in callTargetMask are in the callKillSet
        noway_assert((callTargetMask & callKillSet) == callTargetMask);

        callTarget = callTargetReg;

        // adrp + add with relocations will be emitted
        getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
        // Load the helper's entry point from the cell whose address was just formed.
        getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
        callType = emitter::EC_INDIR_R;
    }

    // For a direct call 'callTarget' is still REG_NA and 'addr' carries the target.
    getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
                               retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */
                               callTarget,                             /* ireg */
                               REG_NA, 0, 0,                           /* xreg, xmul, disp */
                               false,                                  /* isJump */
                               emitter::emitNoGChelper(helper));

    // Record that every register in the helper's kill set no longer holds a tracked value.
    regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
    regTracker.rsTrashRegSet(killMask);
    regTracker.rsTrashRegsForGCInterruptability();
}

/*****************************************************************************
 * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
 * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
 * disassembler thinks the instructions as the same as we do.
 */

// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
//#define ALL_ARM64_EMITTER_UNIT_TESTS

#if defined(DEBUG)
void CodeGen::genArm64EmitterUnitTests()
{
    if (!verbose)
    {
        return;
    }

    if (!compiler->opts.altJit)
    {
        // No point doing this in a "real" JIT.
        return;
    }

    // Mark the "fake" instructions in the output.
printf("*************** In genArm64EmitterUnitTests()\n"); emitter* theEmitter = getEmitter(); #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // We use this: // genDefineTempLabel(genCreateTempLabel()); // to create artificial labels to help separate groups of tests. // // Loads/Stores basic general register // genDefineTempLabel(genCreateTempLabel()); // ldr/str Xt, [reg] theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9); // ldr/str Wt, [reg] theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursh, 
EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1); // SP and ZR tests theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1); theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1); // scaled theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8); // pre-/post-indexed (unscaled) theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); // ldar/stlr Rt, [reg] theEmitter->emitIns_R_R(INS_ldar, EA_8BYTE, REG_R9, REG_R8); theEmitter->emitIns_R_R(INS_ldar, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_ldarb, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_ldarh, EA_4BYTE, REG_R5, REG_R12); theEmitter->emitIns_R_R(INS_stlr, EA_8BYTE, REG_R9, REG_R8); theEmitter->emitIns_R_R(INS_stlr, EA_4BYTE, REG_R7, REG_R13); theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14); theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15); // ldaxr Rt, [reg] theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8); theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_ldaxrb, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_ldaxrh, 
EA_4BYTE, REG_R5, REG_R12); // ldxr Rt, [reg] theEmitter->emitIns_R_R(INS_ldxr, EA_8BYTE, REG_R9, REG_R8); theEmitter->emitIns_R_R(INS_ldxr, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_ldxrb, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_ldxrh, EA_4BYTE, REG_R5, REG_R12); // stxr Ws, Rt, [reg] theEmitter->emitIns_R_R_R(INS_stxr, EA_8BYTE, REG_R1, REG_R9, REG_R8); theEmitter->emitIns_R_R_R(INS_stxr, EA_4BYTE, REG_R3, REG_R7, REG_R13); theEmitter->emitIns_R_R_R(INS_stxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); theEmitter->emitIns_R_R_R(INS_stxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); // stlxr Ws, Rt, [reg] theEmitter->emitIns_R_R_R(INS_stlxr, EA_8BYTE, REG_R1, REG_R9, REG_R8); theEmitter->emitIns_R_R_R(INS_stlxr, EA_4BYTE, REG_R3, REG_R7, REG_R13); theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // Compares // genDefineTempLabel(genCreateTempLabel()); // cmp reg, reg theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9); // cmp reg, imm theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, 
REG_R8, -1); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // R_R // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12); theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13); theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14); theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15); theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0); theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1); theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2); theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3); theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4); theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5); theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_I // genDefineTempLabel(genCreateTempLabel()); // mov reg, imm(i16,hw) theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 
0x4567FFFF); // mov reg, imm(N,r,s) theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R // genDefineTempLabel(genCreateTempLabel()); // tst reg, reg theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10); // mov reg, reg theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, 
REG_R8, REG_SP); theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9); theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13); theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13); theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_I_I // genDefineTempLabel(genCreateTempLabel()); // mov reg, imm(i16,hw) theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I // 
genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1); theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18); theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37); theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2); theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53); theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18); theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555); theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666); theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707); theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000); theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63); theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_add, 
EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1); 
theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1); 
theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I cmp/tst // // cmp theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0); // CMP (shifted register) theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR); // TST (shifted register) theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR); theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR); // CMP (extended register) theEmitter->emitIns_R_R_I(INS_cmp,
EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis // disassembles this "cmp x8,x9", // which looks like an msdis issue. theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX); // CMP 64-bit (extended register) and left shift theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX); // CMP 32-bit (extended register) and left shift theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // 
// R_R_R // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10); 
theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I_I // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39); theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23); theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7); theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37); theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21); theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5); theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24); theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16); theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, 
REG_R8, REG_R9, 42, 8); theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19); theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13); theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7); theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17); theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11); theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5); theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14); theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9); theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_I // genDefineTempLabel(genCreateTempLabel()); // ADD (extended register) theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX); // ADD (extended register) and left shift theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX); 
theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX); // ADD (shifted register) theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR); // EXTR (extract field from register pair) theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1); theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31); theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32); theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63); theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1); theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31); // SUB (extended register) theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, 
INS_OPTS_SXTX); // SUB (extended register) and left shift theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX); // SUB (shifted register) theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR); // bit operations theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR); 
theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR); theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR); theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR); theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_I -- load/store pair // 
theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); 
theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); // SP and ZR tests theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_Ext -- load/store shifted/extend // genDefineTempLabel(genCreateTempLabel()); // LDR (register) theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, 
REG_SP, REG_R9, INS_OPTS_UXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); 
theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, 
EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); // STR (register) theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, 
REG_R9, INS_OPTS_UXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); 
theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_R // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10); theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11); theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12); theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13); theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14); theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15); theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19); theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20); theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21); theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22); theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23); theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // R_COND // // cset reg, cond theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo 
theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le // csetm reg, cond theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // R_R_COND // // cinc reg, reg, cond // cinv reg, reg, cond // cneg reg, reg, cond 
theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // R_R_R_COND // // csel reg, reg, reg, cond // csinc reg, reg, reg, cond // csinv reg, reg, reg, cond // csneg reg, reg, reg, cond theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl 
theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // R_R_FLAGS_COND // // ccmp reg1, reg2, nzcv, cond theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, 
REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmp reg1, imm, nzcv, cond theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmp reg1, imm, nzcv, cond -- encoded as ccmn theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq 
theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmn reg1, reg2, nzcv, cond theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl 
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmn reg1, imm, nzcv, cond theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge 
theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // Branch to register // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // Misc // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS //////////////////////////////////////////////////////////////////////////////// // // SIMD and Floating point // //////////////////////////////////////////////////////////////////////////////// // // Load/Stores vector register // genDefineTempLabel(genCreateTempLabel()); // ldr/str Vt, [reg] theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9); theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, 
REG_R8); theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7); theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6); theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5); theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4); theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3); theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2); theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1); theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0); // ldr/str Vt, [reg+cns] -- scaled theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16); theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16); // ldr/str Vt, [reg],cns -- post-indexed (unscaled) // ldr/str Vt, [reg+cns]! 
-- pre-indexed (unscaled) theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2); theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3); theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5); theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, 
REG_R9, 9); theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17); theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2); theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3); theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5); theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9); theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17); // load/store pair theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24); theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12); theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48); theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, 
REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX); // LDR (register) theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); 
theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); 
theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R mov and aliases for mov // // mov vector to 
vector theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13); theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15); theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17); // mov vector to general theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5); theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6); theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7); // mov general to vector theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5); theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6); theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7); // mov vector[index] to vector theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3); theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7); theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15); // mov to general from vector[index] theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1); theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2); theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3); theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4); // mov to vector[index] from general theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1); theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2); theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6); theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8); // mov vector[index] to vector[index2] theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0); theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1); theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2); 
theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3); ////////////////////////////////////////////////////////////////////////////////// // mov/dup scalar theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1); theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3); theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7); theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15); // mov/ins vector element theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1); theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2); theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3); theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4); // umov to general from vector element theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1); theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2); theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4); theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8); // ins to vector element from general theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1); theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3); theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7); theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15); // smov to general from vector element theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2); theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4); theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_I movi and mvni // // movi imm8 (vector) theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, 
INS_OPTS_16B); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8 
theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16 #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_I orr/bic vector immediate // theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); 
// LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_F cmp/fmov immediate // // fmov imm8 (scalar) theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0); 
theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0); theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31); theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125); // fmov imm8 (vector) theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D); // fcmp with 0.0 theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0); theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0); theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0); theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R 
fmov/fcmp/fcvt // // fmov to vector to vector theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3); // fmov to vector to general theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5); // using the optional conversion specifier theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE); // fmov to general to vector theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5); // using the optional conversion specifier theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S); // fcmp/fcmpe theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16); theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17); theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18); theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19); // fcvt theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S); theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D); theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H); theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R floating point conversions // // fcvtas scalar theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3); // fcvtas scalar to general theEmitter->emitIns_R_R(INS_fcvtas, 
EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtas vector theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtau scalar theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3); // fcvtau scalar to general theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtau vector theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtms scalar theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3); // fcvtms scalar to general theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtms vector theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); 
theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtmu scalar theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3); // fcvtmu scalar to general theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtmu vector theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtns scalar theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3); // fcvtns scalar to general theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtns vector theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtnu scalar theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3); // fcvtnu scalar to general theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, 
INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtnu vector theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtps scalar theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3); // fcvtps scalar to general theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtps vector theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtpu scalar theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3); // fcvtpu scalar to general theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtpu vector theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtpu, 
EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtzs scalar theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3); // fcvtzs scalar to general theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtzs vector theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtzu scalar theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3); // fcvtzu scalar to general theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtzu vector theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // scvtf scalar theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3); // scvtf scalar from general 
theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S); theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S); theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D); theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D); // scvtf vector theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // ucvtf scalar theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3); // ucvtf scalar from general theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S); theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S); theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D); theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D); // ucvtf vector theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R floating point operations, one dest, one source // // fabs scalar theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3); // fabs vector theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // fneg scalar theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3); // fneg vector 
theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // fsqrt scalar theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3); // fsqrt vector theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); genDefineTempLabel(genCreateTempLabel()); // abs scalar theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3); // abs vector theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D); // neg scalar theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3); // neg vector theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D); // mvn vector theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5); theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, 
REG_V7, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9); theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B); // cnt vector theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B); // not vector (the same encoding as mvn) theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13); theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17); theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B); // cls vector theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); // clz vector theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); // rbit vector theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); // rev16 vector theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); // rev32 vector theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); 
theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); // rev64 vector theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); #endif #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R floating point round to int, one dest, one source // // frinta scalar theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3); // frinta vector theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frinti scalar theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3); // frinti vector theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintm scalar theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3); // frintm vector theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintn scalar 
theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3); // frintn vector theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintp scalar theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3); // frintp vector theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintx scalar theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3); // frintx vector theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintz scalar theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3); // frintz vector theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R floating point operations, one dest, two source // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); 
theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); // fabd theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 
4BYTE theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmulx, 
EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I vector operations, one dest, one source reg, one immed // genDefineTempLabel(genCreateTempLabel()); // 'sshr' scalar theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63); // 'sshr' vector theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'ssra' scalar theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63); // 'ssra' vector 
theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'srshr' scalar theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63); // 'srshr' vector theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'srsra' scalar theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40); 
theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63); // 'srsra' vector theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'shl' scalar theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63); // 'shl' vector theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'ushr' scalar theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_ushr, 
EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63); // 'ushr' vector theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'usra' scalar theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63); // 'usra' vector theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'urshr' scalar theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 
27); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63); // 'urshr' vector theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'ursra' scalar theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63); // 'ursra' vector theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'sri' scalar theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63); // 'sri' vector theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'sli' scalar theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63); // 'sli' vector theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'sshll' vector theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sshll2, 
EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'ushll' vector theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'shrn' vector theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'rshrn' vector theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'sxtl' vector theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); 
theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); // 'uxtl' vector theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R vector operations, one dest, two source // genDefineTempLabel(genCreateTempLabel()); // Specifying an Arrangement is optional // theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8); theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11); theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14); theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17); theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20); theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23); theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26); theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29); theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0); theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3); theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6); theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9); theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, 
REG_V12); theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15); theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18); theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21); // Default Arrangement as per the ARM64 manual // theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B); genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, 
REG_V9, REG_V10, REG_V11, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D); genDefineTempLabel(genCreateTempLabel()); // saba vector theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); // sabd vector theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, 
INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); // uaba vector theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); // uabd vector theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R vector multiply // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B); 
theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B); // 'mul' vector by elem theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); // 'mla' vector by elem theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, 
REG_V17, REG_V13, 3, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); // 'mls' vector by elem theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R floating point operations, one source/dest, and two source // genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE 
theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_R floating point operations, one dest, and three source // theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24); theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25); theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26); theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27); theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28); theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29); theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30); theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31); #endif #ifdef ALL_ARM64_EMITTER_UNIT_TESTS 
BasicBlock* label = genCreateTempLabel(); genDefineTempLabel(label); instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0); #endif // ALL_ARM64_EMITTER_UNIT_TESTS printf("*************** End of genArm64EmitterUnitTests()\n"); } #endif // defined(DEBUG) #endif // _TARGET_ARM64_ #endif // !LEGACY_BACKEND