// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. // /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX Arm64 Code Generator XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" #ifdef _MSC_VER #pragma hdrstop #endif #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator #ifdef _TARGET_ARM64_ #include "emit.h" #include "codegen.h" #include "lower.h" #include "gcinfo.h" #include "gcinfoencoder.h" /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX Prolog / Epilog XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ //------------------------------------------------------------------------ // genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog // or the epilog. The unwind codes for the generated instructions are produced. An available temporary // register is required to be specified, in case the constant is too large to encode in an "add" // instruction (or "sub" instruction if we choose to use one), such that we need to load the constant // into a register first, before using it. // // Arguments: // spDelta - the value to add to SP (can be negative) // tmpReg - an available temporary register // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. 
void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { unsigned unwindSpDelta; if (emitter::emitIns_valid_imm_for_add(spDelta, EA_8BYTE)) { getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); unwindSpDelta = (unsigned)abs(spDelta); } else { bool adjustmentIsNegative = (spDelta < 0); spDelta = abs(spDelta); instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, spDelta); if (pTmpRegIsZero != nullptr) { *pTmpRegIsZero = false; } compiler->unwindPadding(); getEmitter()->emitIns_R_R_R(adjustmentIsNegative ? INS_sub : INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tmpReg); unwindSpDelta = (unsigned)spDelta; } // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive value. compiler->unwindAllocStack(unwindSpDelta); } //------------------------------------------------------------------------ // genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet prolog. // If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. // The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that instruction. // // Arguments: // reg1 - First register of pair to save. // reg2 - Second register of pair to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero). // lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This allows us to // emit the "save_next" unwind code. // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. 
void CodeGen::genPrologSaveRegPair(regNumber reg1, regNumber reg2, int spOffset, int spDelta, bool lastSavedWasPreviousPair, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both FP/SIMD bool needToSaveRegs = true; if (spDelta != 0) { if ((spOffset == 0) && (spDelta >= -512)) { // We can use pre-indexed addressing. // stp REG, REG + 1, [SP, #spDelta]! // 64-bit STP offset range: -512 to 504, multiple of 8. getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); needToSaveRegs = false; } else { // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the non-zero offset. genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } } if (needToSaveRegs) { // stp REG, REG + 1, [SP, #offset] // 64-bit STP offset range: -512 to 504, multiple of 8. assert(spOffset <= 504); getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); if (lastSavedWasPreviousPair) { // This works as long as we've only been saving pairs, in order, and we've saved the previous one just before this one. compiler->unwindSaveNext(); } else { compiler->unwindSaveRegPair(reg1, reg2, spOffset); } } } //------------------------------------------------------------------------ // genPrologSaveRegPair: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or floating-point/SIMD register // in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), then spOffset must be 8. This is because // otherwise we would create an alignment hole above the saved register, not below it, which we currently don't support. 
This restriction // could be loosened if the callers change to handle it (and this function changes to support using pre-indexed STR addressing). // The caller must ensure that we can use the STR instruction, and that spOffset will be in the legal range for that instruction. // // Arguments: // reg1 - Register to save. // spOffset - The offset from SP to store reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero). // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta <= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned if (spDelta != 0) { // If saving a single callee-save register, and we need to change SP, the offset cannot be zero. It must be 8 to account // for alignment. assert(spOffset != 0); assert(spOffset == REGSIZE_BYTES); genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } // str REG, [SP, #offset] // 64-bit STR offset range: 0 to 32760, multiple of 8. getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); } //------------------------------------------------------------------------ // genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. // The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. // The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that instruction. // // Arguments: // reg1 - First register of pair to restore. // reg2 - Second register of pair to restore. 
// spOffset - The offset from SP to load reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero). // tmpReg - An available temporary register. Needed for the case of large frames. // pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. void CodeGen::genEpilogRestoreRegPair(regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned if (spDelta != 0) { if ((spOffset == 0) && (spDelta <= 504)) { // Fold the SP change into this instruction. // ldp reg1, reg2, [SP], #spDelta getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX); compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta); } else { // Can't fold in the SP change; need to use a separate ADD instruction. // ldp reg1, reg2, [SP, #offset] getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); compiler->unwindSaveRegPair(reg1, reg2, spOffset); genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } } else { // ldp reg1, reg2, [SP, #offset] getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); compiler->unwindSaveRegPair(reg1, reg2, spOffset); } } //------------------------------------------------------------------------ // genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. // // Arguments: // reg1 - Register to restore. // spOffset - The offset from SP to restore reg1 (must be positive or zero). // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero). // tmpReg - An available temporary register. Needed for the case of large frames. 
// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. // Otherwise, we don't touch it. // // Return Value: // None. void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) { assert(spOffset >= 0); assert(spDelta >= 0); assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned // ldr reg1, [SP, #offset] getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); if (spDelta != 0) { assert(spOffset != 0); genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero); } } //------------------------------------------------------------------------ // genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame // in the function or funclet prolog. The save set does not contain FP, since that is // guaranteed to be saved separately, so we can set up chaining. We can only use the instructions // that are allowed by the unwind codes. Integer registers are stored at lower addresses, // FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that // there is enough space on the frame to store these registers, and that the store instructions // we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to // use. Note that the save set can contain LR if this is a frame without a frame pointer, in // which case LR is saved along with the other callee-saved registers. The caller can tell us // to fold in a stack pointer adjustment, which we will do with the first instruction. Note that // the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the // stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved // registers, though, we will have an empty aligment slot somewhere. 
It turns out we will put // it below (at a lower address) the callee-saved registers, as that is currently how we // do frame layout. This means that the first stack offset will be 8 and the stack pointer // adjustment must be done by a SUB, and not folded in to a pre-indexed store. // // Arguments: // regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing. // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that // if non-zero spDelta, then this is the offset of the first save *after* that // SP adjustment. // spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero). // // Return Value: // None. void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) { unsigned regsToSaveCount = genCountBits(regsToSaveMask); if (regsToSaveCount == 0) { return; } assert(spDelta <= 0); assert((spDelta % 16) == 0); assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED. regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP. if (maskSaveRegsInt != RBM_NONE) { // Save the integer registers unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt); bool lastSavedWasPair = false; while (maskSaveRegsInt != RBM_NONE) { regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt); regNumber reg1 = genRegNumFromMask(reg1Mask); maskSaveRegsInt &= ~reg1Mask; if (intRegsToSaveCount >= 2) { // We can use a STP instruction. 
regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt); regNumber reg2 = genRegNumFromMask(reg2Mask); assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR)); maskSaveRegsInt &= ~reg2Mask; genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr); // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating this epilog, to // get the codes to match. Turn this off until that is better understood. // lastSavedWasPair = true; intRegsToSaveCount -= 2; spOffset += 2 * REGSIZE_BYTES; } else { // No register pair; we use a STR instruction. assert(intRegsToSaveCount == 1); // this will be the last store we do genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr); lastSavedWasPair = false; intRegsToSaveCount -= 1; spOffset += REGSIZE_BYTES; } spDelta = 0; // We've now changed SP already, if necessary; don't do it again. } assert(intRegsToSaveCount == 0); } if (maskSaveRegsFloat != RBM_NONE) { // Save the floating-point/SIMD registers unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat); bool lastSavedWasPair = false; while (maskSaveRegsFloat != RBM_NONE) { regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat); regNumber reg1 = genRegNumFromMask(reg1Mask); maskSaveRegsFloat &= ~reg1Mask; if (floatRegsToSaveCount >= 2) { // We can use a STP instruction. regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat); regNumber reg2 = genRegNumFromMask(reg2Mask); assert(reg2 == REG_NEXT(reg1)); maskSaveRegsFloat &= ~reg2Mask; genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr); // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating this epilog, to // get the codes to match. Turn this off until that is better understood. // lastSavedWasPair = true; floatRegsToSaveCount -= 2; spOffset += 2 * FPSAVE_REGSIZE_BYTES; } else { // No register pair; we use a STR instruction. 
assert(floatRegsToSaveCount == 1); genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr); lastSavedWasPair = false; floatRegsToSaveCount -= 1; spOffset += FPSAVE_REGSIZE_BYTES; } spDelta = 0; // We've now changed SP already, if necessary; don't do it again. } assert(floatRegsToSaveCount == 0); } } //------------------------------------------------------------------------ // genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame // in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp(). // // Arguments: // regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. // lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. // spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero). // // Here's an example restore sequence: // ldp x27, x28, [sp,#96] // ldp x25, x26, [sp,#80] // ldp x23, x24, [sp,#64] // ldp x21, x22, [sp,#48] // ldp x19, x20, [sp,#32] // // For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and // the last restore adjusts SP by the specified amount. For example: // ldp x27, x28, [sp,#64] // ldp x25, x26, [sp,#48] // ldp x23, x24, [sp,#32] // ldp x21, x22, [sp,#16] // ldp x19, x20, [sp], #80 // // Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when generating // a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. // // Return Value: // None. 
void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
{
    const unsigned totalRegCount = genCountBits(regsToRestoreMask);
    if (totalRegCount == 0)
    {
        return;
    }

    assert(spDelta >= 0);
    assert((spDelta % 16) == 0);
    assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here
    assert(totalRegCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.

    regMaskTP floatMask = regsToRestoreMask & RBM_ALLFLOAT;
    regMaskTP intMask   = regsToRestoreMask & ~floatMask;

    // Integer and FP/SIMD save slots must have the same size for the single running offset below to be valid.
    assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);

    // Point past the end, to start. We predecrement to find the offset to load from.
    int curOffset = lowestCalleeSavedOffset + totalRegCount * REGSIZE_BYTES;

    // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
    // callee-saved registers properly.

    if (floatMask != RBM_NONE)
    {
        // Restore the floating-point/SIMD registers, highest-numbered first.

        unsigned floatRegsLeft = genCountBits(floatMask);

        while (floatMask != RBM_NONE)
        {
            if ((floatRegsLeft % 2) != 0)
            {
                // We do the odd register first when restoring, last when saving.
                assert((floatRegsLeft % 2) == 1);

                const regMaskTP regBit = genFindHighestBit(floatMask);
                const regNumber reg    = genRegNumFromMask(regBit);
                floatMask &= ~regBit;

                curOffset -= FPSAVE_REGSIZE_BYTES;

                // Only the very last restore instruction carries the SP adjustment, and the integer
                // registers (if any) are restored after the FP/SIMD ones.
                const bool isLastRestore = (floatRegsLeft == 1) && (intMask == RBM_NONE);
                const int  deltaToApply  = isLastRestore ? spDelta : 0;
                genEpilogRestoreReg(reg, curOffset, deltaToApply, REG_IP0, nullptr);

                floatRegsLeft -= 1;
            }
            else
            {
                assert(floatRegsLeft >= 2);

                // The higher-numbered register of the pair is found first.
                const regMaskTP highBit = genFindHighestBit(floatMask);
                const regNumber highReg = genRegNumFromMask(highBit);
                floatMask &= ~highBit;

                const regMaskTP lowBit = genFindHighestBit(floatMask);
                const regNumber lowReg = genRegNumFromMask(lowBit);
                floatMask &= ~lowBit;

                curOffset -= 2 * FPSAVE_REGSIZE_BYTES;

                // Only the very last restore instruction carries the SP adjustment.
                const bool isLastRestore = (floatRegsLeft == 2) && (intMask == RBM_NONE);
                const int  deltaToApply  = isLastRestore ? spDelta : 0;
                genEpilogRestoreRegPair(lowReg, highReg, curOffset, deltaToApply, REG_IP0, nullptr);

                floatRegsLeft -= 2;
            }
        }

        assert(floatRegsLeft == 0);
    }

    if (intMask != RBM_NONE)
    {
        // Restore the integer registers, highest-numbered first.

        unsigned intRegsLeft = genCountBits(intMask);

        while (intMask != RBM_NONE)
        {
            if ((intRegsLeft % 2) != 0)
            {
                // We do the odd register first when restoring, last when saving.
                assert((intRegsLeft % 2) == 1);

                const regMaskTP regBit = genFindHighestBit(intMask);
                const regNumber reg    = genRegNumFromMask(regBit);
                intMask &= ~regBit;

                curOffset -= REGSIZE_BYTES;

                // Integer registers are restored last, so the final one carries the SP adjustment.
                const bool isLastRestore = (intRegsLeft == 1);
                const int  deltaToApply  = isLastRestore ? spDelta : 0;
                genEpilogRestoreReg(reg, curOffset, deltaToApply, REG_IP0, nullptr);

                intRegsLeft -= 1;
            }
            else
            {
                assert(intRegsLeft >= 2);

                // The higher-numbered register of the pair is found first.
                const regMaskTP highBit = genFindHighestBit(intMask);
                const regNumber highReg = genRegNumFromMask(highBit);
                intMask &= ~highBit;

                const regMaskTP lowBit = genFindHighestBit(intMask);
                const regNumber lowReg = genRegNumFromMask(lowBit);
                intMask &= ~lowBit;

                curOffset -= 2 * REGSIZE_BYTES;

                // Integer registers are restored last, so the final pair carries the SP adjustment.
                const bool isLastRestore = (intRegsLeft == 2);
                const int  deltaToApply  = isLastRestore ? spDelta : 0;
                genEpilogRestoreRegPair(lowReg, highReg, curOffset, deltaToApply, REG_IP0, nullptr);

                intRegsLeft -= 2;
            }
        }

        assert(intRegsLeft == 0);
    }
}

/*****************************************************************************
 *
 *  Generates code for an EH funclet prolog.
 *
 *  Funclets have the following incoming arguments:
 *
 *      catch:          x0 = the exception object that was caught (see GT_CATCH_ARG)
 *      filter:         x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
 *      finally/fault:  none
 *
 *  Funclets set the following registers on exit:
 *
 *      catch:          x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
 *      filter:         x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
 *      finally/fault:  none
 *
 *  The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
 *  including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
 *
 *  Frame type 1:
 *     For #outsz == 0 and #framesz <= 512:
 *     stp fp,lr,[sp,-#framesz]!    ; establish the frame, save FP/LR
 *     stp x19,x20,[sp,#xxx]        ; save callee-saved registers, as necessary
 *
 *  The funclet frame is thus:
 *
 *      |                       |
 *      |-----------------------|
 *      |       incoming        |
 *      |       arguments       |
 *      +=======================+ <---- Caller's SP
 *      |Callee saved registers | // multiple of 8 bytes
 *      |-----------------------|
 *      |        PSP slot       | // 8 bytes
 *      |-----------------------|
 *      ~  alignment padding    ~ // To make the whole frame 16 byte aligned.
 *      |-----------------------|
 *      |      Saved FP, LR     | // 16 bytes
 *      |-----------------------| <---- Ambient SP
 *      |       |               |
 *      ~       | Stack grows   ~
 *      |       | downward      |
 *              V
 *
 *  Frame type 2:
 *     For #outsz != 0 and #framesz <= 512:
 *     sub sp,sp,#framesz           ; establish the frame
 *     stp fp,lr,[sp,#outsz]        ; save FP/LR.
* stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V * * Frame type 3: * For #framesz > 512: * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168 * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * sub sp,sp,#outsz ; create space for outgoing argument space * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes * |-----------------------| * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space). * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V * * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. 
Remember that the stack * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes: * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call). * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym, * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the * outgoing argument space. Both changes to SP might need to add alignment padding. * * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP * as in the main function. * * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. * * if (this is a filter funclet) * { * // x1 on entry to a filter funclet is CallerSP of the containing function: * // either the main function, or the funclet for a handler that this filter is dynamically nested within. * // Note that a filter can be dynamically nested within a funclet even if it is not statically within * // a funclet. Consider: * // * // try { * // try { * // throw new Exception(); * // } catch(Exception) { * // throw new Exception(); // The exception thrown here ... * // } * // } filter { // ... 
will be processed here, while the "catch" funclet frame is still on the stack * // } filter-handler { * // } * // * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always * // create a main function PSP for any function with a filter. * * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer * } * else * { * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. * * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch. * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP * } * * An example epilog sequence is then: * * add sp,sp,#outsz ; if any outgoing argument space * ... ; restore callee-saved registers * ldp x19,x20,[sp,#xxx] * ldp fp,lr,[sp],#framesz * ret lr * * The funclet frame is thus: * * | | * |-----------------------| * | incoming | * | arguments | * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| * | PSP slot | // 8 bytes * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. 
* |-----------------------| * | Outgoing arg space | // multiple of 8 bytes * |-----------------------| <---- Ambient SP * | | | * ~ | Stack grows ~ * | | downward | * V */ void CodeGen::genFuncletProlog(BasicBlock* block) { #ifdef DEBUG if (verbose) printf("*************** In genFuncletProlog()\n"); #endif assert(block != NULL); assert(block->bbFlags && BBF_FUNCLET_BEG); ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); gcInfo.gcResetForBB(); compiler->unwindBegProlog(); regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. assert((maskSaveRegsInt & RBM_LR) != 0); assert((maskSaveRegsInt & RBM_FP) != 0); bool isFilter = (block->bbCatchTyp == BBCT_FILTER); regMaskTP maskArgRegsLiveIn; if (isFilter) { maskArgRegsLiveIn = RBM_R0 | RBM_R1; } else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) { maskArgRegsLiveIn = RBM_NONE; } else { maskArgRegsLiveIn = RBM_R0; } int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta; if (genFuncletInfo.fiFrameType == 1) { getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); assert(genFuncletInfo.fiSpDelta2 == 0); assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0); } else if (genFuncletInfo.fiFrameType == 2) { getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta1); compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta1); assert(genFuncletInfo.fiSpDelta2 == 0); getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); } else { 
assert(genFuncletInfo.fiFrameType == 3); getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet. } maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0); if (genFuncletInfo.fiFrameType == 3) { assert(genFuncletInfo.fiSpDelta2 != 0); getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta2); compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta2); } // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); if (isFilter) { // This is the first block of a filter getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta); regTracker.rsTrackRegTrash(REG_R1); getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta); getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta); } else { // This is a non-filter funclet getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta); regTracker.rsTrackRegTrash(REG_R3); getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta); } } /***************************************************************************** * * Generates code for an EH funclet epilog. 
*/ void CodeGen::genFuncletEpilog() { #ifdef DEBUG if (verbose) printf("*************** In genFuncletEpilog()\n"); #endif ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); bool unwindStarted = false; if (!unwindStarted) { // We can delay this until we know we'll generate an unwindable instruction, if necessary. compiler->unwindBegEpilog(); unwindStarted = true; } regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; // Funclets must always save LR and FP, since when we have funclets we must have an FP frame. assert((maskRestoreRegsInt & RBM_LR) != 0); assert((maskRestoreRegsInt & RBM_FP) != 0); maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta; if (genFuncletInfo.fiFrameType == 3) { assert(genFuncletInfo.fiSpDelta2 != 0); getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta2); compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta2); lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; } regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0); if (genFuncletInfo.fiFrameType == 1) { getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1, INS_OPTS_POST_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); assert(genFuncletInfo.fiSpDelta2 == 0); assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0); } else if (genFuncletInfo.fiFrameType == 2) { getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta); compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta); getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 
-genFuncletInfo.fiSpDelta1); compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta1); assert(genFuncletInfo.fiSpDelta2 == 0); } else { assert(genFuncletInfo.fiFrameType == 3); getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1, INS_OPTS_POST_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1); } inst_RV(INS_ret, REG_LR, TYP_I_IMPL); compiler->unwindReturn(REG_LR); compiler->unwindEndEpilog(); } /***************************************************************************** * * Capture the information used to generate the funclet prologs and epilogs. * Note that all funclet prologs are identical, and all funclet epilogs are * identical (per type: filters are identical, and non-filters are identical). * Thus, we compute the data used for these just once. * * See genFuncletProlog() for more information about the prolog/epilog sequences. */ void CodeGen::genCaptureFuncletPrologEpilogInfo() { if (!compiler->ehAnyFunclets()) return; assert(isFramePointerUsed()); assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; assert((rsMaskSaveRegs & RBM_LR) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + /* PSPSym */ REGSIZE_BYTES; unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN); assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); int SP_to_FPLR_save_delta; int SP_to_PSP_slot_delta; int 
CallerSP_to_PSP_slot_delta; if (maxFuncletFrameSizeAligned <= 512) { unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN); assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize; SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad; CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES); if (compiler->lvaOutgoingArgSpaceSize == 0) { genFuncletInfo.fiFrameType = 1; } else { genFuncletInfo.fiFrameType = 2; } genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; genFuncletInfo.fiSpDelta2 = 0; assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned); } else { unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); SP_to_FPLR_save_delta = outgoingArgSpaceAligned; SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad; CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad); genFuncletInfo.fiFrameType = 3; genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned; genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned; assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned); } /* Now save it for future use */ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; 
genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES; genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; #ifdef DEBUG if (verbose) { printf("\n"); printf("Funclet prolog / epilog info\n"); printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta); printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta); printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta); printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta); printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2); if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging { printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); } } #endif // DEBUG assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0); assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0); assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0); assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0); assert(compiler->lvaPSPSym != BAD_VAR_NUM); assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and funclet! 
} /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX End Prolog / Epilog XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ // Get the register assigned to the given node regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) { return tree->gtRegNum; } //------------------------------------------------------------------------ // genSpillVar: Spill a local variable // // Arguments: // tree - the lclVar node for the variable being spilled // // Return Value: // None. // // Assumptions: // The lclVar must be a register candidate (lvRegCandidate) void CodeGen::genSpillVar(GenTreePtr tree) { unsigned varNum = tree->gtLclVarCommon.gtLclNum; LclVarDsc * varDsc = &(compiler->lvaTable[varNum]); assert(varDsc->lvIsRegCandidate()); // We don't actually need to spill if it is already living in memory bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); if (needsSpill) { var_types lclTyp = varDsc->TypeGet(); if (varDsc->lvNormalizeOnStore()) lclTyp = genActualType(lclTyp); emitAttr size = emitTypeSize(lclTyp); bool restoreRegVar = false; if (tree->gtOper == GT_REG_VAR) { tree->SetOper(GT_LCL_VAR); restoreRegVar = true; } // mask off the flag to generate the right spill code, then bring it back tree->gtFlags &= ~GTF_REG_VAL; instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); if (varTypeIsMultiReg(tree)) { assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); regNumber regLo = genRegPairLo(tree->gtRegPair); regNumber regHi = genRegPairHi(tree->gtRegPair); inst_TT_RV(storeIns, tree, regLo); inst_TT_RV(storeIns, tree, regHi, 4); } else { assert(varDsc->lvRegNum == tree->gtRegNum); inst_TT_RV(storeIns, 
tree, tree->gtRegNum, 0, size); } tree->gtFlags |= GTF_REG_VAL; if (restoreRegVar) { tree->SetOper(GT_REG_VAR); } genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) { #ifdef DEBUG if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) { JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); } else { JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); } #endif VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); } } tree->gtFlags &= ~GTF_SPILL; varDsc->lvRegNum = REG_STK; if (varTypeIsMultiReg(tree)) { varDsc->lvOtherReg = REG_STK; } } // inline void CodeGenInterface::genUpdateVarReg(LclVarDsc * varDsc, GenTreePtr tree) { assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); varDsc->lvRegNum = tree->gtRegNum; } /*****************************************************************************/ /*****************************************************************************/ /***************************************************************************** * * Generate code that will set the given register to the integer constant. */ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) { // Reg cannot be a FP reg assert(!genIsValidFloatReg(reg)); // The only TYP_REF constant that can come this path is a managed 'null' since it is not // relocatable. Other ref type constants (e.g. string objects) go through a different // code path. noway_assert(type != TYP_REF || val == 0); instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); } /***************************************************************************** * * Generate code to check that the GS cookie wasn't thrashed by a buffer * overrun. 
On ARM64 we always use REG_TMP_0 and REG_TMP_1 as temp registers * and this works fine in the case of tail calls * Implementation Note: pushReg = true, in case of tail calls. */ void CodeGen::genEmitGSCookieCheck(bool pushReg) { noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0). if (!pushReg && (compiler->info.compRetType == TYP_REF)) gcInfo.gcRegGCrefSetCur |= RBM_INTRET; regNumber regGSConst = REG_TMP_0; regNumber regGSValue = REG_TMP_1; if (compiler->gsGlobalSecurityCookieAddr == nullptr) { // load the GS cookie constant into a reg // genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); } else { // Ngen case - GS cookie constant needs to be accessed through an indirection. instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0); } // Load this method's GS value from the stack frame getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); // Compare with the GC cookie constant getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue); BasicBlock *gsCheckBlk = genCreateTempLabel(); inst_JMP(genJumpKindForOper(GT_EQ, true), gsCheckBlk); genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); genDefineTempLabel(gsCheckBlk); } /***************************************************************************** * * Generate code for all the basic blocks in the function. 
*/ void CodeGen::genCodeForBBlist() { unsigned varNum; LclVarDsc * varDsc; unsigned savedStkLvl; #ifdef DEBUG genInterruptibleUsed = true; unsigned stmtNum = 0; UINT64 totalCostEx = 0; UINT64 totalCostSz = 0; // You have to be careful if you create basic blocks from now on compiler->fgSafeBasicBlockCreation = false; // This stress mode is not comptible with fully interruptible GC if (genInterruptible && compiler->opts.compStackCheckOnCall) { compiler->opts.compStackCheckOnCall = false; } // This stress mode is not comptible with fully interruptible GC if (genInterruptible && compiler->opts.compStackCheckOnRet) { compiler->opts.compStackCheckOnRet = false; } #endif // DEBUG // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. genPrepForEHCodegen(); assert(!compiler->fgFirstBBScratch || compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. /* Initialize the spill tracking logic */ regSet.rsSpillBeg(); /* Initialize the line# tracking logic */ #ifdef DEBUGGING_SUPPORT if (compiler->opts.compScopeInfo) { siInit(); } #endif // The current implementation of switch tables requires the first block to have a label so it // can generate offsets to the switch label targets. // TODO-ARM64-CQ: remove this when switches have been re-implemented to not use this. if (compiler->fgHasSwitch) { compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; } genPendingCallLabel = nullptr; /* Initialize the pointer tracking code */ gcInfo.gcRegPtrSetInit(); gcInfo.gcVarPtrSetInit(); /* If any arguments live in registers, mark those regs as such */ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++ , varDsc++) { /* Is this variable a parameter assigned to a register? */ if (!varDsc->lvIsParam || !varDsc->lvRegister) continue; /* Is the argument live on entry to the method? 
*/ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) continue; /* Is this a floating-point argument? */ if (varDsc->IsFloatRegType()) continue; noway_assert(!varTypeIsFloating(varDsc->TypeGet())); /* Mark the register as holding the variable */ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); } unsigned finallyNesting = 0; // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without // allocation at the start of each basic block. VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); /*------------------------------------------------------------------------- * * Walk the basic blocks and generate code for each one * */ BasicBlock * block; BasicBlock * lblk; /* previous block */ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext) { #ifdef DEBUG if (compiler->verbose) { printf("\n=============== Generating "); block->dspBlockHeader(compiler, true, true); compiler->fgDispBBLiveness(block); } #endif // DEBUG /* Figure out which registers hold variables on entry to this block */ regSet.rsMaskVars = RBM_NONE; gcInfo.gcRegGCrefSetCur = RBM_NONE; gcInfo.gcRegByrefSetCur = RBM_NONE; compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); genUpdateLife(block->bbLiveIn); // Even if liveness didn't change, we need to update the registers containing GC references. // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change? // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here. // That would require handling the changes in recordVarLocationsAtStartOfBB(). 
regMaskTP newLiveRegSet = RBM_NONE; regMaskTP newRegGCrefSet = RBM_NONE; regMaskTP newRegByrefSet = RBM_NONE; #ifdef DEBUG VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler)); #endif VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); while (iter.NextElem(compiler, &varIndex)) { unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); if (varDsc->lvIsInReg()) { newLiveRegSet |= varDsc->lvRegMask(); if (varDsc->lvType == TYP_REF) { newRegGCrefSet |= varDsc->lvRegMask(); } else if (varDsc->lvType == TYP_BYREF) { newRegByrefSet |= varDsc->lvRegMask(); } #ifdef DEBUG if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) { VarSetOps::AddElemD(compiler, removedGCVars, varIndex); } #endif DEBUG VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); } else if (compiler->lvaIsGCTracked(varDsc)) { #ifdef DEBUG if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) { VarSetOps::AddElemD(compiler, addedGCVars, varIndex); } #endif DEBUG VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); } } #ifdef DEBUG if (compiler->verbose) { printf("\t\t\t\t\t\t\tLive regs: "); if (regSet.rsMaskVars == newLiveRegSet) { printf("(unchanged) "); } else { printRegMaskInt(regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); printf(" => "); } printRegMaskInt(newLiveRegSet); compiler->getEmitter()->emitDispRegSet(newLiveRegSet); printf("\n"); if (!VarSetOps::IsEmpty(compiler, addedGCVars)) { printf("\t\t\t\t\t\t\tAdded GCVars: "); dumpConvertedVarSet(compiler, addedGCVars); printf("\n"); } if (!VarSetOps::IsEmpty(compiler, removedGCVars)) { printf("\t\t\t\t\t\t\tRemoved GCVars: "); dumpConvertedVarSet(compiler, removedGCVars); printf("\n"); } } #endif // DEBUG regSet.rsMaskVars = newLiveRegSet; gcInfo.gcMarkRegSetGCref(newRegGCrefSet 
DEBUG_ARG(true)); gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true)); /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to represent the exception object (TYP_REF). We mark REG_EXCEPTION_OBJECT as holding a GC object on entry to the block, it will be the first thing evaluated (thanks to GTF_ORDER_SIDEEFF). */ if (handlerGetsXcptnObj(block->bbCatchTyp)) { #if JIT_FEATURE_SSA_SKIP_DEFS GenTreePtr firstStmt = block->FirstNonPhiDef(); #else GenTreePtr firstStmt = block->bbTreeList; #endif if (firstStmt != NULL) { GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr; if (compiler->gtHasCatchArg(firstTree)) { gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); } } } /* Start a new code output block */ genUpdateCurrentFunclet(block); #ifdef _TARGET_XARCH_ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) { getEmitter()->emitLoopAlign(); } #endif #ifdef DEBUG if (compiler->opts.dspCode) printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); #endif block->bbEmitCookie = NULL; if (block->bbFlags & (BBF_JMP_TARGET|BBF_HAS_LABEL)) { /* Mark a label and update the current set of live GC refs */ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, FALSE); } if (block == compiler->fgFirstColdBlock) { #ifdef DEBUG if (compiler->verbose) { printf("\nThis is the start of the cold region of the method\n"); } #endif // We should never have a block that falls through into the Cold section noway_assert(!lblk->bbFallsThrough()); // We require the block that starts the Cold section to have a label noway_assert(block->bbEmitCookie); getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); } /* Both stacks are always empty on entry to a basic block */ genStackLevel = 0; savedStkLvl = genStackLevel; /* Tell everyone which basic block we're working on */ compiler->compCurBB = block; #ifdef DEBUGGING_SUPPORT siBeginBlock(block); // BBF_INTERNAL blocks don't correspond to any single IL 
instruction. if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to emit a NO_MAPPING entry, immediately after the prolog. { genIPmappingAdd((IL_OFFSETX) ICorDebugInfo::NO_MAPPING, true); } bool firstMapping = true; #endif // DEBUGGING_SUPPORT /*--------------------------------------------------------------------- * * Generate code for each statement-tree in the block * */ if (block->bbFlags & BBF_FUNCLET_BEG) { genReserveFuncletProlog(block); } for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) { noway_assert(stmt->gtOper == GT_STMT); if (stmt->AsStmt()->gtStmtIsEmbedded()) continue; /* Get hold of the statement tree */ GenTreePtr tree = stmt->gtStmt.gtStmtExpr; #if defined(DEBUGGING_SUPPORT) /* Do we have a new IL-offset ? */ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET) { /* Create and append a new IP-mapping entry */ genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping); firstMapping = false; } #endif // DEBUGGING_SUPPORT #ifdef DEBUG noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || stmt->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); if (compiler->opts.dspCode && compiler->opts.dspInstrs && stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) { while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs) { genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); } } stmtNum++; if (compiler->verbose) { printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum); printf("Holding variables: "); dspRegMask(regSet.rsMaskVars); printf("\n\n"); if (compiler->verboseTrees) { compiler->gtDispTree(compiler->opts.compDbgInfo ? 
stmt : tree); printf("\n"); } } totalCostEx += ((UINT64)stmt->gtCostEx * block->getBBWeight(compiler)); totalCostSz += (UINT64) stmt->gtCostSz; #endif // DEBUG // Traverse the tree in linear order, generating code for each node in the // tree as we encounter it compiler->compCurLifeTree = NULL; compiler->compCurStmt = stmt; for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext) { genCodeForTreeNode(treeNode); if (treeNode->gtHasReg() && treeNode->gtLsraInfo.isLocalDefUse) { genConsumeReg(treeNode); } } regSet.rsSpillChk(); #ifdef DEBUG /* Make sure we didn't bungle pointer register tracking */ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur); regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; // If return is a GC-type, clear it. Note that if a common // epilog is generated (genReturnBB) it has a void return // even though we might return a ref. We can't use the compRetType // as the determiner because something we are tracking as a byref // might be used as a return value of a int function (which is legal) if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) || (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet())))) { nonVarPtrRegs &= ~RBM_INTRET; } // When profiling, the first statement in a catch block will be the // harmless "inc" instruction (does not interfere with the exception // object). 
if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) && handlerGetsXcptnObj(block->bbCatchTyp)) { nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT; } if (nonVarPtrRegs) { printf("Regset after tree="); compiler->printTreeID(tree); printf(" BB%02u gcr=", block->bbNum); printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); printf(", byr="); printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); printf(", regVars="); printRegMaskInt(regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); printf("\n"); } noway_assert(nonVarPtrRegs == 0); for (GenTree * node = stmt->gtStmt.gtStmtList; node; node=node->gtNext) { assert(!(node->gtFlags & GTF_SPILL)); } #endif // DEBUG noway_assert(stmt->gtOper == GT_STMT); #ifdef DEBUGGING_SUPPORT genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx); #endif } //-------- END-FOR each statement-tree of the current block --------- #if defined(DEBUG) && defined(_TARGET_ARM64_) if (block->bbNext == nullptr) { // Unit testing of the ARM64 emitter: generate a bunch of instructions into the last block // (it's as good as any, but better than the prolog, which can only be a single instruction // group) then use COMPLUS_JitLateDisasm=* to see if the late disassembler // thinks the instructions are the same as we do. genArm64EmitterUnitTests(); } #endif // defined(DEBUG) && defined(_TARGET_ARM64_) #ifdef DEBUGGING_SUPPORT if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) { siEndBlock(block); /* Is this the last block, and are there any open scopes left ? 
*/ bool isLastBlockProcessed = (block->bbNext == NULL); if (block->isBBCallAlwaysPair()) { isLastBlockProcessed = (block->bbNext->bbNext == NULL); } if (isLastBlockProcessed && siOpenScopeList.scNext) { /* This assert no longer holds, because we may insert a throw block to demarcate the end of a try or finally region when they are at the end of the method. It would be nice if we could fix our code so that this throw block will no longer be necessary. */ //noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); siCloseAllOpenScopes(); } } #endif // DEBUGGING_SUPPORT genStackLevel -= savedStkLvl; #ifdef DEBUG // compCurLife should be equal to the liveOut set, except that we don't keep // it up to date for vars that are not register candidates // (it would be nice to have a xor set function) VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) { unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; LclVarDsc * varDsc = compiler->lvaTable + varNum; assert(!varDsc->lvIsRegCandidate()); } #endif /* Both stacks should always be empty on exit from a basic block */ noway_assert(genStackLevel == 0); #if 0 // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. // The document "X64 and ARM ABIs.docx" has more details. The situations: // 1. If the call instruction is in a different EH region as the instruction that follows it. // 2. If the call immediately precedes an OS epilog. 
(Note that what the JIT or VM consider an epilog might // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.) // We handle case #1 here, and case #2 in the emitter. if (getEmitter()->emitIsLastInsCall()) { // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold? // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically, // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions // generated before the OS epilog starts, such as a GS cookie check. if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { // We only need the NOP if we're not going to generate any more code as part of the block end. switch (block->bbJumpKind) { case BBJ_ALWAYS: case BBJ_THROW: case BBJ_CALLFINALLY: case BBJ_EHCATCHRET: // We're going to generate more code below anyway, so no need for the NOP. case BBJ_RETURN: case BBJ_EHFINALLYRET: case BBJ_EHFILTERRET: // These are the "epilog follows" case, handled in the emitter. break; case BBJ_NONE: if (block->bbNext == nullptr) { // Call immediately before the end of the code; we should never get here . instGen(INS_BREAKPOINT); // This should never get executed } else { // We need the NOP instGen(INS_nop); } break; case BBJ_COND: case BBJ_SWITCH: // These can't have a call as the last instruction! default: noway_assert(!"Unexpected bbJumpKind"); break; } } } #endif // 0 /* Do we need to generate a jump or return? */ switch (block->bbJumpKind) { case BBJ_ALWAYS: inst_JMP(EJ_jmp, block->bbJumpDest); break; case BBJ_RETURN: genExitCode(block); break; case BBJ_THROW: // If we have a throw at the end of a function or funclet, we need to emit another instruction // afterwards to help the OS unwinder determine the correct context during unwind. 
// We insert an unexecuted breakpoint instruction in several situations // following a throw instruction: // 1. If the throw is the last instruction of the function or funclet. This helps // the OS unwinder determine the correct context during an unwind from the // thrown exception. // 2. If this is this is the last block of the hot section. // 3. If the subsequent block is a special throw block. // 4. On AMD64, if the next block is in a different EH region. if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) || !BasicBlock::sameEHRegion(block, block->bbNext) || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || block->bbNext == compiler->fgFirstColdBlock ) { instGen(INS_BREAKPOINT); // This should never get executed } break; case BBJ_CALLFINALLY: // Generate a call to the finally, like this: // mov x0,qword ptr [fp + 10H] // Load x0 with PSPSym // bl finally-funclet // b finally-return // Only for non-retless finally calls // The 'b' can be a NOP if we're going to the next block. getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest); if (block->bbFlags & BBF_RETLESS_CALL) { // We have a retless call, and the last instruction generated was a call. // If the next block is in a different EH region (or is the end of the code // block), then we need to generate a breakpoint here (since it will never // get executed) to get proper unwind behavior. if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { instGen(INS_BREAKPOINT); // This should never get executed } } else { // Because of the way the flowgraph is connected, the liveness info for this one instruction // after the call is not (can not be) correct in cases where a variable has a last use in the // handler. So turn off GC reporting for this single instruction. 
getEmitter()->emitMakeRemainderNonInterruptible(); // Now go to where the finally funclet needs to return to. if (block->bbNext->bbJumpDest == block->bbNext->bbNext) { // Fall-through. // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly // to the next instruction? This would depend on stack walking from within the finally // handler working without this instruction being in this special EH region. instGen(INS_nop); } else { inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); } } // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the // jump target using bbJumpDest - that is already used to point // to the finally block. So just skip past the BBJ_ALWAYS unless the // block is RETLESS. if ( !(block->bbFlags & BBF_RETLESS_CALL) ) { assert(block->isBBCallAlwaysPair()); lblk = block; block = block->bbNext; } break; case BBJ_EHCATCHRET: getEmitter()->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET); __fallthrough; case BBJ_EHFINALLYRET: case BBJ_EHFILTERRET: genReserveFuncletEpilog(block); break; case BBJ_NONE: case BBJ_COND: case BBJ_SWITCH: break; default: noway_assert(!"Unexpected bbJumpKind"); break; } #ifdef DEBUG compiler->compCurBB = 0; #endif } //------------------ END-FOR each block of the method ------------------- /* Nothing is live at this point */ genUpdateLife(VarSetOps::MakeEmpty(compiler)); /* Finalize the spill tracking logic */ regSet.rsSpillEnd(); /* Finalize the temp tracking logic */ compiler->tmpEnd(); #ifdef DEBUG if (compiler->verbose) { printf("\n# "); printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz); printf("%s\n", compiler->info.compFullName); } #endif } // return the child that has the same reg as the dst (if any) // other child returned (out param) in 'other' // TODO-Cleanup: move to CodeGenCommon.cpp GenTree * sameRegAsDst(GenTree *tree, GenTree *&other /*out*/) { if (tree->gtRegNum == REG_NA) { other = nullptr; return NULL; } GenTreePtr op1 = 
tree->gtOp.gtOp1; GenTreePtr op2 = tree->gtOp.gtOp2; if (op1->gtRegNum == tree->gtRegNum) { other = op2; return op1; } if (op2->gtRegNum == tree->gtRegNum) { other = op1; return op2; } else { other = nullptr; return NULL; } } // move an immediate value into an integer register void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags) { // reg cannot be a FP register assert(!genIsValidFloatReg(reg)); if (!compiler->opts.compReloc) { size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs } if (EA_IS_RELOC(size)) { NYI("Reloc constant"); } else if (imm == 0) { instGen_Set_Reg_To_Zero(size, reg, flags); } else { if (emitter::emitIns_valid_imm_for_mov(imm, size)) { getEmitter()->emitIns_R_I(INS_mov, size, reg, imm); } else { getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff)); getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL); if ((size == EA_8BYTE) && ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zero-ed them { getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL); if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zero-ed them { getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL); } } } // The caller may have requested that the flags be set on this mov (rarely/never) if (flags == INS_FLAGS_SET) { getEmitter()->emitIns_R_I(INS_tst, size, reg, 0); } } regTracker.rsTrackRegIntCns(reg, imm); } /*********************************************************************************** * * Generate code to set a register 'targetReg' of type 'targetType' to the constant * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call * genProduceReg() on the target register. 
*/ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree) { switch (tree->gtOper) { case GT_CNS_INT: { // relocatable values tend to come down as a CNS_INT of native int type // so the line between these two opcodes is kind of blurry GenTreeIntConCommon* con = tree->AsIntConCommon(); ssize_t cnsVal = con->IconValue(); bool needReloc = compiler->opts.compReloc && tree->IsIconHandle(); if (needReloc) { instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); regTracker.rsTrackRegTrash(targetReg); } else { genSetRegToIcon(targetReg, cnsVal, targetType); } } break; case GT_CNS_DBL: { emitter *emit = getEmitter(); emitAttr size = emitTypeSize(tree); GenTreeDblCon *dblConst = tree->AsDblCon(); double constValue = dblConst->gtDblCon.gtDconVal; // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0) if (*(__int64*)&constValue == 0) { // A faster/smaller way to generate 0.0 // We will just zero out the entire vector register for both float and double emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B); } else if (emitter::emitIns_valid_imm_for_fmov(constValue)) { // We can load the FP constant using the fmov FP-immediate for this constValue emit->emitIns_R_F(INS_fmov, size, targetReg, constValue); } else { // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. 
CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst); emit->emitIns_R_C(INS_ldr, size, targetReg, hnd, 0); } } break; default: unreached(); } } // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { assert(!(treeNode->gtFlags & GTF_UNSIGNED)); assert(!treeNode->gtOverflowEx()); #if 0 regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); emitAttr size = emitTypeSize(treeNode); GenTree *op1 = treeNode->gtOp.gtOp1; GenTree *op2 = treeNode->gtOp.gtOp2; // to get the high bits of the multiply, we are constrained to using the // 1-op form: RDX:RAX = RAX * rm // The 3-op form (Rx=Ry*Rz) does not support it. genConsumeOperands(treeNode->AsOp()); GenTree* regOp = op1; GenTree* rmOp = op2; // Set rmOp to the contained memory operand (if any) // if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg))) { regOp = op2; rmOp = op1; } assert(!regOp->isContained()); // Setup targetReg when neither of the source operands was a matching register if (regOp->gtRegNum != targetReg) { inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType); } emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp); // Move the result to the desired register, if necessary if (targetReg != REG_RDX) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } #else // !0 NYI("genCodeForMulHi"); #endif // !0 } // generate code for a DIV or MOD operation // void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) { // unused on ARM64 } // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR void CodeGen::genCodeForBinary(GenTree* treeNode) { const genTreeOps oper = treeNode->OperGet(); regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); assert (oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND || oper == GT_OR 
|| oper == GT_XOR); GenTreePtr op1 = treeNode->gtGetOp1(); GenTreePtr op2 = treeNode->gtGetOp2(); instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); // The arithmetic node must be sitting in a register (since it's not contained) noway_assert(targetReg != REG_NA); genConsumeOperands(treeNode->AsOp()); regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2); noway_assert(r == targetReg); genProduceReg(treeNode); } /***************************************************************************** * * Generate code for a single node in the tree. * Preconditions: All operands have been evaluated * */ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); #ifdef DEBUG if (compiler->verbose) { unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio printf("Generating: "); compiler->gtDispTree(treeNode, nullptr, nullptr, true); } #endif // DEBUG // Is this a node whose value is already in a register? LSRA denotes this by // setting the GTF_REUSE_REG_VAL flag. if (treeNode->IsReuseRegVal()) { // For now, this is only used for constant nodes. assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); JITDUMP(" TreeNode is marked ReuseReg\n"); return; } // contained nodes are part of their parents for codegen purposes // ex : immediates, most LEAs if (treeNode->isContained()) { return; } switch (treeNode->gtOper) { case GT_START_NONGC: getEmitter()->emitMakeRemainderNonInterruptible(); break; case GT_PROF_HOOK: // We should be seeing this only if profiler hook is needed noway_assert(compiler->compIsProfilerHookNeeded()); #ifdef PROFILING_SUPPORTED // Right now this node is used only for tail calls. In future if // we intend to use it for Enter or Leave hooks, add a data member // to this node indicating the kind of profiler hook. 
For example, // helper number can be used. genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); #endif // PROFILING_SUPPORTED break; case GT_LCLHEAP: genLclHeap(treeNode); break; case GT_CNS_INT: case GT_CNS_DBL: genSetRegToConst(targetReg, targetType, treeNode); genProduceReg(treeNode); break; case GT_NOT: assert(!varTypeIsFloating(targetType)); __fallthrough; case GT_NEG: { instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); // The arithmetic node must be sitting in a register (since it's not contained) assert(!treeNode->isContained()); // The dst can only be a register. assert(targetReg != REG_NA); GenTreePtr operand = treeNode->gtGetOp1(); assert(!operand->isContained()); // The src must be a register. regNumber operandReg = genConsumeReg(operand); getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg); } genProduceReg(treeNode); break; case GT_DIV: case GT_UDIV: if (varTypeIsFloating(targetType)) { // Floating point divide never raises an exception genCodeForBinary(treeNode); } else // integer divide operation { GenTreePtr divisorOp = treeNode->gtGetOp2(); // TODO-ARM64-CQ: Optimize a divide by power of 2 as we do for AMD64 if (divisorOp->IsZero()) { genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_DIV_BY_ZERO); // We don't need to generate the sdiv/udiv instruction } else { emitAttr cmpSize = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet()))); regNumber divisorReg = divisorOp->gtRegNum; if (treeNode->gtOper == GT_DIV) { BasicBlock* sdivLabel = genCreateTempLabel(); // Two possible exceptions: // (AnyVal / 0) => DivideByZeroException // (MinInt / -1) => ArithmeticException // bool checkDividend = true; // Do we have a contained immediate for the 'divisorOp'? 
if (divisorOp->isContainedIntOrIImmed()) { GenTreeIntConCommon* intConst = divisorOp->AsIntConCommon(); assert(intConst->IconValue() != 0); // already checked above by IsZero() if (intConst->IconValue() != -1) { checkDividend = false; // We statically know that the dividend is not -1 } } else { // Check if the divisor is zero throw a DivideByZeroException emit->emitIns_R_I(INS_cmp, cmpSize, divisorReg, 0); genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_DIV_BY_ZERO); // Check if the divisor is not -1 branch to 'sdivLabel' emit->emitIns_R_I(INS_cmp, cmpSize, divisorReg, -1); inst_JMP(genJumpKindForOper(GT_NE, true), sdivLabel); // If control flow continues past here the 'divisorReg' is known to be -1 } if (checkDividend) { regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum; // At this point the divisor is known to be -1 // // Issue 'adds zr, dividendReg, dividendReg' instruction // this will set the Z and V flags only when dividendReg is MinInt // emit->emitIns_R_R_R(INS_adds, cmpSize, REG_ZR, dividendReg, dividendReg); inst_JMP(genJumpKindForOper(GT_NE, true), sdivLabel); // goto sdiv if Z flag is clear genJumpToThrowHlpBlk(EJ_jo, Compiler::ACK_ARITH_EXCPN); // if the V flags is set throw ArithmeticException } genDefineTempLabel(sdivLabel); genCodeForBinary(treeNode); // Generate the sdiv instruction } else // (treeNode->gtOper == GT_UDIV) { // Only one possible exception // (AnyVal / 0) => DivideByZeroException // // Note that division by the constant 0 was already checked for above by the op2->IsZero() check // if (!divisorOp->isContainedIntOrIImmed()) { emit->emitIns_R_I(INS_cmp, cmpSize, divisorReg, 0); genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_DIV_BY_ZERO); } genCodeForBinary(treeNode); // Generate the udiv instruction } } } break; case GT_OR: case GT_XOR: case GT_AND: assert(varTypeIsIntegralOrI(treeNode)); __fallthrough; case GT_ADD: case GT_SUB: case GT_MUL: genCodeForBinary(treeNode); break; case GT_LSH: case GT_RSH: case GT_RSZ: 
genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode); // genCodeForShift() calls genProduceReg() break; case GT_CAST: if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) { // Casts float/double <--> double/float genFloatToFloatCast(treeNode); } else if (varTypeIsFloating(treeNode->gtOp.gtOp1)) { // Casts float/double --> int32/int64 genFloatToIntCast(treeNode); } else if (varTypeIsFloating(targetType)) { // Casts int32/uint32/int64/uint64 --> float/double genIntToFloatCast(treeNode); } else { // Casts int <--> int genIntToIntCast(treeNode); } // The per-case functions call genProduceReg() break; case GT_LCL_VAR: { // lcl_vars are not defs assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); GenTreeLclVarCommon *lcl = treeNode->AsLclVarCommon(); bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) { assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); } // If this is a register candidate that has been spilled, genConsumeReg() will // reload it at the point of use. Otherwise, if it's not in a register, we load it here. if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) { assert(!isRegCandidate); emit->emitIns_R_S(ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), emitTypeSize(treeNode), targetReg, lcl->gtLclNum, 0); genProduceReg(treeNode); } } break; case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: // Address of a local var. This by itself should never be allocated a register. // If it is worth storing the address in a register then it should be cse'ed into // a temp and that would be allocated a register. 
noway_assert(targetType == TYP_BYREF); noway_assert(!treeNode->InReg()); inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); genProduceReg(treeNode); break; case GT_LCL_FLD: { noway_assert(targetType != TYP_STRUCT); noway_assert(targetReg != REG_NA); unsigned offs = treeNode->gtLclFld.gtLclOffs; unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; assert(varNum < compiler->lvaCount); emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), EA_8BYTE, targetReg, varNum, offs); genProduceReg(treeNode); } break; case GT_STORE_LCL_FLD: { NYI_IF(varTypeIsFloating(targetType), "Code generation for FP field assignment"); noway_assert(targetType != TYP_STRUCT); noway_assert(!treeNode->InReg()); unsigned offs = treeNode->gtLclFld.gtLclOffs; unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; assert(varNum < compiler->lvaCount); GenTreePtr op1 = treeNode->gtOp.gtOp1; genConsumeRegs(op1); emit->emitIns_R_S(ins_Store(targetType), emitTypeSize(targetType), op1->gtRegNum, varNum, offs); } break; case GT_STORE_LCL_VAR: { noway_assert(targetType != TYP_STRUCT); unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); // Ensure that lclVar nodes are typed correctly. assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet())); GenTreePtr op1 = treeNode->gtOp.gtOp1; genConsumeRegs(op1); if (targetReg == REG_NA) { // stack store emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); varDsc->lvRegNum = REG_STK; } else // store into register (i.e move into register) { if (op1->isContained()) { // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register // must be a constant. However, in the future we might want to support a contained memory op. 
// This is a bit tricky because we have to decide it's contained before register allocation, // and this would be a case where, once that's done, we need to mark that node as always // requiring a register - which we always assume now anyway, but once we "optimize" that // we'll have to take cases like this into account. assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); genSetRegToConst(targetReg, targetType, op1); } else if (op1->gtRegNum != targetReg) { // Setup targetReg when op1 is not a matching register assert(op1->gtRegNum != REG_NA); inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); } genProduceReg(treeNode); } } break; case GT_RETFILT: // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in // the return register, if it's not already there. The processing is the same as GT_RETURN. if (targetType != TYP_VOID) { // For filters, the IL spec says the result is type int32. Further, the only specified legal values // are 0 or 1, with the use of other values "undefined". assert(targetType == TYP_INT); } __fallthrough; case GT_RETURN: { GenTreePtr op1 = treeNode->gtOp.gtOp1; if (targetType == TYP_VOID) { assert(op1 == nullptr); } else { assert(op1 != nullptr); noway_assert(op1->gtRegNum != REG_NA); genConsumeReg(op1); regNumber retReg = varTypeIsFloating(treeNode) ? 
REG_FLOATRET : REG_INTRET; bool movRequired = (op1->gtRegNum != retReg); if (!movRequired) { if (op1->OperGet() == GT_LCL_VAR) { GenTreeLclVarCommon *lcl = op1->AsLclVarCommon(); bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0)) { assert(op1->InReg()); // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR unsigned lclNum = lcl->gtLclNum; LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); var_types op1Type = genActualType(op1->TypeGet()); var_types lclType = genActualType(varDsc->TypeGet()); if (genTypeSize(op1Type) < genTypeSize(lclType)) { movRequired = true; } } } } if (movRequired) { emitAttr movSize = EA_ATTR(genTypeSize(targetType)); getEmitter()->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum); } } #ifdef PROFILING_SUPPORTED // There will be a single return block while generating profiler ELT callbacks. // // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN: // In flowgraph and other places assert that the last node of a block marked as // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to // maintain such an invariant irrespective of whether profiler hook needed or not. // Also, there is not much to be gained by materializing it as an explicit node. 
if (compiler->compCurBB == compiler->genReturnBB) { genProfilingLeaveCallback(); } #endif } break; case GT_LEA: { // if we are here, it is the case where there is an LEA that cannot // be folded into a parent instruction GenTreeAddrMode *lea = treeNode->AsAddrMode(); genLeaInstruction(lea); } // genLeaInstruction calls genProduceReg() break; case GT_IND: genConsumeAddress(treeNode->AsIndir()->Addr()); emit->emitInsMov(ins_Load(targetType), emitTypeSize(treeNode), treeNode); genProduceReg(treeNode); break; case GT_MULHI: genCodeForMulHi(treeNode->AsOp()); genProduceReg(treeNode); break; case GT_MOD: case GT_UMOD: // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph. // // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end. noway_assert(!"Codegen for GT_MOD/GT_UMOD"); break; case GT_MATH: genMathIntrinsic(treeNode); break; #ifdef FEATURE_SIMD case GT_SIMD: genSIMDIntrinsic(treeNode->AsSIMD()); break; #endif // FEATURE_SIMD case GT_CKFINITE: genCkfinite(treeNode); break; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: { // TODO-ARM64-CQ: Check if we can use the currently set flags. 
// TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register // (signed < or >= where targetReg != REG_NA) GenTreeOp* tree = treeNode->AsOp(); GenTreePtr op1 = tree->gtOp1; GenTreePtr op2 = tree->gtOp2; var_types op1Type = op1->TypeGet(); var_types op2Type = op2->TypeGet(); assert(!op1->isContainedMemoryOp()); assert(!op2->isContainedMemoryOp()); genConsumeOperands(tree); emitAttr cmpSize = EA_UNKNOWN; if (varTypeIsFloating(op1Type)) { assert(varTypeIsFloating(op2Type)); assert(!op1->isContained()); assert(op1Type == op2Type); cmpSize = EA_ATTR(genTypeSize(op1Type)); if (op2->IsZero()) { emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0); } else { assert(!op2->isContained()); emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum); } } else { assert(!varTypeIsFloating(op2Type)); // We don't support swapping op1 and op2 to generate cmp reg, imm assert(!op1->isContainedIntOrIImmed()); // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero // extended as part of the instruction (using "CMP (extended register)"). // We should use that if possible, swapping operands // (and reversing the condition) if necessary. unsigned op1Size = genTypeSize(op1Type); unsigned op2Size = genTypeSize(op2Type); if ((op1Size < 4) || (op1Size < op2Size)) { // We need to sign/zero extend op1 up to 32 or 64 bits. instruction ins = ins_Move_Extend(op1Type, true); inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum); } if (!op2->isContainedIntOrIImmed()) { if ((op2Size < 4) || (op2Size < op1Size)) { // We need to sign/zero extend op2 up to 32 or 64 bits. 
instruction ins = ins_Move_Extend(op2Type, true); inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum); } } cmpSize = EA_4BYTE; if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE)) { cmpSize = EA_8BYTE; } if (op2->isContainedIntOrIImmed()) { GenTreeIntConCommon* intConst = op2->AsIntConCommon(); emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue()); } else { emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum); } } // Are we evaluating this into a register? if (targetReg != REG_NA) { genSetRegToCond(targetReg, tree); genProduceReg(tree); } } break; case GT_JTRUE: { GenTree *cmp = treeNode->gtOp.gtOp1->gtEffectiveVal(); assert(cmp->OperIsCompare()); assert(compiler->compCurBB->bbJumpKind == BBJ_COND); // Get the "jmpKind" using the gtOper kind // Note that whether it is an unsigned cmp is governed by the GTF_UNSIGNED flags emitJumpKind jmpKind = genJumpKindForOper(cmp->gtOper, (cmp->gtFlags & GTF_UNSIGNED) != 0); BasicBlock * jmpTarget = compiler->compCurBB->bbJumpDest; inst_JMP(jmpKind, jmpTarget); } break; case GT_RETURNTRAP: { // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' GenTree *data = treeNode->gtOp.gtOp1; genConsumeRegs(data); emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0); BasicBlock* skipLabel = genCreateTempLabel(); inst_JMP(genJumpKindForOper(GT_EQ, true), skipLabel); // emit the call to the EE-helper that stops for GC (or other reasons) genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); genDefineTempLabel(skipLabel); } break; case GT_STOREIND: { GenTree* data = treeNode->gtOp.gtOp2; GenTree* addr = treeNode->gtOp.gtOp1; GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); if (writeBarrierForm != GCInfo::WBF_NoBarrier) { // data and addr must be in registers. // Consume both registers so that any copies of interfering // registers are taken care of. 
genConsumeOperands(treeNode->AsOp()); #if NOGC_WRITE_BARRIERS // At this point, we should not have any interference. // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, // as that is where 'addr' must go. noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) { inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); } // 'data' goes into x15 (REG_WRITE_BARRIER) if (data->gtRegNum != REG_WRITE_BARRIER) { inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); } #else // At this point, we should not have any interference. // That is, 'data' must not be in REG_ARG_0, // as that is where 'addr' must go. noway_assert(data->gtRegNum != REG_ARG_0); // addr goes in REG_ARG_0 if (addr->gtRegNum != REG_ARG_0) { inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); } // data goes in REG_ARG_1 if (data->gtRegNum != REG_ARG_1) { inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); } #endif // NOGC_WRITE_BARRIERS genGCWriteBarrier(treeNode, writeBarrierForm); } else { bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); bool dataIsUnary = false; GenTree* nonRMWsrc = nullptr; // We must consume the operands in the proper execution order, // so that liveness is updated appropriately. 
if (!reverseOps) { genConsumeAddress(addr); } if (data->isContained() && !data->OperIsLeaf()) { dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); if (!dataIsUnary) { nonRMWsrc = data->gtGetOp1(); if (nonRMWsrc->isIndir() && Lowering::IndirsAreEquivalent(nonRMWsrc, treeNode)) { nonRMWsrc = data->gtGetOp2(); } genConsumeRegs(nonRMWsrc); } } else { genConsumeRegs(data); } if (reverseOps) { genConsumeAddress(addr); } if (data->isContained() && !data->OperIsLeaf()) { NYI("RMW?"); } else { emit->emitInsMov(ins_Store(targetType), emitTypeSize(treeNode), treeNode); } } } break; case GT_COPY: // This is handled at the time we call genConsumeReg() on the GT_COPY break; case GT_SWAP: { // Swap is only supported for lclVar operands that are enregistered // We do not consume or produce any registers. Both operands remain enregistered. // However, the gc-ness may change. assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2)); GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon(); LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); var_types type1 = varDsc1->TypeGet(); GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon(); LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); var_types type2 = varDsc2->TypeGet(); // We must have both int or both fp regs assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); // FP swap is not yet implemented (and should have NYI'd in LSRA) assert(!varTypeIsFloating(type1)); regNumber oldOp1Reg = lcl1->gtRegNum; regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); regNumber oldOp2Reg = lcl2->gtRegNum; regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); // We don't call genUpdateVarReg because we don't have a tree node with the new register. 
varDsc1->lvRegNum = oldOp2Reg; varDsc2->lvRegNum = oldOp1Reg; // Do the xchg emitAttr size = EA_PTRSIZE; if (varTypeGCtype(type1) != varTypeGCtype(type2)) { // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. // Otherwise it will leave them alone, which is correct if they have the same GC-ness. size = EA_GCREF; } NYI("register swap"); // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); // Update the gcInfo. // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask); gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask); // gcMarkRegPtrVal will do the appropriate thing for non-gc types. // It will also dump the updates. gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); } break; case GT_LIST: case GT_ARGPLACE: // Nothing to do break; case GT_PUTARG_STK: { noway_assert(targetType != TYP_STRUCT); // Get argument offset on stack. // Here we cross check that argument offset hasn't changed from lowering to codegen since // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE; #ifdef DEBUG fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); assert(curArgTabEntry); assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); #endif GenTreePtr data = treeNode->gtOp.gtOp1; unsigned varNum; #if FEATURE_FASTTAILCALL bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea; #else const bool putInIncomingArgArea = false; #endif // Whether to setup stk arg in incoming or out-going arg area? // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. // All other calls - stk arg is setup in out-going arg area. 
if (putInIncomingArgArea) { // The first varNum is guaranteed to be the first incoming arg of the method being compiled. // See lvaInitTypeRef() for the order in which lvaTable entries are initialized. varNum = 0; #ifdef DEBUG #if FEATURE_FASTTAILCALL // This must be a fast tail call. assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); // Since it is a fast tail call, the existence of first incoming arg is guaranteed // because fast tail call requires that in-coming arg area of caller is >= out-going // arg area required for tail call. LclVarDsc* varDsc = compiler->lvaTable; assert(varDsc != nullptr); assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0))); #endif // FEATURE_FASTTAILCALL #endif } else { varNum = compiler->lvaOutgoingArgSpaceVar; } if (data->isContained()) { getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), varNum, argOffset, (int) data->AsIntConCommon()->IconValue()); } else { genConsumeReg(data); getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, varNum, argOffset); } } break; case GT_PUTARG_REG: { noway_assert(targetType != TYP_STRUCT); // commas show up here commonly, as part of a nullchk operation GenTree *op1 = treeNode->gtOp.gtOp1; // If child node is not already in the register we need, move it genConsumeReg(op1); if (targetReg != op1->gtRegNum) { inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); } } genProduceReg(treeNode); break; case GT_CALL: genCallInstruction(treeNode); break; case GT_JMP: genJmpMethod(treeNode); break; case GT_LOCKADD: case GT_XCHG: case GT_XADD: genLockedInstructions(treeNode); break; case GT_MEMORYBARRIER: instGen_MemoryBarrier(); break; case GT_CMPXCHG: NYI("GT_CMPXCHG"); break; case GT_RELOAD: // do nothing - reload is just a marker. 
// The parent node will call genConsumeReg on this which will trigger the unspill of this node's child // into the register specified in this node. break; case GT_NOP: break; case GT_NO_OP: if (treeNode->gtFlags & GTF_NO_OP_NO) { noway_assert(!"GTF_NO_OP_NO should not be set"); } else { instGen(INS_nop); } break; case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD genRangeCheck(treeNode); break; case GT_PHYSREG: if (targetReg != treeNode->AsPhysReg()->gtSrcReg) { inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType); genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg); } genProduceReg(treeNode); break; case GT_PHYSREGDST: break; case GT_NULLCHECK: { assert(!treeNode->gtOp.gtOp1->isContained()); regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1); emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0); } break; case GT_CATCH_ARG: noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); /* Catch arguments get passed in a register. genCodeForBBlist() would have marked it as holding a GC object, but not used. 
*/ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); genConsumeReg(treeNode); break; case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0); // the runtime side requires the codegen here to be consistent emit->emitDisableRandomNops(); break; case GT_LABEL: genPendingCallLabel = genCreateTempLabel(); treeNode->gtLabel.gtLabBB = genPendingCallLabel; emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg); break; case GT_COPYOBJ: genCodeForCpObj(treeNode->AsCpObj()); break; case GT_COPYBLK: { GenTreeCpBlk* cpBlkOp = treeNode->AsCpBlk(); if (cpBlkOp->gtBlkOpGcUnsafe) { getEmitter()->emitDisableGC(); } switch (cpBlkOp->gtBlkOpKind) { case GenTreeBlkOp::BlkOpKindHelper: genCodeForCpBlk(cpBlkOp); break; case GenTreeBlkOp::BlkOpKindUnroll: genCodeForCpBlkUnroll(cpBlkOp); break; default: unreached(); } if (cpBlkOp->gtBlkOpGcUnsafe) { getEmitter()->emitEnableGC(); } } break; case GT_INITBLK: { GenTreeInitBlk* initBlkOp = treeNode->AsInitBlk(); switch (initBlkOp->gtBlkOpKind) { case GenTreeBlkOp::BlkOpKindHelper: genCodeForInitBlk(initBlkOp); break; case GenTreeBlkOp::BlkOpKindUnroll: genCodeForInitBlkUnroll(initBlkOp); break; default: unreached(); } } break; case GT_JMPTABLE: genJumpTable(treeNode); break; case GT_SWITCH_TABLE: genTableBasedSwitch(treeNode); break; case GT_ARR_INDEX: genCodeForArrIndex(treeNode->AsArrIndex()); break; case GT_ARR_OFFSET: genCodeForArrOffset(treeNode->AsArrOffs()); break; case GT_CLS_VAR_ADDR: NYI("GT_CLS_VAR_ADDR"); break; default: { #ifdef DEBUG char message[256]; sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet())); #endif assert(!"Unknown node in codegen"); } break; } } // Generate code for division (or mod) by power of two // or negative powers of two. (meaning -1 * a power of two, not 2^(-1)) // Op2 must be a contained integer constant. 
void CodeGen::genCodeForPow2Div(GenTreeOp* tree) { #if 0 GenTree *dividend = tree->gtOp.gtOp1; GenTree *divisor = tree->gtOp.gtOp2; genTreeOps oper = tree->OperGet(); emitAttr size = emitTypeSize(tree); emitter *emit = getEmitter(); regNumber targetReg = tree->gtRegNum; var_types targetType = tree->TypeGet(); bool isSigned = oper == GT_MOD || oper == GT_DIV; // precondition: extended dividend is in RDX:RAX // which means it is either all zeros or all ones noway_assert(divisor->isContained()); GenTreeIntConCommon* divImm = divisor->AsIntConCommon(); int64_t imm = divImm->IconValue(); ssize_t abs_imm = abs(imm); noway_assert(isPow2(abs_imm)); if (isSigned) { if (imm == 1) { if (targetReg != REG_RAX) inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); return; } if (abs_imm == 2) { if (oper == GT_MOD) { emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1 // xor with rdx will flip all bits if negative emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0 } else { assert(oper == GT_DIV); // add 1 if it's negative emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); } } else { // add imm-1 if negative emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1); emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX); } if (oper == GT_DIV) { unsigned shiftAmount = genLog2(unsigned(abs_imm)); inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount); if (imm < 0) { emit->emitIns_R(INS_neg, size, REG_RAX); } } else { assert(oper == GT_MOD); if (abs_imm > 2) { emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1); } // RDX contains 'imm-1' if negative emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); } if (targetReg != REG_RAX) { inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); } } else { assert (imm > 0); if (targetReg != dividend->gtRegNum) { inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType); } if (oper == GT_UDIV) { inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm))); } else { assert(oper == GT_UMOD); emit->emitIns_R_I(INS_and, size, 
targetReg, imm -1); } } #else // !0 NYI("genCodeForPow2Div"); #endif // !0 } /*********************************************************************************************** * Generate code for localloc */ void CodeGen::genLclHeap(GenTreePtr tree) { assert(tree->OperGet() == GT_LCLHEAP); GenTreePtr size = tree->gtOp.gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); regNumber targetReg = tree->gtRegNum; regMaskTP tmpRegsMask = tree->gtRsvdRegs; regNumber regCnt = REG_NA; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; #ifdef DEBUG // Verify ESP if (compiler->opts.compStackCheckOnRet) { noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); BasicBlock * esp_check = genCreateTempLabel(); inst_JMP(genJumpKindForOper(GT_EQ, true), esp_check); getEmitter()->emitIns(INS_BREAKPOINT); genDefineTempLabel(esp_check); } #endif noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack // Whether method has PSPSym. bool hasPspSym; #if FEATURE_EH_FUNCLETS hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM); #else hasPspSym = false; #endif // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI()) { // If size is a constant, then it must be contained. 
assert(size->isContained()); // If amount is zero then return null in targetReg amount = size->gtIntCon.gtIconVal; if (amount == 0) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); goto BAILOUT; } // 'amount' is the total numbe of bytes to localloc to properly STACK_ALIGN amount = AlignUp(amount, STACK_ALIGN); } else { // If 0 bail out by returning null in targetReg genConsumeRegAndCopy(size, targetReg); endLabel = genCreateTempLabel(); getEmitter()->emitIns_R_R(INS_TEST, easz, targetReg, targetReg); inst_JMP(EJ_je, endLabel); // Compute the size of the block to allocate and perform alignment. // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. if (!hasPspSym && compiler->info.compInitMem) { assert(genCountBits(tmpRegsMask) == 0); regCnt = targetReg; } else { assert(genCountBits(tmpRegsMask) >= 1); regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); tmpRegsMask &= ~regCntMask; regCnt = genRegNumFromMask(regCntMask); if (regCnt != targetReg) inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); } // Align to STACK_ALIGN // regCnt will be the total number of bytes to localloc inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); } unsigned stackAdjustment = 0; #if FEATURE_EH_FUNCLETS // If we have PSPsym, then need to re-locate it after localloc. if (hasPspSym) { stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym assert(genCountBits(tmpRegsMask) >= 1); regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask); tmpRegsMask &= ~pspSymRegMask; pspSymReg = genRegNumFromMask(pspSymRegMask); getEmitter()->emitIns_R_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif #if FEATURE_FIXED_OUT_ARGS // If we have an outgoing arg area then we must adjust the SP by popping off the // outgoing arg area. We will restore it right before we return from this method. 
// // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following // are the cases that needs to be handled: // i) Method has PSPSym + out-going arg area. // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs). // Therefore, we will pop-off RSP upto out-going arg area before locallocating. // We need to add padding to ensure RSP is STACK_ALIGN'ed while re-locating PSPSym + arg area. // ii) Method has no PSPSym but out-going arg area. // Almost same case as above without the requirement to pad for the final RSP to be STACK_ALIGN'ed. // iii) Method has PSPSym but no out-going arg area. // Nothing to pop-off from the stack but needs to relocate PSPSym with SP padded. // iv) Method has neither PSPSym nor out-going arg area. // Nothing needs to popped off from stack nor relocated. if (compiler->lvaOutgoingArgSpaceSize > 0) { assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); stackAdjustment += compiler->lvaOutgoingArgSpaceSize; } #endif if (size->IsCnsIntOrI()) { // We should reach here only for non-zero, constant size allocations. assert(amount > 0); // For small allocations we will generate up to four stp instructions size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT); if (cntStackAlignedWidthItems <= 4) { while (cntStackAlignedWidthItems != 0) { // We can use pre-indexed addressing. // stp ZR, ZR, [SP, #-16]! getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); cntStackAlignedWidthItems -= 1; } goto ALLOC_DONE; } else if (!compiler->info.compInitMem && (amount < CORINFO_PAGE_SIZE)) // must be < not <= { // Since the size is a page or less, simply adjust ESP // ESP might already be in the guard page, must touch it BEFORE // the alloc, not after. 
getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE); goto ALLOC_DONE; } // else, "mov regCnt, amount" // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); if (!hasPspSym && compiler->info.compInitMem) { assert(genCountBits(tmpRegsMask) == 0); regCnt = targetReg; } else { assert(genCountBits(tmpRegsMask) >= 1); regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); tmpRegsMask &= ~regCntMask; regCnt = genRegNumFromMask(regCntMask); } genSetRegToIcon(regCnt, amount, ((int)amount == amount)? TYP_INT : TYP_LONG); } BasicBlock* loop = genCreateTempLabel(); if (compiler->info.compInitMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that RSP is always valid // by tickling the pages, we will just push 0's on the stack. // // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 // and localloc size is a multiple of STACK_ALIGN. // Loop: genDefineTempLabel(loop); // We can use pre-indexed addressing. // stp ZR, ZR, [SP, #-16]! getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); // If not done, loop // Note that regCnt is the number of bytes to stack allocate. // Therefore we need to subtract 16 from regcnt here. assert(genIsValidIntReg(regCnt)); inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type)); inst_JMP(EJ_jne, loop); } else { //At this point 'regCnt' is set to the total number of bytes to locAlloc. // //We don't need to zero out the allocated memory. However, we do have //to tickle the pages to ensure that ESP is always valid and is //in sync with the "stack guard page". Note that in the worst //case ESP is on the last byte of the guard page. Thus you must //touch ESP+0 first not ESP+x01000. 
// //Another subtlety is that you don't want ESP to be exactly on the //boundary of the guard page because PUSH is predecrement, thus //call setup would not touch the guard page but just beyond it // //Note that we go through a few hoops so that ESP never points to //illegal pages at any time during the ticking process // // neg REGCNT // add REGCNT, ESP // reg now holds ultimate ESP // jb loop // result is smaller than orignial ESP (no wrap around) // xor REGCNT, REGCNT, // Overflow, pick lowest possible number // loop: // test ESP, [ESP+0] // tickle the page // mov REGTMP, ESP // sub REGTMP, PAGE_SIZE // mov ESP, REGTMP // cmp ESP, REGCNT // jae loop // // mov ESP, REG // end: inst_RV(INS_NEG, regCnt, TYP_I_IMPL); inst_RV_RV(INS_adds, regCnt, REG_SPBASE, TYP_I_IMPL); inst_JMP(EJ_jb, loop); instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); genDefineTempLabel(loop); // Tickle the decremented value, and move back to ESP, // note that it has to be done BEFORE the update of ESP since // ESP might already be on the guard page. It is OK to leave // the final value of ESP on the guard page getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); // This is a harmless workaround to avoid the emitter trying to track the // decrement of the ESP - we do the subtraction in another reg instead // of adjusting ESP directly. 
assert(tmpRegsMask != RBM_NONE); assert(genCountBits(tmpRegsMask) == 1); regNumber regTmp = genRegNumFromMask(tmpRegsMask); inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); inst_RV_IV(INS_sub, regTmp, CORINFO_PAGE_SIZE, EA_PTRSIZE); inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL); inst_JMP(EJ_jae, loop); // Move the final value to ESP inst_RV_RV(INS_mov, REG_SPBASE, regCnt); } ALLOC_DONE: // Re-adjust SP to allocate PSPSym and out-going arg area if (stackAdjustment != 0) { assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned assert(stackAdjustment > 0); getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int) stackAdjustment); #if FEATURE_EH_FUNCLETS // Write PSPSym to its new location. if (hasPspSym) { assert(genIsValidIntReg(pspSymReg)); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif // Return the stackalloc'ed address in result register. // TargetReg = RSP + stackAdjustment. 
// getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int) stackAdjustment); } else // stackAdjustment == 0 { // Move the final value of SP to targetReg inst_RV_RV(INS_mov, targetReg, REG_SPBASE); } BAILOUT: if (endLabel != nullptr) genDefineTempLabel(endLabel); // Write the lvaShadowSPfirst stack frame slot noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0); #if STACK_PROBES if (compiler->opts.compNeedStackProbes) { genGenerateStackProbe(); } #endif #ifdef DEBUG // Update new ESP if (compiler->opts.compStackCheckOnRet) { noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0); } #endif genProduceReg(tree); } // Generate code for InitBlk by performing a loop unroll // Preconditions: // a) Both the size and fill byte value are integer constants. // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. 
void CodeGen::genCodeForInitBlkUnroll(GenTreeInitBlk* initBlkNode) { #if 0 // Make sure we got the arguments of the initblk/initobj operation in the right registers GenTreePtr blockSize = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Dest(); GenTreePtr initVal = initBlkNode->InitVal(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!initVal->isContained()); assert(blockSize->isContained()); assert(blockSize->IsCnsIntOrI()); #endif // DEBUG size_t size = blockSize->gtIntCon.gtIconVal; assert(size <= INITBLK_UNROLL_LIMIT); assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI()); emitter *emit = getEmitter(); genConsumeReg(initVal); genConsumeReg(dstAddr); // If the initVal was moved, or spilled and reloaded to a different register, // get the original initVal from below the GT_RELOAD, but only after capturing the valReg, // which needs to be the new register. regNumber valReg = initVal->gtRegNum; initVal = initVal->gtSkipReloadOrCopy(); #else // !0 NYI("genCodeForInitBlkUnroll"); #endif // !0 } // Generates code for InitBlk by calling the VM memset helper function. // Preconditions: // a) The size argument of the InitBlk is not an integer constant. // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes. void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode) { // Make sure we got the arguments of the initblk operation in the right registers GenTreePtr blockSize = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Dest(); GenTreePtr initVal = initBlkNode->InitVal(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!initVal->isContained()); assert(!blockSize->isContained()); // TODO-ARM64-CQ: When initblk loop unrolling is implemented // put this assert back on. 
#if 0 if (blockSize->IsCnsIntOrI()) { assert(blockSize->gtIntCon.gtIconVal >= INITBLK_UNROLL_LIMIT); } #endif // 0 #endif // DEBUG genConsumeRegAndCopy(blockSize, REG_ARG_2); genConsumeRegAndCopy(initVal, REG_ARG_1); genConsumeRegAndCopy(dstAddr, REG_ARG_0); genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); } // Generate code for a load from some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset) { #if 0 emitter *emit = getEmitter(); if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset); } else { emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset); } #else // !0 NYI("genCodeForLoadOffset"); #endif // !0 } // Generate code for a store to some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset) { #if 0 emitter *emit = getEmitter(); if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset); } else { emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset); } #else // !0 NYI("genCodeForStoreOffset"); #endif // !0 } // Generates CpBlk code by performing a loop unroll // Preconditions: // The size argument of the CpBlk node is a constant and <= 64 bytes. // This may seem small but covers >95% of the cases in several framework assemblies. 
void CodeGen::genCodeForCpBlkUnroll(GenTreeCpBlk* cpBlkNode)
{
    // Unrolled cpblk is not implemented for ARM64 yet; only the NYI below is live.
    // The disabled code is the xarch (XMM based) sequence, kept as a porting reference.
#if 0
    // Make sure we got the arguments of the cpblk operation in the right registers
    GenTreePtr blockSize = cpBlkNode->Size();
    GenTreePtr dstAddr = cpBlkNode->Dest();
    GenTreePtr srcAddr = cpBlkNode->Source();

    assert(blockSize->IsCnsIntOrI());
    size_t size = blockSize->gtIntCon.gtIconVal;
    assert(size <= CPBLK_UNROLL_LIMIT);

    emitter *emit = getEmitter();

    if (!srcAddr->isContained())
        genConsumeReg(srcAddr);

    if (!dstAddr->isContained())
        genConsumeReg(dstAddr);

    unsigned offset = 0;

    // If the size of this struct is larger than 16 bytes
    // let's use SSE2 to be able to do 16 byte at a time
    // loads and stores.
    if (size >= XMM_REGSIZE_BYTES)
    {
        assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
        assert(genCountBits(cpBlkNode->gtRsvdRegs) == 1);
        regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs);
        assert(genIsValidFloatReg(xmmReg));
        size_t slots = size / XMM_REGSIZE_BYTES;

        while (slots-- > 0)
        {
            // Load
            genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
            // Store
            genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
            offset += XMM_REGSIZE_BYTES;
        }
    }

    // Fill the remainder (15 bytes or less) if there's one.
    if ((size & 0xf) != 0)
    {
        // Grab the integer temp register to emit the remaining loads and stores.
        regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);

        if ((size & 8) != 0)
        {
            genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
            genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
            offset += 8;
        }
        if ((size & 4) != 0)
        {
            genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
            genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
            offset += 4;
        }
        if ((size & 2) != 0)
        {
            genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
            genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
            offset += 2;
        }
        if ((size & 1) != 0)
        {
            genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
            genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
        }
    }
#else // !0
    NYI("genCodeForCpBlkUnroll");
#endif // !0
}

// Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
// For this case we'll generate a sequence of loads/stores in the case of struct
// slots that don't contain GC pointers.
The generated code will look like: // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] // // In the case of a GC-Pointer we'll call the ByRef write barrier helper // who happens to use the same registers as the previous call to maintain // the same register requirements and register killsets: // bl CORINFO_HELP_ASSIGN_BYREF // // So finally an example would look like this: // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] // bl CORINFO_HELP_ASSIGN_BYREF // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] // bl CORINFO_HELP_ASSIGN_BYREF // ldr tempReg, [R13, #8] // str tempReg, [R14, #8] void CodeGen::genCodeForCpObj(GenTreeCpObj* cpObjNode) { // Make sure we got the arguments of the cpobj operation in the right registers GenTreePtr clsTok = cpObjNode->ClsTok(); GenTreePtr dstAddr = cpObjNode->Dest(); GenTreePtr srcAddr = cpObjNode->Source(); bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!srcAddr->isContained()); // This GenTree node has data about GC pointers, this means we're dealing // with CpObj. assert(cpObjNode->gtGcPtrCount > 0); #endif // DEBUG // Consume these registers. // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). genConsumeRegAndCopy(srcAddr, REG_WRITE_BARRIER_SRC_BYREF); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddr->TypeGet()); genConsumeRegAndCopy(dstAddr, REG_WRITE_BARRIER_DST_BYREF); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); // Temp register used to perform the sequence of loads and stores. regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs); #ifdef DEBUG assert(cpObjNode->gtRsvdRegs != RBM_NONE); assert(genCountBits(cpObjNode->gtRsvdRegs) == 1); assert(genIsValidIntReg(tmpReg)); #endif // DEBUG unsigned slots = cpObjNode->gtSlots; emitter *emit = getEmitter(); // If we can prove it's on the stack we don't need to use the write barrier. 
if (dstOnStack) { // TODO-ARM64-CQ: Consider using LDP/STP to save codesize. while (slots > 0) { emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); slots--; } } else { BYTE* gcPtrs = cpObjNode->gtGcPtrs; unsigned gcPtrCount = cpObjNode->gtGcPtrCount; unsigned i = 0; while (i < slots) { switch (gcPtrs[i]) { case TYPE_GC_NONE: // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contigous NON-GC slots. emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); break; default: // We have a GC pointer, call the memory barrier. genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); gcPtrCount--; break; } ++i; } assert(gcPtrCount == 0); } // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF. // While we normally update GC info prior to the last instruction that uses them, // these actually live into the helper call. gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); } // Generate code for a CpBlk node by the means of the VM memcpy helper call // Preconditions: // a) The size argument of the CpBlk is not an integer constant // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes. void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode) { // Make sure we got the arguments of the cpblk operation in the right registers GenTreePtr blockSize = cpBlkNode->Size(); GenTreePtr dstAddr = cpBlkNode->Dest(); GenTreePtr srcAddr = cpBlkNode->Source(); assert(!dstAddr->isContained()); assert(!srcAddr->isContained()); assert(!blockSize->isContained()); // Enable this when we support cpblk loop unrolling. 
#if 0 #ifdef DEBUG if (blockSize->IsCnsIntOrI()) { assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT); } #endif // DEBUG #endif // 0 genConsumeRegAndCopy(blockSize, REG_ARG_2); genConsumeRegAndCopy(srcAddr, REG_ARG_1); genConsumeRegAndCopy(dstAddr, REG_ARG_0); genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); } // generate code do a switch statement based on a table of ip-relative offsets void CodeGen::genTableBasedSwitch(GenTree* treeNode) { NYI("Emit table based switch"); genConsumeOperands(treeNode->AsOp()); regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); // load the ip-relative offset (which is relative to start of fgFirstBB) //getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0); // add it to the absolute address of fgFirstBB compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; //getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); //getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg); // jmp baseReg // getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg); } // emits the table and an instruction to get the address of the first element void CodeGen::genJumpTable(GenTree* treeNode) { NYI("Emit Jump table"); noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH); assert(treeNode->OperGet() == GT_JMPTABLE); unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount; BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab; unsigned jmpTabOffs; unsigned jmpTabBase; jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true); jmpTabOffs = 0; JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase); for (unsigned i = 0; ibbFlags & BBF_JMP_TARGET); JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum); getEmitter()->emitDataGenData(i, target); }; getEmitter()->emitDataGenEnd(); // Access to 
inline data is 'abstracted' by a special type of static member // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference // to constant data, not a real static field. getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum, compiler->eeFindJitDataOffs(jmpTabBase), 0); genProduceReg(treeNode); } // generate code for the locked operations: // GT_LOCKADD, GT_XCHG, GT_XADD void CodeGen::genLockedInstructions(GenTree* treeNode) { #if 0 GenTree* data = treeNode->gtOp.gtOp2; GenTree* addr = treeNode->gtOp.gtOp1; regNumber targetReg = treeNode->gtRegNum; regNumber dataReg = data->gtRegNum; regNumber addrReg = addr->gtRegNum; instruction ins; // all of these nodes implicitly do an indirection on op1 // so create a temporary node to feed into the pattern matching GenTreeIndir i = indirForm(data->TypeGet(), addr); genConsumeReg(addr); // The register allocator should have extended the lifetime of the address // so that it is not used as the target. noway_assert(addrReg != targetReg); // If data is a lclVar that's not a last use, we'd better have allocated a register // for the result (except in the case of GT_LOCKADD which does not produce a register result). assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0); genConsumeIfReg(data); if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg) { inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg); data->gtRegNum = targetReg; // TODO-ARM64-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the // original gtRegNum on data, after calling emitInsBinary below. 
} switch (treeNode->OperGet()) { case GT_LOCKADD: instGen(INS_lock); ins = INS_add; break; case GT_XCHG: // lock is implied by xchg ins = INS_xchg; break; case GT_XADD: instGen(INS_lock); ins = INS_xadd; break; default: unreached(); } getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data); if (treeNode->gtRegNum != REG_NA) { genProduceReg(treeNode); } #else // !0 NYI("genLockedInstructions"); #endif // !0 } // generate code for BoundsCheck nodes void CodeGen::genRangeCheck(GenTreePtr oper) { #ifdef FEATURE_SIMD noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK); #else // !FEATURE_SIMD noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); #endif // !FEATURE_SIMD GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); GenTreePtr arrLen = bndsChk->gtArrLen; GenTreePtr arrIndex = bndsChk->gtIndex; GenTreePtr arrRef = NULL; int lenOffset = 0; GenTree *src1, *src2; emitJumpKind jmpKind; genConsumeRegs(arrLen); genConsumeRegs(arrIndex); if (arrIndex->isContainedIntOrIImmed()) { src1 = arrLen; src2 = arrIndex; jmpKind = EJ_jbe; } else { src1 = arrIndex; src2 = arrLen; jmpKind = EJ_jae; } GenTreeIntConCommon* intConst = nullptr; if (src2->isContainedIntOrIImmed()) { intConst = src2->AsIntConCommon(); } if (intConst != nullptr) { getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue()); } else { getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum); } genJumpToThrowHlpBlk(jmpKind, Compiler::ACK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); } //------------------------------------------------------------------------ // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the // lower bound for the given dimension. // // Arguments: // elemType - the element type of the array // rank - the rank of the array // dimension - the dimension for which the lower bound offset will be returned. // // Return Value: // The offset. 
// TODO-Cleanup: move to CodeGenCommon.cpp // static unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension) { // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank); } //------------------------------------------------------------------------ // genOffsetOfMDArrayLength: Returns the offset from the Array object to the // size for the given dimension. // // Arguments: // elemType - the element type of the array // rank - the rank of the array // dimension - the dimension for which the lower bound offset will be returned. // // Return Value: // The offset. // TODO-Cleanup: move to CodeGenCommon.cpp // static unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) { // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; } //------------------------------------------------------------------------ // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, // producing the effective index by subtracting the lower bound. // // Arguments: // arrIndex - the node for which we're generating code // // Return Value: // None. // void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) { #if 0 GenTreePtr arrObj = arrIndex->ArrObj(); GenTreePtr indexNode = arrIndex->IndexExpr(); regNumber arrReg = genConsumeReg(arrObj); regNumber indexReg = genConsumeReg(indexNode); regNumber tgtReg = arrIndex->gtRegNum; unsigned dim = arrIndex->gtCurrDim; unsigned rank = arrIndex->gtArrRank; var_types elemType = arrIndex->gtArrElemType; noway_assert(tgtReg != REG_NA); // Subtract the lower bound for this dimension. 
// TODO-ARM64-CQ: make this contained if it's an immediate that fits. if (tgtReg != indexReg) { inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); } getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg, genOffsetOfMDArrayLowerBound(elemType, rank, dim)); getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg, genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); genJumpToThrowHlpBlk(EJ_jae, Compiler::ACK_RNGCHK_FAIL); genProduceReg(arrIndex); #else // !0 NYI("genCodeForArrIndex"); #endif // !0 } //------------------------------------------------------------------------ // genCodeForArrOffset: Generates code to compute the flattened array offset for // one dimension of an array reference: // result = (prevDimOffset * dimSize) + effectiveIndex // where dimSize is obtained from the arrObj operand // // Arguments: // arrOffset - the node for which we're generating code // // Return Value: // None. // // Notes: // dimSize and effectiveIndex are always non-negative, the former by design, // and the latter because it has been normalized to be zero-based. void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) { #if 0 GenTreePtr offsetNode = arrOffset->gtOffset; GenTreePtr indexNode = arrOffset->gtIndex; GenTreePtr arrObj = arrOffset->gtArrObj; regNumber tgtReg = arrOffset->gtRegNum; noway_assert(tgtReg != REG_NA); unsigned dim = arrOffset->gtCurrDim; unsigned rank = arrOffset->gtArrRank; var_types elemType = arrOffset->gtArrElemType; // We will use a temp register for the offset*scale+effectiveIndex computation. regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; regNumber tmpReg = genRegNumFromMask(tmpRegMask); if (!offsetNode->IsZero()) { // Evaluate tgtReg = offsetReg*dim_size + indexReg. // tmpReg is used to load dim_size and the result of the multiplication. // Note that dim_size will never be negative. 
regNumber offsetReg = genConsumeReg(offsetNode); regNumber indexReg = genConsumeReg(indexNode); regNumber arrReg = genConsumeReg(arrObj); getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg, genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); inst_RV_RV(INS_imul, tmpReg, offsetReg); if (tmpReg == tgtReg) { inst_RV_RV(INS_add, tmpReg, indexReg); } else { if (indexReg != tgtReg) { inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); } inst_RV_RV(INS_add, tgtReg, tmpReg); } } else { regNumber indexReg = genConsumeReg(indexNode); if (indexReg != tgtReg) { inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); } } genProduceReg(arrOffset); #else // !0 NYI("genCodeForArrOffset"); #endif // !0 } // make a temporary indir we can feed to pattern matching routines // in cases where we don't want to instantiate all the indirs that happen // // TODO-Cleanup: move to CodeGenCommon.cpp GenTreeIndir CodeGen::indirForm(var_types type, GenTree *base) { GenTreeIndir i(GT_IND, type, base, nullptr); i.gtRegNum = REG_NA; // has to be nonnull (because contained nodes can't be the last in block) // but don't want it to be a valid pointer i.gtNext = (GenTree *)(-1); return i; } // make a temporary int we can feed to pattern matching routines // in cases where we don't want to instantiate // // TODO-Cleanup: move to CodeGenCommon.cpp GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) { GenTreeIntCon i(type, value); i.gtRegNum = REG_NA; // has to be nonnull (because contained nodes can't be the last in block) // but don't want it to be a valid pointer i.gtNext = (GenTree *)(-1); return i; } instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins = INS_brk; if (varTypeIsFloating(type)) { switch (oper) { case GT_ADD: ins = INS_fadd; break; case GT_SUB: ins = INS_fsub; break; case GT_MUL: ins = INS_fmul; break; case GT_DIV: ins = INS_fdiv; break; case GT_NEG: ins = INS_fneg; break; default: NYI("Unhandled oper in 
genGetInsForOper() - float"); unreached(); break; } } else { switch (oper) { case GT_ADD: ins = INS_add; break; case GT_AND: ins = INS_and; break; case GT_DIV: ins = INS_sdiv; break; case GT_UDIV: ins = INS_udiv; break; case GT_MUL: ins = INS_mul; break; case GT_LSH: ins = INS_lsl; break; case GT_NOT: ins = INS_mvn; break; case GT_OR: ins = INS_orr; break; case GT_RSH: ins = INS_asr; break; case GT_RSZ: ins = INS_lsr; break; case GT_SUB: ins = INS_sub; break; case GT_XOR: ins = INS_eor; break; default: NYI("Unhandled oper in genGetInsForOper() - integer"); unreached(); break; } } return ins; } /** Generates the code sequence for a GenTree node that * represents a bit shift operation (<<, >>, >>>). * * Arguments: operand: the value to be shifted by shiftBy bits. * shiftBy: the number of bits to shift the operand. * parent: the actual bitshift node (that specifies the * type of bitshift to perform. * * Preconditions: a) All GenTrees are register allocated. * b) Either shiftBy is a contained constant or * it's an expression sitting in RCX. * c) The actual bit shift node is not stack allocated * nor contained (not yet supported). 
*/ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, GenTreePtr parent) { var_types targetType = parent->TypeGet(); genTreeOps oper = parent->OperGet(); instruction ins = genGetInsForOper(oper, targetType); emitAttr size = emitTypeSize(parent); assert(parent->gtRegNum != REG_NA); genConsumeReg(operand); if (!shiftBy->IsCnsIntOrI()) { genConsumeReg(shiftBy); getEmitter()->emitIns_R_R_R(ins, size, parent->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum); } else { getEmitter()->emitIns_R_R_I(ins, size, parent->gtRegNum, operand->gtRegNum, shiftBy->gtIntCon.gtIconVal); } genProduceReg(parent); } // TODO-Cleanup: move to CodeGenCommon.cpp void CodeGen::genUnspillRegIfNeeded(GenTree *tree) { regNumber dstReg = tree->gtRegNum; GenTree* unspillTree = tree; if (tree->gtOper == GT_RELOAD) { unspillTree = tree->gtOp.gtOp1; } if (unspillTree->gtFlags & GTF_SPILLED) { if (genIsRegCandidateLocal(unspillTree)) { // Reset spilled flag, since we are going to load a local variable from its home location. unspillTree->gtFlags &= ~GTF_SPILLED; GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; // Load local variable from its home location. inst_RV_TT(ins_Load(unspillTree->gtType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); unspillTree->SetInReg(); // TODO-Review: We would like to call: // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); // instead of the following code, but this ends up hitting this assert: // assert((regSet.rsMaskVars & regMask) == 0); // due to issues with LSRA resolution moves. // So, just force it for now. This probably indicates a condition that creates a GC hole! 
// // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, // because the variable is not really going live or dead, but that method is somewhat poorly // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. // This code exists in other CodeGen*.cpp files. // Don't update the variable's location if we are just re-spilling it again. if ((unspillTree->gtFlags & GTF_SPILL) == 0) { genUpdateVarReg(varDsc, tree); #ifdef DEBUG if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) { JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); } #endif // DEBUG VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); #ifdef DEBUG if (compiler->verbose) { printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); varDsc->PrintVarReg(); printf(" is becoming live "); compiler->printTreeID(unspillTree); printf("\n"); } #endif // DEBUG regSet.rsMaskVars |= genGetRegMask(varDsc); } } else { TempDsc* t = regSet.rsUnspillInPlace(unspillTree); getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg, t->tdTempNum(), 0); compiler->tmpRlsTemp(t); unspillTree->gtFlags &= ~GTF_SPILLED; unspillTree->SetInReg(); } gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); } } // Do Liveness update for a subnodes that is being consumed by codegen // including the logic for reload in case is needed and also takes care // of locating the value on the desired register. 
void CodeGen::genConsumeRegAndCopy(GenTree *tree, regNumber needReg)
{
    // Consume first; the register returned is where the value currently is.
    regNumber treeReg = genConsumeReg(tree);
    if (treeReg != needReg)
    {
        // The value is not where the caller wants it: copy it into 'needReg'.
        var_types targetType = tree->TypeGet();
        inst_RV_RV(ins_Copy(targetType), needReg, treeReg, targetType);
    }
}

//------------------------------------------------------------------------
// genRegCopy: Generate code for a GT_COPY node, copying the value of its operand
//    into the register assigned to the GT_COPY node.
//
// Arguments:
//    treeNode - the GT_COPY node; must have a register assigned
//
// Return Value:
//    None.
//
// Notes:
//    A copy between a floating point and a non-floating point node is NYI on ARM64.
//
void CodeGen::genRegCopy(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_COPY);

    var_types targetType = treeNode->TypeGet();
    regNumber targetReg  = treeNode->gtRegNum;
    assert(targetReg != REG_NA);

    GenTree* op1 = treeNode->gtOp.gtOp1;

    // Check whether this node and the node from which we're copying the value have the same
    // register type.
    // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
    // register, in which case it is passed as an argument, or returned from a call,
    // in an integer register and must be copied if it's in an xmm register.

    if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
    {
#if 0
        instruction ins;
        regNumber fpReg;
        regNumber intReg;
        if(varTypeIsFloating(treeNode))
        {
            ins = INS_mov_i2xmm;
            fpReg = targetReg;
            intReg = op1->gtRegNum;
        }
        else
        {
            ins = INS_mov_xmm2i;
            intReg = targetReg;
            fpReg = op1->gtRegNum;
        }
        inst_RV_RV(ins, fpReg, intReg, targetType);
#else
        NYI_ARM64("CodeGen - FP/Int RegCopy");
#endif
    }
    else
    {
        // Same register kind: op1 is consumed as part of emitting the copy.
        inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
    }

    if (op1->IsLocal())
    {
        // The lclVar will never be a def.
        // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
        // appropriately set the gcInfo for the copied value.
        // If not, there are two cases we need to handle:
        // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
        //   will remain live in its original register.
        //   genProduceReg() will appropriately set the gcInfo for the copied value,
        //   and genConsumeReg will reset it.
        // - Otherwise, we need to update register info for the lclVar.

        GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
        assert((lcl->gtFlags & GTF_VAR_DEF) == 0);

        if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
        {
            LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];

            // If we didn't just spill it (in genConsumeReg, above), then update the register info
            if (varDsc->lvRegNum != REG_STK)
            {
                // The old location is dying
                genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));

                gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));

                genUpdateVarReg(varDsc, treeNode);

                // The new location is going live
                genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
            }
        }
    }
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genConsumeReg: Do liveness update for a subnode that is being consumed by codegen.
//
// Arguments:
//    tree - the node being consumed; must have a register assigned
//
// Return Value:
//    tree->gtRegNum, the register that holds the value.
//
// Notes:
//    A GT_COPY node has its code generated here (via genRegCopy), and a spilled
//    value is reloaded (via genUnspillRegIfNeeded) before liveness is updated.
//
// TODO-Cleanup: move to CodeGenCommon.cpp
regNumber CodeGen::genConsumeReg(GenTree *tree)
{
    if (tree->OperGet() == GT_COPY)
    {
        genRegCopy(tree);
    }

    // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
    // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
    // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
    // always using GT_COPY to make the lclVar location explicit.
    // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
    // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
    // the lclVar (normally when a lclVar is spilled it is then used from its former register
    // location, which matches the gtRegNum on the node).
    // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
    // because if it's on the stack it will always get reloaded into tree->gtRegNum).
    if (genIsRegCandidateLocal(tree))
    {
        GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
        LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
        if ((varDsc->lvRegNum != REG_STK) && (varDsc->lvRegNum != tree->gtRegNum))
        {
            inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum);
        }
    }

    genUnspillRegIfNeeded(tree);

    // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
    genUpdateLife(tree);

    assert(tree->gtRegNum != REG_NA);

    // there are three cases where consuming a reg means clearing the bit in the live mask
    // 1. it was not produced by a local
    // 2. it was produced by a local that is going dead
    // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)

    if (genIsRegCandidateLocal(tree))
    {
        GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
        LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
        assert(varDsc->lvLRACandidate);

        if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
        {
            // Case 2: last use of the local.
            gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
        }
        else if (varDsc->lvRegNum == REG_STK)
        {
            // Case 3.
            // We have loaded this into a register only temporarily
            gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
        }
    }
    else
    {
        // Case 1: not a register candidate local.
        gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
    }

    return tree->gtRegNum;
}

// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
// TODO-Cleanup: move to CodeGenCommon.cpp void CodeGen::genConsumeAddress(GenTree* addr) { if (addr->OperGet() == GT_LEA) { genConsumeAddrMode(addr->AsAddrMode()); } else if (!addr->isContained()) { genConsumeReg(addr); } } // do liveness update for a subnode that is being consumed by codegen // TODO-Cleanup: move to CodeGenCommon.cpp void CodeGen::genConsumeAddrMode(GenTreeAddrMode *addr) { if (addr->Base()) genConsumeReg(addr->Base()); if (addr->Index()) genConsumeReg(addr->Index()); } // TODO-Cleanup: move to CodeGenCommon.cpp void CodeGen::genConsumeRegs(GenTree* tree) { if (tree->isContained()) { if (tree->isIndir()) { genConsumeAddress(tree->AsIndir()->Addr()); } else if (tree->OperGet() == GT_AND) { // This is the special contained GT_AND that we created in Lowering::LowerCmp() // Now we need to consume the operands of the GT_AND node. genConsumeOperands(tree->AsOp()); } else { assert(tree->OperIsLeaf()); } } else { genConsumeReg(tree); } } //------------------------------------------------------------------------ // genConsumeOperands: Do liveness update for the operands of a unary or binary tree // // Arguments: // tree - the GenTreeOp whose operands will have their liveness updated. // // Return Value: // None. // // Notes: // Note that this logic is localized here because we must do the liveness update in // the correct execution order. This is important because we may have two operands // that involve the same lclVar, and if one is marked "lastUse" we must handle it // after the first. 
// TODO-Cleanup: move to CodeGenCommon.cpp
void CodeGen::genConsumeOperands(GenTreeOp* tree)
{
    GenTree* firstOp = tree->gtOp1;
    GenTree* secondOp = tree->gtOp2;

    // With GTF_REVERSE_OPS set, gtOp2 is the operand to be consumed first.
    if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
    {
        assert(secondOp != nullptr);
        firstOp = secondOp;
        secondOp = tree->gtOp1;
    }
    if (firstOp != nullptr)
    {
        genConsumeRegs(firstOp);
    }
    if (secondOp != nullptr)
    {
        genConsumeRegs(secondOp);
    }
}

// do liveness update for register produced by the current node in codegen
//
// If the node is marked GTF_SPILL the value is also stored: a register candidate
// local is stored to its home location, while any other node is spilled via
// rsSpillTree(), marked GTF_SPILLED, and the function returns without the
// liveness/GC register update below.
//
// TODO-Cleanup: move to CodeGenCommon.cpp
void CodeGen::genProduceReg(GenTree *tree)
{
    if (tree->gtFlags & GTF_SPILL)
    {
        if (genIsRegCandidateLocal(tree))
        {
            // Store local variable to its home location.
            tree->gtFlags &= ~GTF_REG_VAL;
            inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree, tree->gtRegNum);
        }
        else
        {
            tree->SetInReg();
            regSet.rsSpillTree(tree->gtRegNum, tree);
            tree->gtFlags |= GTF_SPILLED;
            tree->gtFlags &= ~GTF_SPILL;
            // The value now lives in the spill temp; the register no longer holds a GC pointer.
            gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
            return;
        }
    }

    genUpdateLife(tree);

    // If we've produced a register, mark it as a pointer, as needed.
    if (tree->gtHasReg())
    {
        // We only mark the register in the following cases:
        // 1. It is not a register candidate local. In this case, we're producing a
        //    register from a local, but the local is not a register candidate. Thus,
        //    we must be loading it as a temp register, and any "last use" flag on
        //    the register wouldn't be relevant.
        // 2. It is a register candidate local that is NOT going dead (GTF_VAR_DEATH
        //    is clear). There's no point to mark the register as live, with a GC
        //    pointer, if the variable is dead.
        if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
        {
            gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
        }
    }
    tree->SetInReg();
}

// transfer gc/byref status of src reg to dst reg
// (dst becomes a GC ref if src is one, else a byref if src is one, else a non-pointer)
// TODO-Cleanup: move to CodeGenCommon.cpp
void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
{
    regMaskTP srcMask = genRegMask(src);
    regMaskTP dstMask = genRegMask(dst);

    if (gcInfo.gcRegGCrefSetCur & srcMask)
    {
        gcInfo.gcMarkRegSetGCref(dstMask);
    }
    else if (gcInfo.gcRegByrefSetCur & srcMask)
    {
        gcInfo.gcMarkRegSetByref(dstMask);
    }
    else
    {
        gcInfo.gcMarkRegSetNpt(dstMask);
    }
}

// generates an ip-relative call or indirect call via reg ('call reg')
//     pass in 'addr' for a relative call or 'base' for a indirect register call
//     methHnd - optional, only used for pretty printing
//     retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
//     isJump  - forwarded unchanged to emitIns_Call
//     isNoGC  - NOTE(review): not referenced in the body; the final emitIns_Call argument is
//               computed from emitNoGChelper(eeGetHelperNum(methHnd)) instead - confirm intended
// TODO-Cleanup: move to CodeGenCommon.cpp
void CodeGen::genEmitCall(int                   callType,
                          CORINFO_METHOD_HANDLE methHnd,
                          INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
                          void*                 addr,
                          emitAttr              retSize,
                          IL_OFFSETX            ilOffset,
                          regNumber             base,
                          bool                  isJump,
                          bool                  isNoGC)
{
    getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
                               methHnd,
                               INDEBUG_LDISASM_COMMA(sigInfo)
                               addr,
                               0,
                               retSize,
                               gcInfo.gcVarPtrSetCur,
                               gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur,
                               ilOffset,
                               base, REG_NA, 0, 0,
                               isJump,
                               emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
}

// generates an indirect call via addressing mode (call []) given an indir node
//     methHnd - optional, only used for pretty printing
//     retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
// The address of 'indir' is consumed before the call is emitted.
// TODO-Cleanup: move to CodeGenCommon.cpp
void CodeGen::genEmitCall(int                   callType,
                          CORINFO_METHOD_HANDLE methHnd,
                          INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
                          GenTreeIndir*         indir,
                          emitAttr              retSize,
                          IL_OFFSETX            ilOffset)
{
    genConsumeAddress(indir->Addr());

    getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
                               methHnd,
                               INDEBUG_LDISASM_COMMA(sigInfo)
                               nullptr,
                               0,
                               retSize,
                               gcInfo.gcVarPtrSetCur,
                               gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur,
                               ilOffset,
                               indir->Base()  ? indir->Base()->gtRegNum  : REG_NA,
                               indir->Index() ? indir->Index()->gtRegNum : REG_NA,
                               indir->Scale(),
                               indir->Offset());
}

// Produce code for a GT_CALL node
//
// In order: consumes the late (register) arguments and moves them into their argument
// registers, emits the optional null check on "this", emits the call (through a
// register when there is a target expression, otherwise directly to a helper or user
// method address), and finally updates the GC register sets and produces the return
// register. Fast tail calls and floating point arguments to varargs calls are NYI.
void CodeGen::genCallInstruction(GenTreePtr node)
{
    GenTreeCall *call = node->AsCall();

    assert(call->gtOper == GT_CALL);

    gtCallTypes callType = (gtCallTypes)call->gtCallType;

    IL_OFFSETX ilOffset = BAD_IL_OFFSET;

    // all virtuals should have been expanded into a control expression
    assert (!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);

    // Consume all the arg regs
    for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
    {
        assert(list->IsList());

        GenTreePtr argNode = list->Current();

        fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
        assert(curArgTabEntry);

        // Arguments passed on the stack have no register to consume here.
        if (curArgTabEntry->regNum == REG_STK)
            continue;

        regNumber argReg = curArgTabEntry->regNum;
        genConsumeReg(argNode);
        if (argNode->gtRegNum != argReg)
        {
            inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
        }

        // In the case of a varargs call,
        // the ABI dictates that if we have floating point args,
        // we must pass the enregistered arguments in both the
        // integer and floating point registers so, let's do that.
        if (call->IsVarargs() && varTypeIsFloating(argNode))
        {
            NYI_ARM64("CodeGen - IsVarargs");
        }
    }

    // Insert a null check on "this" pointer if asked.
    if (call->NeedsNullCheck())
    {
        const regNumber regThis = genGetThisArgReg(call);
        // A 4-byte load from [regThis] into the zero register: the loaded value is discarded.
        getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
    }

    // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
    CORINFO_METHOD_HANDLE methHnd;
    GenTree* target = call->gtControlExpr;
    if (callType == CT_INDIRECT)
    {
        assert(target == nullptr);
        target = call->gtCall.gtCallAddr;
        methHnd = nullptr;
    }
    else
    {
        methHnd = call->gtCallMethHnd;
    }

    CORINFO_SIG_INFO* sigInfo = nullptr;
#ifdef DEBUG
    // Pass the call signature information down into the emitter so the emitter can associate
    // native call sites with the signatures they were generated from.
    if (callType != CT_HELPER)
    {
        sigInfo = call->callSig;
    }
#endif // DEBUG

    // If fast tail call, then we are done.  In this case we setup the args (both reg args
    // and stack args in incoming arg area) and call target in rax.  Epilog sequence would
    // generate "br x0".
    // NOTE(review): "rax" above and the disabled REG_RAX code below look inherited from the
    // x64 code generator; this path is NYI on ARM64.
    if (call->IsFastTailCall())
    {
        NYI_ARM64("CodeGen - IsFastTailCall");

        // Don't support fast tail calling JIT helpers
        assert(callType != CT_HELPER);

        // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
        assert(target != nullptr);

        genConsumeReg(target);

#if 0
        if (target->gtRegNum != REG_RAX)
        {
            inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
        }
#endif
        return;
    }

    // For a pinvoke to unmanaged code we emit a label to clear
    // the GC pointer state before the callsite.
    // We can't utilize the typical lazy killing of GC pointers
    // at (or inside) the callsite.
    if (call->IsUnmanaged())
    {
        genDefineTempLabel(genCreateTempLabel());
    }

    // Determine return value size.
    emitAttr retSize = EA_PTRSIZE;
    if (call->gtType == TYP_REF ||
        call->gtType == TYP_ARRAY)
    {
        retSize = EA_GCREF;
    }
    else if (call->gtType == TYP_BYREF)
    {
        retSize = EA_BYREF;
    }

#ifdef DEBUGGING_SUPPORT
    // We need to propagate the IL offset information to the call instruction, so we can emit
    // an IL to native mapping record for the call, to support managed return value debugging.
    // We don't want tail call helper calls that were converted from normal calls to get a record,
    // so we skip this hash table lookup logic in that case.
    if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
    {
        (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
    }
#endif // DEBUGGING_SUPPORT

    if (target != nullptr)
    {
        // For Arm64 a call target can not be a contained indirection
        assert(!target->isContainedIndir());

        // We have already generated code for gtControlExpr evaluating it into a register.
        // We just need to emit "call reg" in this case.
        //
        assert(genIsValidIntReg(target->gtRegNum));

        genEmitCall(emitter::EC_INDIR_R,
                    methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo)
                    nullptr, //addr
                    retSize,
                    ilOffset,
                    genConsumeReg(target));
    }
    else
    {
        // Generate a direct call to a non-virtual user defined or helper method
        assert(callType == CT_HELPER || callType == CT_USER_FUNC);

        void *addr = nullptr;

        if (callType == CT_HELPER)
        {
            // Direct call to a helper method.
            CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
            noway_assert(helperNum != CORINFO_HELP_UNDEF);

            void *pAddr = nullptr;
            addr = compiler->compGetHelperFtn(helperNum, (void **)&pAddr);

            // No direct address was returned: fall back to the value stored in pAddr.
            if (addr == nullptr)
            {
                addr = pAddr;
            }
        }
        else
        {
            // Direct call to a non-virtual user function.
            CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
            if (call->IsSameThis())
            {
                aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
            }

            if ((call->NeedsNullCheck()) == 0)
            {
                aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
            }

            CORINFO_CONST_LOOKUP addrInfo;
            compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);

            addr = addrInfo.addr;
        }
#if 0
        // Use this path if you want to load an absolute call target using
        //  a sequence of movs followed by an indirect call (blr instruction)

        // Load the call target address in x16
        instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);

        // indirect call to constant address in IP0
        genEmitCall(emitter::EC_INDIR_R,
                    methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo)
                    nullptr, //addr
                    retSize,
                    ilOffset,
                    REG_IP0);
#else
        // Non-virtual direct call to known addresses
        genEmitCall(emitter::EC_FUNC_TOKEN,
                    methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo)
                    addr,
                    retSize,
                    ilOffset);
#endif
    }

    // if it was a pinvoke we may have needed to get the address of a label
    if (genPendingCallLabel)
    {
        assert(call->IsUnmanaged());
        genDefineTempLabel(genPendingCallLabel);
        genPendingCallLabel = nullptr;
    }

    // Update GC info:
    // All Callee arg registers are trashed and no longer contain any GC pointers.
    // TODO-ARM64-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
    // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
    // registers from RBM_CALLEE_TRASH
    assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
    assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
    gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
    gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;

    var_types returnType = call->TypeGet();
    if (returnType != TYP_VOID)
    {
        // Move the result out of the return register if the call node was assigned a different one.
        regNumber returnReg = (varTypeIsFloating(returnType) ? REG_FLOATRET : REG_INTRET);
        if (call->gtRegNum != returnReg)
        {
            inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
        }
        genProduceReg(call);
    }

    // If there is nothing next, that means the result is thrown away, so this value is not live.
    // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
    if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
    {
        gcInfo.gcMarkRegSetNpt(RBM_INTRET);
    }
}

// Produce code for a GT_JMP node.
// The arguments of the caller needs to be transferred to the callee before exiting caller.
// The actual jump to callee is generated as part of caller epilog sequence.
// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
//
// On ARM64 only the "no arguments" case is handled; everything else is NYI and the
// implementation below is compiled out ('#if 0').
void CodeGen::genJmpMethod(GenTreePtr jmp)
{
    assert(jmp->OperGet() == GT_JMP);
    assert(compiler->compJmpOpUsed);

    // If no arguments, nothing to do
    if (compiler->info.compArgsCount == 0)
    {
        return;
    }

#if 0
    // Make sure register arguments are in their initial registers
    // and stack arguments are put back as well.
    unsigned varNum;
    LclVarDsc* varDsc;

    // First move any en-registered stack arguments back to the stack.
    // At the same time any reg arg not in correct reg is moved back to its stack location.
    //
    // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
    // But that would require us to deal with circularity while moving values around.  Spilling
    // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
    // are not frequent.
    for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
    {
        varDsc = compiler->lvaTable + varNum;

        if (varDsc->lvPromoted)
        {
            noway_assert(varDsc->lvFieldCnt == 1);   // We only handle one field here

            unsigned fieldVarNum = varDsc->lvFieldLclStart;
            varDsc = compiler->lvaTable + fieldVarNum;
        }
        noway_assert(varDsc->lvIsParam);

        if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
        {
            // Skip reg args which are already in its right register for jmp call.
            // If not, we will spill such args to their stack locations.
            //
            // If we need to generate a tail call profiler hook, then spill all
            // arg regs to free them up for the callback.
            if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
                continue;
        }
        else if (varDsc->lvRegNum == REG_STK)
        {
            // Skip args which are currently living in stack.
            continue;
        }

        // If we came here it means either a reg argument not in the right register or
        // a stack argument currently living in a register.  In either case the following
        // assert should hold.
        assert(varDsc->lvRegNum != REG_STK);

        var_types loadType = varDsc->lvaArgType();
        getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);

        // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
        // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
        // Therefore manually update life of varDsc->lvRegNum.
        regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
        regSet.rsMaskVars &= ~tempMask;
        gcInfo.gcMarkRegSetNpt(tempMask);
        if (varDsc->lvTracked)
        {
            VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
        }
    }

#ifdef PROFILING_SUPPORTED
    // At this point all arg regs are free.
    // Emit tail call profiler callback.
    genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
#endif

    // Next move any un-enregistered register arguments back to their register.
    regMaskTP fixedIntArgMask = RBM_NONE;      // tracks the int arg regs occupying fixed args in case of a vararg method.
    unsigned firstArgVarNum = BAD_VAR_NUM;     // varNum of the first argument in case of a vararg method.
    for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
    {
        varDsc = compiler->lvaTable + varNum;
        if (varDsc->lvPromoted)
        {
            noway_assert(varDsc->lvFieldCnt == 1);   // We only handle one field here

            unsigned fieldVarNum = varDsc->lvFieldLclStart;
            varDsc = compiler->lvaTable + fieldVarNum;
        }
        noway_assert(varDsc->lvIsParam);

        // Skip if arg not passed in a register.
        if (!varDsc->lvIsRegArg)
            continue;

        // Register argument
        noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));

        // Is register argument already in the right register?
        // If not load it from its stack location.
        var_types loadType = varDsc->lvaArgType();
        regNumber argReg = varDsc->lvArgReg;    // incoming arg register

        if (varDsc->lvRegNum != argReg)
        {
            assert(genIsValidReg(argReg));

            getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);

            // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
            // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
            // Therefore manually update life of argReg.  Note that GT_JMP marks the end of the basic block
            // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
            regSet.rsMaskVars |= genRegMask(argReg);
            gcInfo.gcMarkRegPtrVal(argReg, loadType);
            if (varDsc->lvTracked)
            {
                VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
            }
        }

        // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
        if (compiler->info.compIsVarArgs)
        {
            regNumber intArgReg;
            if (varTypeIsFloating(loadType))
            {
                intArgReg = compiler->getCallArgIntRegister(argReg);
                inst_RV_RV(INS_mov_xmm2i, argReg, intArgReg, loadType);
            }
            else
            {
                intArgReg = argReg;
            }

            fixedIntArgMask |= genRegMask(intArgReg);

            if (intArgReg == REG_ARG_0)
            {
                assert(firstArgVarNum == BAD_VAR_NUM);
                firstArgVarNum = varNum;
            }
        }
    }

    // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
    // load the remaining arg registers (both int and float) from the corresponding
    // shadow stack slots.  This is for the reason that we don't know the number and type
    // of non-fixed params passed by the caller, therefore we have to assume the worst case
    // of caller passing float/double args both in int and float arg regs.
    //
    // The caller could have passed gc-ref/byref type var args.  Since these are var args
    // the callee no way of knowing their gc-ness.  Therefore, mark the region that loads
    // remaining arg registers from shadow stack slots as non-gc interruptible.
    if (fixedIntArgMask != RBM_NONE)
    {
        assert(compiler->info.compIsVarArgs);
        assert(firstArgVarNum != BAD_VAR_NUM);

        regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
        if (remainingIntArgMask != RBM_NONE)
        {
            getEmitter()->emitDisableGC();
            for (int argNum = 0, argOffset=0; argNum < MAX_REG_ARG; ++argNum)
            {
                regNumber argReg = intArgRegs[argNum];
                regMaskTP argRegMask = genRegMask(argReg);

                if ((remainingIntArgMask & argRegMask) != 0)
                {
                    remainingIntArgMask &= ~argRegMask;
                    getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);

                    // also load it in corresponding float arg reg
                    regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
                    inst_RV_RV(INS_mov_i2xmm, floatReg, argReg);
                }

                argOffset += REGSIZE_BYTES;
            }
            getEmitter()->emitEnableGC();
        }
    }
#else // !0
    NYI("genJmpMethod");
#endif // !0
}

// produce code for a GT_LEA subnode
//
// The address is computed into lea->gtRegNum with one or two 'add' instructions
// (or a 'mov' for a base with zero offset). An address with an index but no base
// is not expected here and asserts.
void CodeGen::genLeaInstruction(GenTreeAddrMode *lea)
{
    genConsumeOperands(lea);
    emitter *emit = getEmitter();
    emitAttr size = emitTypeSize(lea);

    // In ARM64 we can only load addresses of the form:
    //
    // [Base + index*scale]
    // [Base + Offset]
    // [Literal] (PC-Relative)
    //
    // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
    // destReg = baseReg + indexReg * scale;
    // destReg = destReg + offset;
    //
    // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
    //             addressing mode instruction.  Currently we're 'cheating' by producing one or more
    //             instructions to generate the addressing mode so we need to modify lowering to
    //             produce LEAs that are a 1:1 relationship to the ARM64 architecture.
    if (lea->Base() && lea->Index())
    {
        // The scale must be a power of two; 'lsl' receives its bit index, used as the shift amount.
        DWORD lsl;

        assert(isPow2(lea->gtScale));
        BitScanForward(&lsl, lea->gtScale);

        assert(lsl <= 4);

        // First, generate code to load rd = [base + index*scale]
        if (lsl > 0)
        {
            emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Index()->gtRegNum, lsl, INS_OPTS_LSL);
        }
        else
        {
            emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Index()->gtRegNum);
        }
        // If the offset is not zero, then compute rd = [rd + offset]
        if (lea->gtOffset != 0)
        {
            emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, lea->gtRegNum, (int) lea->gtOffset);
        }
    }
    else if (lea->Base())
    {
        if (lea->gtOffset != 0)
        {
            emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, (int) lea->gtOffset);
        }
        else
        {
            emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, lea->Base()->gtRegNum);
        }
    }
    else if (lea->Index())
    {
        // If we encounter a GT_LEA node without a base it means it came out
        // when attempting to optimize an arbitrary arithmetic expression during lower.
        // This is currently disabled in ARM64 since we need to adjust lower to account
        // for the simpler instructions ARM64 supports.
        // TODO-ARM64-CQ:  Fix this and let LEA optimize arithmetic trees too.
        assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
    }

    genProduceReg(lea);
}

// Generate code to materialize a condition into a register
// (the condition codes must already have been appropriately set)
//     dstReg - the register that receives the condition value
//     tree   - the node whose gtOper and GTF_UNSIGNED flag select the condition
void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
{
    // Get the "jmpKind" using the gtOper kind
    // Note that whether it is an unsigned cmp is governed by the GTF_UNSIGNED flags
    emitJumpKind jmpKind = genJumpKindForOper(tree->gtOper, (tree->gtFlags & GTF_UNSIGNED) != 0);

    inst_SET(jmpKind, dstReg);
}

//------------------------------------------------------------------------
// genIntToIntCast: Generate code for an integer cast
//    This method handles integer overflow checking casts
//    as well as ordinary integer casts.
// // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // The treeNode is not a contained node and must have an assigned register. // For a signed convert from byte, the source must be in a byte-addressable register. // Neither the source nor target type can be a floating point type. // // TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register. // void CodeGen::genIntToIntCast(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CAST); GenTreePtr castOp = treeNode->gtCast.CastOp(); emitter * emit = getEmitter(); var_types dstType = treeNode->CastToType(); var_types srcType = genActualType(castOp->TypeGet()); emitAttr movSize = emitActualTypeSize(dstType); bool movRequired = false; bool isUnsignedDst = varTypeIsUnsigned(dstType); bool isUnsignedSrc = varTypeIsUnsigned(srcType); bool requiresOverflowCheck = false; regNumber targetReg = treeNode->gtRegNum; regNumber sourceReg = castOp->gtRegNum; assert(genIsValidIntReg(targetReg)); assert(genIsValidIntReg(sourceReg)); instruction ins = INS_invalid; // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set. 
if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0) { srcType = genUnsignedType(srcType); isUnsignedSrc = true; } if (treeNode->gtOverflow() && (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG))) { requiresOverflowCheck = true; } genConsumeReg(castOp); if (requiresOverflowCheck) { emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); ssize_t typeMin = 0; ssize_t typeMax = 0; ssize_t typeMask = 0; bool signCheckOnly = false; /* Do we need to compare the value, or just check masks */ switch (dstType) { case TYP_BYTE: typeMask = ssize_t((int)0xFFFFFF80); typeMin = SCHAR_MIN; typeMax = SCHAR_MAX; break; case TYP_UBYTE: typeMask = ssize_t((int)0xFFFFFF00L); break; case TYP_SHORT: typeMask = ssize_t((int)0xFFFF8000); typeMin = SHRT_MIN; break; case TYP_CHAR: typeMask = ssize_t((int)0xFFFF0000L); break; case TYP_INT: if (srcType == TYP_UINT) { signCheckOnly = true; } else { typeMask = 0xFFFFFFFF80000000LL; typeMin = INT_MIN; typeMax = INT_MAX; } break; case TYP_UINT: if (srcType == TYP_INT) { signCheckOnly = true; } else { typeMask = 0xFFFFFFFF00000000LL; } break; case TYP_LONG: noway_assert(srcType == TYP_ULONG); signCheckOnly = true; break; case TYP_ULONG: noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT)); signCheckOnly = true; break; default: NO_WAY("Unknown type"); return; } if (signCheckOnly) { // We only need to check for a negative value in sourceReg emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0); genJumpToThrowHlpBlk(EJ_jl, Compiler::ACK_OVERFLOW); if (dstType == TYP_ULONG) { // cast to TYP_ULONG: // We use a mov with size=EA_4BYTE // which will zero out the upper bits movSize = EA_4BYTE; movRequired = true; } } else { // When we are converting from/to unsigned, // we only have to check for any bits set in 'typeMask' if (isUnsignedSrc || isUnsignedDst) { noway_assert(typeMask != 0); emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, typeMask); genJumpToThrowHlpBlk(EJ_jne, Compiler::ACK_OVERFLOW); } else { 
// For a narrowing signed cast // // We must check the value is in a signed range. // Compare with the MAX noway_assert((typeMin != 0) && (typeMax != 0)); emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, typeMax); genJumpToThrowHlpBlk(EJ_jg, Compiler::ACK_OVERFLOW); // Compare with the MIN emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, typeMin); genJumpToThrowHlpBlk(EJ_jl, Compiler::ACK_OVERFLOW); } } ins = INS_mov; } else // Non-overflow checking cast. { if (genTypeSize(srcType) == genTypeSize(dstType)) { ins = INS_mov; } else { var_types extendType; if (genTypeSize(srcType) < genTypeSize(dstType)) { extendType = srcType; if (srcType == TYP_UINT) { movSize = EA_4BYTE; // force a mov EA_4BYTE to zero the upper bits movRequired = true; } } else // (genTypeSize(srcType) > genTypeSize(dstType)) { extendType = dstType; if (dstType == TYP_INT) { movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE } } ins = ins_Move_Extend(extendType, castOp->InReg()); } } if ((ins != INS_mov) || movRequired || (targetReg != sourceReg)) { emit->emitIns_R_R(ins, movSize, targetReg, sourceReg); } genProduceReg(treeNode); } //------------------------------------------------------------------------ // genFloatToFloatCast: Generate code for a cast between float and double // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // The cast is between float and double or vice versa. // void CodeGen::genFloatToFloatCast(GenTreePtr treeNode) { // float <--> double conversions are always non-overflow ones assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidFloatReg(targetReg)); GenTreePtr op1 = treeNode->gtOp.gtOp1; assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg. 
var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); assert(srcType != dstType); // Must specify two different types insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double : INS_OPTS_D_TO_S; // convert Double to Single genConsumeOperands(treeNode->AsOp()); // treeNode must be a reg assert(!treeNode->isContained()); getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genIntToFloatCast: Generate code to cast an int/long to float/double // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // SrcType= int32/uint32/int64/uint64 and DstType=float/double. // void CodeGen::genIntToFloatCast(GenTreePtr treeNode) { // int type --> float/double conversions are always non-overflow ones assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidFloatReg(targetReg)); GenTreePtr op1 = treeNode->gtOp.gtOp1; assert(!op1->isContained()); // Cannot be contained assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg. var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); // force the srcType to unsigned if GT_UNSIGNED flag is set if (treeNode->gtFlags & GTF_UNSIGNED) { srcType = genUnsignedType(srcType); } // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE // For conversions from small types (byte/sbyte/int16/uint16) to float/double, // we expect the front-end or lowering phase to have generated two levels of cast. 
// emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); noway_assert((srcSize == EA_4BYTE) ||(srcSize == EA_8BYTE)); instruction ins = INS_scvtf; // default to sign converts insOpts cvtOption = INS_OPTS_NONE; // invalid value if (varTypeIsUnsigned(dstType)) { ins = INS_ucvtf; // use unsigned converts } if (dstType == TYP_DOUBLE) { if (srcSize == EA_4BYTE) { cvtOption = INS_OPTS_4BYTE_TO_D; } else { assert(srcSize == EA_8BYTE); cvtOption = INS_OPTS_8BYTE_TO_D; } } else { assert(dstType == TYP_FLOAT); if (srcSize == EA_4BYTE) { cvtOption = INS_OPTS_4BYTE_TO_S; } else { assert(srcSize == EA_8BYTE); cvtOption = INS_OPTS_8BYTE_TO_S; } } genConsumeOperands(treeNode->AsOp()); getEmitter()->emitIns_R_R(ins, emitTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genFloatToIntCast: Generate code to cast float/double to int/long // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // SrcType=float/double and DstType= int32/uint32/int64/uint64 // void CodeGen::genFloatToIntCast(GenTreePtr treeNode) { // we don't expect to see overflow detecting float/double --> int type conversions here // as they should have been converted into helper calls by front-end. assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. GenTreePtr op1 = treeNode->gtOp.gtOp1; assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg. 
var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE // For conversions to small types (byte/sbyte/int16/uint16) from float/double, // we expect the front-end or lowering phase to have generated two levels of cast. // emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); noway_assert((dstSize == EA_4BYTE) ||(dstSize == EA_8BYTE)); instruction ins = INS_fcvtzs; // default to sign converts insOpts cvtOption = INS_OPTS_NONE; // invalid value if (varTypeIsUnsigned(dstType)) { ins = INS_fcvtzu; // use unsigned converts } if (srcType == TYP_DOUBLE) { if (dstSize == EA_4BYTE) { cvtOption = INS_OPTS_D_TO_4BYTE; } else { assert(dstSize == EA_8BYTE); cvtOption = INS_OPTS_D_TO_8BYTE; } } else { assert(srcType == TYP_FLOAT); if (dstSize == EA_4BYTE) { cvtOption = INS_OPTS_S_TO_4BYTE; } else { assert(dstSize == EA_8BYTE); cvtOption = INS_OPTS_S_TO_8BYTE; } } genConsumeOperands(treeNode->AsOp()); getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genCkfinite: Generate code for ckfinite opcode. // // Arguments: // treeNode - The GT_CKFINITE node // // Return Value: // None. // // Assumptions: // GT_CKFINITE node has reserved an internal register. // // TODO-ARM64-CQ - mark the operand as contained if known to be in // memory (e.g. field or an array element). // void CodeGen::genCkfinite(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CKFINITE); #if 0 GenTreePtr op1 = treeNode->gtOp.gtOp1; var_types targetType = treeNode->TypeGet(); int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent. // Extract exponent into a register. 
assert(treeNode->gtRsvdRegs != RBM_NONE); assert(genCountBits(treeNode->gtRsvdRegs) == 1); regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); inst_RV_RV(INS_mov_xmm2i, genConsumeReg(op1), tmpReg, targetType); if (targetType == TYP_DOUBLE) { // right shift by 32 bits to get to exponent. inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32); } // Mask of exponent with all 1's and check if the exponent is all 1's inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); // If exponent is all 1's, throw ArithmeticException genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_ARITH_EXCPN); // if it is a finite value copy it to targetReg if (treeNode->gtRegNum != op1->gtRegNum) { inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); } genProduceReg(treeNode); #else // !0 NYI("genCkfinite"); #endif // !0 } int CodeGenInterface::genSPtoFPdelta() { int delta; // We place the saved frame pointer immediately above the outgoing argument space. delta = (int)compiler->lvaOutgoingArgSpaceSize; assert(delta >= 0); return delta; } //--------------------------------------------------------------------- // genTotalFrameSize - return the total size of the stack frame, including local size, // callee-saved register size, etc. // // Return value: // Total frame size // int CodeGenInterface::genTotalFrameSize() { // For varargs functions, we home all the incoming register arguments. They are not // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but // since we don't use "push" instructions to save them, we don't have to do the // save of these varargs register arguments as the first thing in the prolog. assert(!IsUninitialized(compiler->compCalleeRegsPushed)); int totalFrameSize = (compiler->info.compIsVarArgs ? 
MAX_REG_ARG * REGSIZE_BYTES : 0) + compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; assert(totalFrameSize >= 0); return totalFrameSize; } //--------------------------------------------------------------------- // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. // This number is going to be negative, since the Caller-SP is at a higher // address than the frame pointer. // // There must be a frame pointer to call this function! int CodeGenInterface::genCallerSPtoFPdelta() { assert(isFramePointerUsed()); int callerSPtoFPdelta; callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); assert(callerSPtoFPdelta <= 0); return callerSPtoFPdelta; } //--------------------------------------------------------------------- // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. // // This number will be negative. int CodeGenInterface::genCallerSPtoInitialSPdelta() { int callerSPtoSPdelta = 0; callerSPtoSPdelta -= genTotalFrameSize(); assert(callerSPtoSPdelta <= 0); return callerSPtoSPdelta; } //--------------------------------------------------------------------- // genMathIntrinsic - generate code for a given math intrinsic // // Arguments // treeNode - the GT_MATH node // // Return value: // None // void CodeGen::genMathIntrinsic(GenTreePtr treeNode) { #if 0 // Right now only Sqrt/Abs are treated as math intrinsics. 
switch(treeNode->gtMath.gtMathFN) { case CORINFO_INTRINSIC_Sqrt: noway_assert(treeNode->TypeGet() == TYP_DOUBLE); genConsumeOperands(treeNode->AsOp()); getEmitter()->emitInsBinary(INS_sqrtsd, emitTypeSize(treeNode), treeNode, treeNode->gtOp.gtOp1); break; case CORINFO_INTRINSIC_Abs: genSSE2BitwiseOp(treeNode); break; default: assert(!"genMathIntrinsic: Unsupported math intrinsic"); unreached(); } genProduceReg(treeNode); #else // !0 NYI("genMathIntrinsic"); #endif // !0 } /***************************************************************************** * * Create and record GC Info for the function. */ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUG_ARG(void* codePtr)) { genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUG_ARG(codePtr)); } void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUG_ARG(void* codePtr)) { IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc); assert(gcInfoEncoder != nullptr); // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); // First we figure out the encoder ID's for the stack slots and registers. gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS); // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). gcInfoEncoder->FinalizeSlotIds(); // Now we can actually use those slot ID's to declare live ranges. 
gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK); #if defined(DEBUGGING_SUPPORT) if (compiler->opts.compDbgEnC) { // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) // which is: // -return address // -saved off RBP // -saved 'this' pointer and bool for synchronized methods // 4 slots for RBP + return address + RSI + RDI int preservedAreaSize = 4 * REGSIZE_BYTES; if (compiler->info.compFlags & CORINFO_FLG_SYNCH) { if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) preservedAreaSize += REGSIZE_BYTES; preservedAreaSize += 1; // bool for synchronized methods } // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the frame gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); } #endif gcInfoEncoder->Build(); //GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) //let's save the values anyway for debugging purposes compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); compiler->compInfoBlkSize = 0; //not exposed by the GCEncoder interface } /***************************************************************************** * Emit a call to a helper function. * */ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize) { void* addr = nullptr; void* pAddr = nullptr; emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); regNumber callTarget = REG_NA; if (addr == nullptr) { NYI("genEmitHelperCall indirect"); #if 0 assert(pAddr != nullptr); if (genAddrShouldUsePCRel((size_t)pAddr)) { // generate call whose target is specified by PC-relative 32-bit offset. callType = emitter::EC_FUNC_TOKEN_INDIR; addr = pAddr; } else { // If this address cannot be encoded as PC-relative 32-bit offset, load it into REG_HELPER_CALL_TARGET // and use register indirect addressing mode to make the call. 
// mov reg, addr // call [reg] callTarget = callTargetReg; CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL); callType = emitter::EC_INDIR_ARD; } #endif // 0 } getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */ callTarget, /* ireg */ REG_NA, 0, 0, /* xreg, xmul, disp */ false, /* isJump */ emitter::emitNoGChelper(helper)); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); regTracker.rsTrashRegSet(killMask); regTracker.rsTrashRegsForGCInterruptability(); } /*****************************************************************************/ #ifdef DEBUGGING_SUPPORT /***************************************************************************** * genSetScopeInfo * * Called for every scope info piece to record by the main genSetScopeInfo() */ // TODO-Cleanup: move to CodeGenCommon.cpp void CodeGen::genSetScopeInfo (unsigned which, UNATIVE_OFFSET startOffs, UNATIVE_OFFSET length, unsigned varNum, unsigned LVnum, bool avail, Compiler::siVarLoc& varLoc) { /* We need to do some mapping while reporting back these variables */ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); VarName name = nullptr; #ifdef DEBUG for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) { if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) { name = compiler->info.compVarScopes[scopeNum].vsdName; } } // Hang on to this compiler->info. 
TrnslLocalVarInfo &tlvi = genTrnslLocalVarInfo[which]; tlvi.tlviVarNum = ilVarNum; tlvi.tlviLVnum = LVnum; tlvi.tlviName = name; tlvi.tlviStartPC = startOffs; tlvi.tlviLength = length; tlvi.tlviAvailable = avail; tlvi.tlviVarLoc = varLoc; #endif // DEBUG compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); } #endif // DEBUGGING_SUPPORT /***************************************************************************** * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog * (it's as good a place as any), then use COMPLUS_JitLateDisasm=* to see if the late * disassembler thinks the instructions as the same as we do. */ // Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. //#define ALL_ARM64_EMITTER_UNIT_TESTS #if defined(DEBUG) void CodeGen::genArm64EmitterUnitTests() { if (!verbose) { return; } if (!compiler->opts.altJit) { // No point doing this in a "real" JIT. return; } // Mark the "fake" instructions in the output. printf("*************** In genArm64EmitterUnitTests()\n"); emitter* theEmitter = getEmitter(); // We use this: // genDefineTempLabel(genCreateTempLabel()); // to create artificial labels to help separate groups of tests. 
// // Loads/Stores basic general register // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // ldr/str Xt, [reg] theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9); // ldr/str Wt, [reg] theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, 
REG_R9, 1); theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1); // SP and ZR tests theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1); theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1); // scaled theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8); // pre-/post-indexed (unscaled) theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // Compares // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // cmp reg, reg theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9); theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9); // cmp reg, imm theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff); theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); theEmitter->emitIns_R_I(INS_cmp, 
EA_8BYTE, REG_R8, 0xffffffffff800000LL); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // R_R // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12); theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13); theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14); theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15); theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0); theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1); theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2); theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3); theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4); theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5); theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_I // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // mov reg, imm(i16,hw) theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000); theEmitter->emitIns_R_I(INS_mov, 
EA_4BYTE, REG_R8, 0xFFFF1234); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF); // mov reg, imm(N,r,s) theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003); theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003); theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000); theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // tst reg, reg theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10); // mov reg, reg 
theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP); theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9); theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13); theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12); theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10); theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11); theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13); theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_I_I // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // mov reg, imm(i16,hw) theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL); 
#endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1); theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18); theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37); theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2); theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53); theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18); theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555); theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666); theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707); theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000); theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32); theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63); theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); 
theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); 
theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); 
theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I cmp/tst // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // cmp theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0); // CMP (shifted register) theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR); // TST (shifted register) theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR); theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR); theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR); 
theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR); // CMP (extended register) theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis disassembles this "cmp x8,x9", which looks like an msdis issue. theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX); // CMP 64-bit (extended register) and left shift theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX); // CMP 32-bit (extended register) and left shift theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); 
theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, 
REG_R10); theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I_I // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39); theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23); theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7); theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37); theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21); theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5); theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, 
REG_R2, REG_R3, 10, 24); theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16); theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8); theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19); theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13); theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7); theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17); theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11); theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5); theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14); theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9); theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_I // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // ADD (extended register) theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX); // ADD (extended register) and left shift theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 
REG_R10, 4, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX); // ADD (shifted register) theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR); // EXTR (extract field from register pair) theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1); theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31); theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32); theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63); theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1); theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31); // SUB (extended register) theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, 
REG_R9, REG_R10, 0, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX); // SUB (extended register) and left shift theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX); // SUB (shifted register) theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR); // bit operations theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL); 
theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR); theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR); theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR); theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR); theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR); theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR); #endif // 
ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_I -- load/store pair // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, 
INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX); // SP and ZR tests theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_Ext -- load/store shifted/extend // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // LDR (register) theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3); 
theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); 
theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, 
EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); // STR (register) theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, 
REG_SP, REG_R9, INS_OPTS_SXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1); 
theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_R // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10); theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11); theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12); theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13); theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14); theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15); theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19); theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20); theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21); theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22); theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23); theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // R_COND // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // cset reg, cond theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, 
REG_R8, INS_COND_NE); // ne theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le // csetm reg, cond theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, 
INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS // R_R_COND // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // cinc reg, reg, cond // cinv reg, reg, cond // cneg reg, reg, cond theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS // R_R_R_COND // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // csel reg, reg, reg, cond // csinc reg, reg, reg, cond // csinv reg, reg, reg, cond // csneg reg, reg, reg, cond theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo theEmitter->emitIns_R_R_R_COND(INS_csel, 
EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS // R_R_FLAGS_COND // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // ccmp reg1, reg2, nzcv, cond theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi 
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmp reg1, imm, nzcv, cond theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmp reg1, imm, nzcv, 
cond -- encoded as ccmn theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmn reg1, reg2, nzcv, cond theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi 
theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le // ccmn reg1, imm, nzcv, cond theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls 
theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // Branch to register // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // Misc // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0); theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH); theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD); theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY); #endif // ALL_ARM64_EMITTER_UNIT_TESTS //////////////////////////////////////////////////////////////////////////////// // // SIMD and Floating point // //////////////////////////////////////////////////////////////////////////////// // // Load/Stores vector register // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // ldr/str Vt, [reg] 
theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9); theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8); theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7); theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6); theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5); theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4); theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3); theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2); theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1); theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0); // ldr/str Vt, [reg+cns] -- scaled theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16); theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16); // ldr/str Vt, [reg],cns -- post-indexed (unscaled) // ldr/str Vt, [reg+cns]! 
-- pre-indexed (unscaled) theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2); theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3); theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5); theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, 
REG_R9, 9); theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17); theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2); theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3); theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5); theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9); theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17); // load/store pair theEmitter->emitIns_R_R_R (INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24); theEmitter->emitIns_R_R_R (INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12); theEmitter->emitIns_R_R_R (INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0); theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48); theEmitter->emitIns_R_R_R (INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, 
REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R (INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R (INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX); // LDR (register) theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); 
theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); 
theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R mov and aliases for mov // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // mov vector to 
vector theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13); theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15); theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17); // mov vector to general theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5); theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6); theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7); // mov general to vector theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4); theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5); theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6); theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7); // mov vector[index] to vector theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3); theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7); theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15); // mov to general from vector[index] theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1); theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2); theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3); theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4); // mov to vector[index] from general theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1); theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2); theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6); theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8); // mov vector[index] to vector[index2] theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0); theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1); theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2); 
theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3); ////////////////////////////////////////////////////////////////////////////////// // mov/dup scalar theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1); theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3); theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7); theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15); // mov/ins vector element theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1); theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2); theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3); theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4); // umov to general from vector element theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1); theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2); theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4); theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8); // ins to vector element from general theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1); theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3); theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7); theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15); // smov to general from vector element theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2); theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4); theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_I movi and mvni // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // movi imm8 (vector) theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, 
INS_OPTS_16B); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16 theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8 
theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16 theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D); theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000); theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16 #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_I orr/bic vector immediate // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); 
// LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H); theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H); theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S); theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16 theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S); theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_F cmp/fmov immediate // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // fmov imm8 (scalar) theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0); 
theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0); theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0 theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31); theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125); // fmov imm8 (vector) theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D); theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S); theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D); // fcmp with 0.0 theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0); theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0); theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0); theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R fmov/fcmp/fcvt // #ifdef 
ALL_ARM64_EMITTER_UNIT_TESTS // fmov to vector to vector theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3); // fmov to vector to general theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5); // using the optional conversion specifier theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE); // fmov to general to vector theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5); // using the optional conversion specifier theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D); theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S); // fcmp/fcmpe theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16); theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17); theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18); theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19); // fcvt theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S); theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D); theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H); theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R floating point conversions // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // fcvtas scalar theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3); // fcvtas scalar to general 
theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtas vector theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtau scalar theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3); // fcvtau scalar to general theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtau vector theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtms scalar theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3); // fcvtms scalar to general theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtms vector theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, 
REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtmu scalar theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3); // fcvtmu scalar to general theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtmu vector theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtns scalar theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3); // fcvtns scalar to general theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtns vector theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtnu scalar theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3); // fcvtnu scalar to general theEmitter->emitIns_R_R(INS_fcvtnu, 
EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtnu vector theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtps scalar theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3); // fcvtps scalar to general theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtps vector theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtpu scalar theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3); // fcvtpu scalar to general theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtpu vector theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); 
theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // fcvtzs scalar theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3); // fcvtzs scalar to general theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtzs vector theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // fcvtzu scalar theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3); // fcvtzu scalar to general theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE); theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE); theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE); // fcvtzu vector theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); //////////////////////////////////////////////////////////////////////////////// // scvtf scalar theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3); // scvtf scalar from 
general theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S); theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S); theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D); theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D); // scvtf vector theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); // ucvtf scalar theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3); // ucvtf scalar from general theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S); theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S); theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D); theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D); // ucvtf vector theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R floating point operations, one dest, one source // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // fabs scalar theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3); // fabs vector theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // fneg scalar theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3); // fneg vector 
theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // fsqrt scalar theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3); // fsqrt vector theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); genDefineTempLabel(genCreateTempLabel()); // abs scalar theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3); // abs vector theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D); // neg scalar theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3); // neg vector theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D); // mvn vector theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5); theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, 
REG_V7, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9); theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B); // cnt vector theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B); // not vector (the same encoding as mvn) theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13); theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17); theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B); // cls vector theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); // clz vector theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); // rbit vector theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); // rev16 vector theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); // rev32 vector theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); 
theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); // rev64 vector theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); #endif // // R_R floating point round to int, one dest, one source // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // frinta scalar theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3); // frinta vector theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frinti scalar theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3); // frinti vector theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintm scalar theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3); // frintm vector theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintn scalar 
theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3); // frintn vector theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintp scalar theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3); // frintp vector theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintx scalar theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3); // frintx vector theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); // frintz scalar theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3); // frintz vector theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R floating point operations, one dest, two source // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); 
theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); // fabd theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 
4BYTE theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmulx, 
EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_I vector operations, one dest, one source reg, one immed // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // 'sshr' scalar theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63); // 'sshr' vector theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'ssra' scalar theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63); // 'ssra' vector 
theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'srshr' scalar theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63); // 'srshr' vector theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'srsra' scalar theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40); 
theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63); // 'srsra' vector theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'shl' scalar theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63); // 'shl' vector theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'ushr' scalar theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_ushr, 
EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63); // 'ushr' vector theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'usra' scalar theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63); // 'usra' vector theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'urshr' scalar theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 
27); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63); // 'urshr' vector theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'ursra' scalar theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63); // 'ursra' vector theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'sri' scalar theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14); 
theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63); // 'sri' vector theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'sli' scalar theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63); // 'sli' vector theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D); theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D); // 'sshll' vector theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_sshll2, 
EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'ushll' vector theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'shrn' vector theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'rshrn' vector theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B); theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B); theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H); theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H); theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S); theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S); // 'sxtl' vector theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); 
theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); // 'uxtl' vector theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B); theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B); theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H); theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R vector operations, one dest, two source // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); // Specifying an Arrangement is optional // theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8); theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11); theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14); theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17); theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20); theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23); theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26); theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29); theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0); theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3); theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6); theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9); theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, 
REG_V12); theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15); theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18); theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21); // Default Arrangement as per the ARM64 manual // theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B); genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, 
REG_V9, REG_V10, REG_V11, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D); genDefineTempLabel(genCreateTempLabel()); // saba vector theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); // sabd vector theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, 
INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); // uaba vector theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); // uabd vector theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R vector multiply // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H); theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B); 
theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B); // 'mul' vector by elem theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); // 'mla' vector by elem theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, 
REG_V17, REG_V13, 3, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); // 'mls' vector by elem theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R floating point operations, one source/dest, and two source // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS genDefineTempLabel(genCreateTempLabel()); theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE 
theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R_R floating point operations, one dest, and three source // #ifdef ALL_ARM64_EMITTER_UNIT_TESTS theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24); theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25); theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26); theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27); theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28); theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29); theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30); theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31); #endif #ifdef ALL_ARM64_EMITTER_UNIT_TESTS 
BasicBlock* label = genCreateTempLabel(); genDefineTempLabel(label); instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0); #endif // ALL_ARM64_EMITTER_UNIT_TESTS printf("*************** End of genArm64EmitterUnitTests()\n"); } #endif // defined(DEBUG) #endif // _TARGET_ARM64_ #endif // !LEGACY_BACKEND