author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-24 12:35:59 -0700
---|---|---
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-24 12:35:59 -0700
commit | 7403e6d8263937dea206dd201fed1ceed190ca18 (patch) |
tree | 72e84c7bc56998c9998e95a4f14ebdc252dded41 /drivers |
parent | 66711cfea642a162bc99abdefe1645b26dbd778f (diff) |
parent | f621eb13facb7681a79f4fec8ec6553ae160da76 (diff) |
Merge tag 'vfio-v5.18-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
 - Introduce new device migration uAPI and implement a device-specific
   mlx5 vfio-pci variant driver supporting the new protocol (Jason
   Gunthorpe, Yishai Hadas, Leon Romanovsky)

 - New HiSilicon acc vfio-pci variant driver, also supporting the
   migration interface (Shameer Kolothum, Longfang Liu)

 - D3hot fixes for vfio-pci-core (Abhishek Sahu)

 - Document new vfio-pci variant driver acceptance criteria
   (Alex Williamson)

 - Fix UML build unresolved ioport_{un}map() functions
   (Alex Williamson)

 - Fix MAINTAINERS due to header movement (Lukas Bulwahn)
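[Editor's note: the v2 migration uAPI referenced in the first item above is driven entirely through the VFIO_DEVICE_FEATURE ioctl. What follows is a minimal userspace sketch, not part of the pull itself: it assumes a VFIO device fd ("device_fd") was already obtained through the usual group/container setup, trims all error handling, and relies on the structures this series adds to include/uapi/linux/vfio.h (vfio_device_feature, vfio_device_feature_mig_state, VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE).]

```c
/*
 * Hedged sketch of driving the v2 migration uAPI from userspace.
 * Assumes "device_fd" is an open VFIO device fd; error handling trimmed.
 */
#include <linux/vfio.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Request a migration state transition; returns the data_fd (or -1). */
static int set_mig_state(int device_fd, enum vfio_device_mig_state state)
{
	/* vfio_device_feature ends in a flexible array, so size a raw buffer */
	uint64_t buf[(sizeof(struct vfio_device_feature) +
		      sizeof(struct vfio_device_feature_mig_state) + 7) / 8];
	struct vfio_device_feature *feature = (void *)buf;
	struct vfio_device_feature_mig_state *mig = (void *)feature->data;

	memset(buf, 0, sizeof(buf));
	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_SET |
			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	mig->device_state = state;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0)
		return -1;
	/* the STOP_COPY and RESUMING arcs hand back an fd for device state */
	return mig->data_fd;
}

/* Source side of a migration: quiesce, then stream the saved state out. */
static void save_side(int device_fd, int out_fd)
{
	char chunk[4096];
	ssize_t n;
	int data_fd;

	set_mig_state(device_fd, VFIO_DEVICE_STATE_STOP);
	data_fd = set_mig_state(device_fd, VFIO_DEVICE_STATE_STOP_COPY);

	while ((n = read(data_fd, chunk, sizeof(chunk))) > 0)
		write(out_fd, chunk, n);	/* forward to the destination */

	close(data_fd);
	set_mig_state(device_fd, VFIO_DEVICE_STATE_STOP);
}
```

The destination performs the mirror-image arcs (RESUMING, write() into the returned data_fd, then STOP, then RUNNING); the hisi_acc driver's read/write fops in the diff below implement the kernel side of exactly this stream.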
* tag 'vfio-v5.18-rc1' of https://github.com/awilliam/linux-vfio: (31 commits)
vfio-pci: Provide reviewers and acceptance criteria for variant drivers
MAINTAINERS: adjust entry for header movement in hisilicon qm driver
hisi_acc_vfio_pci: Use its own PCI reset_done error handler
hisi_acc_vfio_pci: Add support for VFIO live migration
crypto: hisilicon/qm: Set the VF QM state register
hisi_acc_vfio_pci: Add helper to retrieve the struct pci_driver
hisi_acc_vfio_pci: Restrict access to VF dev BAR2 migration region
hisi_acc_vfio_pci: add new vfio_pci driver for HiSilicon ACC devices
hisi_acc_qm: Move VF PCI device IDs to common header
crypto: hisilicon/qm: Move few definitions to common header
crypto: hisilicon/qm: Move the QM header to include/linux
vfio/mlx5: Fix to not use 0 as NULL pointer
PCI/IOV: Fix wrong kernel-doc identifier
vfio/mlx5: Use its own PCI reset_done error handler
vfio/pci: Expose vfio_pci_core_aer_err_detected()
vfio/mlx5: Implement vfio_pci driver for mlx5 devices
vfio/mlx5: Expose migration commands over mlx5 device
vfio: Remove migration protocol v1 documentation
vfio: Extend the device migration protocol with RUNNING_P2P
vfio: Define device migration protocol v2
...
Diffstat (limited to 'drivers')
29 files changed, 3021 insertions, 585 deletions
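[Editor's note: one small but load-bearing piece of the diff below is the new pci_iov_vf_id() helper in drivers/pci/iov.c, which the variant drivers use to translate a VF's PCI routing ID back to its SR-IOV index. Here is a standalone illustration of that arithmetic; the offset/stride values are hypothetical, as in the kernel they come from the PF's SR-IOV capability (First VF Offset / VF Stride).]

```c
/*
 * Illustration only: the routing-ID arithmetic behind the new
 * pci_iov_vf_id() helper added below. Offset/stride are hypothetical.
 */
#include <stdio.h>

static int vf_id(unsigned int vf_bus, unsigned int vf_devfn,
		 unsigned int pf_bus, unsigned int pf_devfn,
		 unsigned int offset, unsigned int stride)
{
	unsigned int vf_rid = (vf_bus << 8) + vf_devfn;	/* 16-bit routing ID */
	unsigned int pf_rid = (pf_bus << 8) + pf_devfn;

	return (int)((vf_rid - (pf_rid + offset)) / stride);
}

int main(void)
{
	/* PF at 0000:81:00.0 with First VF Offset = 1, VF Stride = 1 */
	printf("%d\n", vf_id(0x81, 0x01, 0x81, 0x00, 1, 1)); /* 81:00.1 -> VF 0  */
	printf("%d\n", vf_id(0x81, 0x13, 0x81, 0x00, 1, 1)); /* 81:02.3 -> VF 18 */
	return 0;
}
```

This closed-form lookup replaces the reverse-translation loop that mlx5_core_sriov_set_msix_vec_count() previously carried, as seen in the sriov.c hunk below.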
diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
index e0b4a1982ee9..9a0558ed82f9 100644
--- a/drivers/crypto/hisilicon/hpre/hpre.h
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -4,7 +4,7 @@
 #define __HISI_HPRE_H

 #include <linux/list.h>
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>

 #define HPRE_SQE_SIZE			sizeof(struct hpre_sqe)
 #define HPRE_PF_DEF_Q_NUM		64
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index ebfab3e14499..36ab30e9e654 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -68,8 +68,7 @@
 #define HPRE_REG_RD_INTVRL_US		10
 #define HPRE_REG_RD_TMOUT_US		1000
 #define HPRE_DBGFS_VAL_MAX_LEN		20
-#define HPRE_PCI_DEVICE_ID		0xa258
-#define HPRE_PCI_VF_DEVICE_ID		0xa259
+#define PCI_DEVICE_ID_HUAWEI_HPRE_PF	0xa258
 #define HPRE_QM_USR_CFG_MASK		GENMASK(31, 1)
 #define HPRE_QM_AXI_CFG_MASK		GENMASK(15, 0)
 #define HPRE_QM_VFG_AX_MASK		GENMASK(7, 0)
@@ -111,8 +110,8 @@ static const char hpre_name[] = "hisi_hpre";
 static struct dentry *hpre_debugfs_root;

 static const struct pci_device_id hpre_dev_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_VF_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
 	{ 0, }
 };
@@ -242,7 +241,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);

 static int pf_q_num_set(const char *val, const struct kernel_param *kp)
 {
-	return q_num_set(val, kp, HPRE_PCI_DEVICE_ID);
+	return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF);
 }

 static const struct kernel_param_ops hpre_pf_q_num_ops = {
@@ -921,7 +920,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
 	qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
 	hisi_qm_debug_init(qm);

-	if (qm->pdev->device == HPRE_PCI_DEVICE_ID) {
+	if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) {
 		ret = hpre_ctrl_debug_init(qm);
 		if (ret)
 			goto failed_to_create;
@@ -958,7 +957,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	qm->sqe_size = HPRE_SQE_SIZE;
 	qm->dev_name = hpre_name;

-	qm->fun_type = (pdev->device == HPRE_PCI_DEVICE_ID) ?
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) ?
 		QM_HW_PF : QM_HW_VF;
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = HPRE_PF_DEF_Q_BASE;
@@ -1191,6 +1190,12 @@ static struct pci_driver hpre_pci_driver = {
 	.driver.pm = &hpre_pm_ops,
 };

+struct pci_driver *hisi_hpre_get_pf_driver(void)
+{
+	return &hpre_pci_driver;
+}
+EXPORT_SYMBOL_GPL(hisi_hpre_get_pf_driver);
+
 static void hpre_register_debugfs(void)
 {
 	if (!debugfs_initialized())
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 453390044181..009132333d2b 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -15,7 +15,7 @@
 #include <linux/uacce.h>
 #include <linux/uaccess.h>
 #include <uapi/misc/uacce/hisi_qm.h>
-#include "qm.h"
+#include <linux/hisi_acc_qm.h>

 /* eq/aeq irq enable */
 #define QM_VF_AEQ_INT_SOURCE		0x0
@@ -33,23 +33,6 @@
 #define QM_ABNORMAL_EVENT_IRQ_VECTOR	3

 /* mailbox */
-#define QM_MB_CMD_SQC			0x0
-#define QM_MB_CMD_CQC			0x1
-#define QM_MB_CMD_EQC			0x2
-#define QM_MB_CMD_AEQC			0x3
-#define QM_MB_CMD_SQC_BT		0x4
-#define QM_MB_CMD_CQC_BT		0x5
-#define QM_MB_CMD_SQC_VFT_V2		0x6
-#define QM_MB_CMD_STOP_QP		0x8
-#define QM_MB_CMD_SRC			0xc
-#define QM_MB_CMD_DST			0xd
-
-#define QM_MB_CMD_SEND_BASE		0x300
-#define QM_MB_EVENT_SHIFT		8
-#define QM_MB_BUSY_SHIFT		13
-#define QM_MB_OP_SHIFT			14
-#define QM_MB_CMD_DATA_ADDR_L		0x304
-#define QM_MB_CMD_DATA_ADDR_H		0x308
 #define QM_MB_PING_ALL_VFS		0xffff
 #define QM_MB_CMD_DATA_SHIFT		32
 #define QM_MB_CMD_DATA_MASK		GENMASK(31, 0)
@@ -103,19 +86,12 @@
 #define QM_DB_CMD_SHIFT_V1		16
 #define QM_DB_INDEX_SHIFT_V1		32
 #define QM_DB_PRIORITY_SHIFT_V1		48
-#define QM_DOORBELL_SQ_CQ_BASE_V2	0x1000
-#define QM_DOORBELL_EQ_AEQ_BASE_V2	0x2000
 #define QM_QUE_ISO_CFG_V		0x0030
 #define QM_PAGE_SIZE			0x0034
 #define QM_QUE_ISO_EN			0x100154
 #define QM_CAPBILITY			0x100158
 #define QM_QP_NUN_MASK			GENMASK(10, 0)
 #define QM_QP_DB_INTERVAL		0x10000
-#define QM_QP_MAX_NUM_SHIFT		11
-#define QM_DB_CMD_SHIFT_V2		12
-#define QM_DB_RAND_SHIFT_V2		16
-#define QM_DB_INDEX_SHIFT_V2		32
-#define QM_DB_PRIORITY_SHIFT_V2		48

 #define QM_MEM_START_INIT		0x100040
 #define QM_MEM_INIT_DONE		0x100044
@@ -693,7 +669,7 @@ static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
 }

 /* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
-static int qm_wait_mb_ready(struct hisi_qm *qm)
+int hisi_qm_wait_mb_ready(struct hisi_qm *qm)
 {
 	u32 val;

@@ -701,6 +677,7 @@ static int qm_wait_mb_ready(struct hisi_qm *qm)
 					 val, !((val >> QM_MB_BUSY_SHIFT) & 0x1),
 					 POLL_PERIOD, POLL_TIMEOUT);
 }
+EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready);

 /* 128 bit should be written to hardware at one time to trigger a mailbox */
 static void qm_mb_write(struct hisi_qm *qm, const void *src)
@@ -726,14 +703,14 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)

 static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
 {
-	if (unlikely(qm_wait_mb_ready(qm))) {
+	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
 		dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
 		goto mb_busy;
 	}

 	qm_mb_write(qm, mailbox);

-	if (unlikely(qm_wait_mb_ready(qm))) {
+	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
 		dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
 		goto mb_busy;
 	}
@@ -745,8 +722,8 @@ mb_busy:
 	return -EBUSY;
 }

-static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
-		 bool op)
+int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+	       bool op)
 {
 	struct qm_mailbox mailbox;
 	int ret;
@@ -762,6 +739,7 @@ static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,

 	return ret;
 }
+EXPORT_SYMBOL_GPL(hisi_qm_mb);

 static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
 {
@@ -1351,7 +1329,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
 	u64 sqc_vft;
 	int ret;

-	ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
 	if (ret)
 		return ret;

@@ -1725,12 +1703,12 @@ static int dump_show(struct hisi_qm *qm, void *info,

 static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
 {
-	return qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
+	return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
 }

 static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
 {
-	return qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
+	return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
 }

 static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
@@ -1842,7 +1820,7 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
 	if (IS_ERR(xeqc))
 		return PTR_ERR(xeqc);

-	ret = qm_mb(qm, cmd, xeqc_dma, 0, 1);
+	ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
 	if (ret)
 		goto err_free_ctx;

@@ -2495,7 +2473,7 @@ unlock:

 static int qm_stop_qp(struct hisi_qp *qp)
 {
-	return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
+	return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
 }

 static int qm_set_msi(struct hisi_qm *qm, bool set)
@@ -2763,7 +2741,7 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
 		return -ENOMEM;
 	}

-	ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
 	dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE);
 	kfree(sqc);

@@ -2804,7 +2782,7 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
 		return -ENOMEM;
 	}

-	ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
 	dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE);
 	kfree(cqc);

@@ -3514,6 +3492,12 @@ static void hisi_qm_pci_uninit(struct hisi_qm *qm)
 	pci_disable_device(pdev);
 }

+static void hisi_qm_set_state(struct hisi_qm *qm, u8 state)
+{
+	if (qm->ver > QM_HW_V2 && qm->fun_type == QM_HW_VF)
+		writel(state, qm->io_base + QM_VF_STATE);
+}
+
 /**
  * hisi_qm_uninit() - Uninitialize qm.
  * @qm: The qm needed uninit.
@@ -3542,6 +3526,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 		dma_free_coherent(dev, qm->qdma.size,
 				  qm->qdma.va, qm->qdma.dma);
 	}
+	hisi_qm_set_state(qm, QM_NOT_READY);
 	up_write(&qm->qps_lock);

 	qm_irq_unregister(qm);
@@ -3655,7 +3640,7 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm)
 		return -ENOMEM;
 	}

-	ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
 	dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE);
 	kfree(eqc);

@@ -3684,7 +3669,7 @@ static int qm_aeq_ctx_cfg(struct hisi_qm *qm)
 		return -ENOMEM;
 	}

-	ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
 	dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE);
 	kfree(aeqc);

@@ -3723,11 +3708,11 @@ static int __hisi_qm_start(struct hisi_qm *qm)
 	if (ret)
 		return ret;

-	ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
 	if (ret)
 		return ret;

-	ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
 	if (ret)
 		return ret;

@@ -3767,6 +3752,7 @@ int hisi_qm_start(struct hisi_qm *qm)
 	if (!ret)
 		atomic_set(&qm->status.flags, QM_START);

+	hisi_qm_set_state(qm, QM_READY);
 err_unlock:
 	up_write(&qm->qps_lock);
 	return ret;
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
deleted file mode 100644
index 3068093229a5..000000000000
--- a/drivers/crypto/hisilicon/qm.h
+++ /dev/null
@@ -1,441 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2019 HiSilicon Limited. */
-#ifndef HISI_ACC_QM_H
-#define HISI_ACC_QM_H
-
-#include <linux/bitfield.h>
-#include <linux/debugfs.h>
-#include <linux/iopoll.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-
-#define QM_QNUM_V1			4096
-#define QM_QNUM_V2			1024
-#define QM_MAX_VFS_NUM_V2		63
-
-/* qm user domain */
-#define QM_ARUSER_M_CFG_1		0x100088
-#define AXUSER_SNOOP_ENABLE		BIT(30)
-#define AXUSER_CMD_TYPE			GENMASK(14, 12)
-#define AXUSER_CMD_SMMU_NORMAL		1
-#define AXUSER_NS			BIT(6)
-#define AXUSER_NO			BIT(5)
-#define AXUSER_FP			BIT(4)
-#define AXUSER_SSV			BIT(0)
-#define AXUSER_BASE			(AXUSER_SNOOP_ENABLE |		\
-					 FIELD_PREP(AXUSER_CMD_TYPE,	\
-						    AXUSER_CMD_SMMU_NORMAL) |	\
-					 AXUSER_NS | AXUSER_NO | AXUSER_FP)
-#define QM_ARUSER_M_CFG_ENABLE		0x100090
-#define ARUSER_M_CFG_ENABLE		0xfffffffe
-#define QM_AWUSER_M_CFG_1		0x100098
-#define QM_AWUSER_M_CFG_ENABLE		0x1000a0
-#define AWUSER_M_CFG_ENABLE		0xfffffffe
-#define QM_WUSER_M_CFG_ENABLE		0x1000a8
-#define WUSER_M_CFG_ENABLE		0xffffffff
-
-/* qm cache */
-#define QM_CACHE_CTL			0x100050
-#define SQC_CACHE_ENABLE		BIT(0)
-#define CQC_CACHE_ENABLE		BIT(1)
-#define SQC_CACHE_WB_ENABLE		BIT(4)
-#define SQC_CACHE_WB_THRD		GENMASK(10, 5)
-#define CQC_CACHE_WB_ENABLE		BIT(11)
-#define CQC_CACHE_WB_THRD		GENMASK(17, 12)
-#define QM_AXI_M_CFG			0x1000ac
-#define AXI_M_CFG			0xffff
-#define QM_AXI_M_CFG_ENABLE		0x1000b0
-#define AM_CFG_SINGLE_PORT_MAX_TRANS	0x300014
-#define AXI_M_CFG_ENABLE		0xffffffff
-#define QM_PEH_AXUSER_CFG		0x1000cc
-#define QM_PEH_AXUSER_CFG_ENABLE	0x1000d0
-#define PEH_AXUSER_CFG			0x401001
-#define PEH_AXUSER_CFG_ENABLE		0xffffffff
-
-#define QM_AXI_RRESP			BIT(0)
-#define QM_AXI_BRESP			BIT(1)
-#define QM_ECC_MBIT			BIT(2)
-#define QM_ECC_1BIT			BIT(3)
-#define QM_ACC_GET_TASK_TIMEOUT		BIT(4)
-#define QM_ACC_DO_TASK_TIMEOUT		BIT(5)
-#define QM_ACC_WB_NOT_READY_TIMEOUT	BIT(6)
-#define QM_SQ_CQ_VF_INVALID		BIT(7)
-#define QM_CQ_VF_INVALID		BIT(8)
-#define QM_SQ_VF_INVALID		BIT(9)
-#define QM_DB_TIMEOUT			BIT(10)
-#define QM_OF_FIFO_OF			BIT(11)
-#define QM_DB_RANDOM_INVALID		BIT(12)
-#define QM_MAILBOX_TIMEOUT		BIT(13)
-#define QM_FLR_TIMEOUT			BIT(14)
-
-#define QM_BASE_NFE			(QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \
-					 QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \
-					 QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \
-					 QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT)
-#define QM_BASE_CE			QM_ECC_1BIT
-
-#define QM_Q_DEPTH			1024
-#define QM_MIN_QNUM			2
-#define HISI_ACC_SGL_SGE_NR_MAX		255
-#define QM_SHAPER_CFG			0x100164
-#define QM_SHAPER_ENABLE		BIT(30)
-#define QM_SHAPER_TYPE1_OFFSET		10
-
-/* page number for queue file region */
-#define QM_DOORBELL_PAGE_NR		1
-
-/* uacce mode of the driver */
-#define UACCE_MODE_NOUACCE		0 /* don't use uacce */
-#define UACCE_MODE_SVA			1 /* use uacce sva mode */
-#define UACCE_MODE_DESC	"0(default) means only register to crypto, 1 means both register to crypto and uacce"
-
-enum qm_stop_reason {
-	QM_NORMAL,
-	QM_SOFT_RESET,
-	QM_FLR,
-};
-
-enum qm_state {
-	QM_INIT = 0,
-	QM_START,
-	QM_CLOSE,
-	QM_STOP,
-};
-
-enum qp_state {
-	QP_INIT = 1,
-	QP_START,
-	QP_STOP,
-	QP_CLOSE,
-};
-
-enum qm_hw_ver {
-	QM_HW_UNKNOWN = -1,
-	QM_HW_V1 = 0x20,
-	QM_HW_V2 = 0x21,
-	QM_HW_V3 = 0x30,
-};
-
-enum qm_fun_type {
-	QM_HW_PF,
-	QM_HW_VF,
-};
-
-enum qm_debug_file {
-	CURRENT_QM,
-	CURRENT_Q,
-	CLEAR_ENABLE,
-	DEBUG_FILE_NUM,
-};
-
-struct qm_dfx {
-	atomic64_t err_irq_cnt;
-	atomic64_t aeq_irq_cnt;
-	atomic64_t abnormal_irq_cnt;
-	atomic64_t create_qp_err_cnt;
-	atomic64_t mb_err_cnt;
-};
-
-struct debugfs_file {
-	enum qm_debug_file index;
-	struct mutex lock;
-	struct qm_debug *debug;
-};
-
-struct qm_debug {
-	u32 curr_qm_qp_num;
-	u32 sqe_mask_offset;
-	u32 sqe_mask_len;
-	struct qm_dfx dfx;
-	struct dentry *debug_root;
-	struct dentry *qm_d;
-	struct debugfs_file files[DEBUG_FILE_NUM];
-};
-
-struct qm_shaper_factor {
-	u32 func_qos;
-	u64 cir_b;
-	u64 cir_u;
-	u64 cir_s;
-	u64 cbs_s;
-};
-
-struct qm_dma {
-	void *va;
-	dma_addr_t dma;
-	size_t size;
-};
-
-struct hisi_qm_status {
-	u32 eq_head;
-	bool eqc_phase;
-	u32 aeq_head;
-	bool aeqc_phase;
-	atomic_t flags;
-	int stop_reason;
-};
-
-struct hisi_qm;
-
-struct hisi_qm_err_info {
-	char *acpi_rst;
-	u32 msi_wr_port;
-	u32 ecc_2bits_mask;
-	u32 dev_ce_mask;
-	u32 ce;
-	u32 nfe;
-	u32 fe;
-};
-
-struct hisi_qm_err_status {
-	u32 is_qm_ecc_mbit;
-	u32 is_dev_ecc_mbit;
-};
-
-struct hisi_qm_err_ini {
-	int (*hw_init)(struct hisi_qm *qm);
-	void (*hw_err_enable)(struct hisi_qm *qm);
-	void (*hw_err_disable)(struct hisi_qm *qm);
-	u32 (*get_dev_hw_err_status)(struct hisi_qm *qm);
-	void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
-	void (*open_axi_master_ooo)(struct hisi_qm *qm);
-	void (*close_axi_master_ooo)(struct hisi_qm *qm);
-	void (*open_sva_prefetch)(struct hisi_qm *qm);
-	void (*close_sva_prefetch)(struct hisi_qm *qm);
-	void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
-	void (*err_info_init)(struct hisi_qm *qm);
-};
-
-struct hisi_qm_list {
-	struct mutex lock;
-	struct list_head list;
-	int (*register_to_crypto)(struct hisi_qm *qm);
-	void (*unregister_from_crypto)(struct hisi_qm *qm);
-};
-
-struct hisi_qm {
-	enum qm_hw_ver ver;
-	enum qm_fun_type fun_type;
-	const char *dev_name;
-	struct pci_dev *pdev;
-	void __iomem *io_base;
-	void __iomem *db_io_base;
-	u32 sqe_size;
-	u32 qp_base;
-	u32 qp_num;
-	u32 qp_in_used;
-	u32 ctrl_qp_num;
-	u32 max_qp_num;
-	u32 vfs_num;
-	u32 db_interval;
-	struct list_head list;
-	struct hisi_qm_list *qm_list;
-
-	struct qm_dma qdma;
-	struct qm_sqc *sqc;
-	struct qm_cqc *cqc;
-	struct qm_eqe *eqe;
-	struct qm_aeqe *aeqe;
-	dma_addr_t sqc_dma;
-	dma_addr_t cqc_dma;
-	dma_addr_t eqe_dma;
-	dma_addr_t aeqe_dma;
-
-	struct hisi_qm_status status;
-	const struct hisi_qm_err_ini *err_ini;
-	struct hisi_qm_err_info err_info;
-	struct hisi_qm_err_status err_status;
-	unsigned long misc_ctl; /* driver removing and reset sched */
-
-	struct rw_semaphore qps_lock;
-	struct idr qp_idr;
-	struct hisi_qp *qp_array;
-
-	struct mutex mailbox_lock;
-
-	const struct hisi_qm_hw_ops *ops;
-
-	struct qm_debug debug;
-
-	u32 error_mask;
-
-	struct workqueue_struct *wq;
-	struct work_struct work;
-	struct work_struct rst_work;
-	struct work_struct cmd_process;
-
-	const char *algs;
-	bool use_sva;
-	bool is_frozen;
-
-	/* doorbell isolation enable */
-	bool use_db_isolation;
-	resource_size_t phys_base;
-	resource_size_t db_phys_base;
-	struct uacce_device *uacce;
-	int mode;
-	struct qm_shaper_factor *factor;
-	u32 mb_qos;
-	u32 type_rate;
-};
-
-struct hisi_qp_status {
-	atomic_t used;
-	u16 sq_tail;
-	u16 cq_head;
-	bool cqc_phase;
-	atomic_t flags;
-};
-
-struct hisi_qp_ops {
-	int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm);
-};
-
-struct hisi_qp {
-	u32 qp_id;
-	u8 alg_type;
-	u8 req_type;
-
-	struct qm_dma qdma;
-	void *sqe;
-	struct qm_cqe *cqe;
-	dma_addr_t sqe_dma;
-	dma_addr_t cqe_dma;
-
-	struct hisi_qp_status qp_status;
-	struct hisi_qp_ops *hw_ops;
-	void *qp_ctx;
-	void (*req_cb)(struct hisi_qp *qp, void *data);
-	void (*event_cb)(struct hisi_qp *qp);
-
-	struct hisi_qm *qm;
-	bool is_resetting;
-	bool is_in_kernel;
-	u16 pasid;
-	struct uacce_queue *uacce_q;
-};
-
-static inline int q_num_set(const char *val, const struct kernel_param *kp,
-			    unsigned int device)
-{
-	struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
-					      device, NULL);
-	u32 n, q_num;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-
-	if (!pdev) {
-		q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
-		pr_info("No device found currently, suppose queue number is %u\n",
-			q_num);
-	} else {
-		if (pdev->revision == QM_HW_V1)
-			q_num = QM_QNUM_V1;
-		else
-			q_num = QM_QNUM_V2;
-	}
-
-	ret = kstrtou32(val, 10, &n);
-	if (ret || n < QM_MIN_QNUM || n > q_num)
-		return -EINVAL;
-
-	return param_set_int(val, kp);
-}
-
-static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
-{
-	u32 n;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-
-	ret = kstrtou32(val, 10, &n);
-	if (ret < 0)
-		return ret;
-
-	if (n > QM_MAX_VFS_NUM_V2)
-		return -EINVAL;
-
-	return param_set_int(val, kp);
-}
-
-static inline int mode_set(const char *val, const struct kernel_param *kp)
-{
-	u32 n;
-	int ret;
-
-	if (!val)
-		return -EINVAL;
-
-	ret = kstrtou32(val, 10, &n);
-	if (ret != 0 || (n != UACCE_MODE_SVA &&
-			 n != UACCE_MODE_NOUACCE))
-		return -EINVAL;
-
-	return param_set_int(val, kp);
-}
-
-static inline int uacce_mode_set(const char *val, const struct kernel_param *kp)
-{
-	return mode_set(val, kp);
-}
-
-static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
-{
-	INIT_LIST_HEAD(&qm_list->list);
-	mutex_init(&qm_list->lock);
-}
-
-int hisi_qm_init(struct hisi_qm *qm);
-void hisi_qm_uninit(struct hisi_qm *qm);
-int hisi_qm_start(struct hisi_qm *qm);
-int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type);
-int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
-int hisi_qm_stop_qp(struct hisi_qp *qp);
-void hisi_qm_release_qp(struct hisi_qp *qp);
-int hisi_qp_send(struct hisi_qp *qp, const void *msg);
-int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
-int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
-void hisi_qm_debug_init(struct hisi_qm *qm);
-enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
-void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
-int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs);
-int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
-int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
-void hisi_qm_dev_err_init(struct hisi_qm *qm);
-void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
-pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
-					  pci_channel_state_t state);
-pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev);
-void hisi_qm_reset_prepare(struct pci_dev *pdev);
-void hisi_qm_reset_done(struct pci_dev *pdev);
-
-struct hisi_acc_sgl_pool;
-struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
-	struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool,
-	u32 index, dma_addr_t *hw_sgl_dma);
-void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
-			   struct hisi_acc_hw_sgl *hw_sgl);
-struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
-						   u32 count, u32 sge_nr);
-void hisi_acc_free_sgl_pool(struct device *dev,
-			    struct hisi_acc_sgl_pool *pool);
-int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
-			   u8 alg_type, int node, struct hisi_qp **qps);
-void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
-void hisi_qm_dev_shutdown(struct pci_dev *pdev);
-void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-int hisi_qm_resume(struct device *dev);
-int hisi_qm_suspend(struct device *dev);
-void hisi_qm_pm_uninit(struct hisi_qm *qm);
-void hisi_qm_pm_init(struct hisi_qm *qm);
-int hisi_qm_get_dfx_access(struct hisi_qm *qm);
-void hisi_qm_put_dfx_access(struct hisi_qm *qm);
-void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
-#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index d97cf02b1df7..c2e9b01187a7 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -4,7 +4,7 @@
 #ifndef __HISI_SEC_V2_H
 #define __HISI_SEC_V2_H

-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
 #include "sec_crypto.h"

 /* Algorithm resource per hardware SEC queue */
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 0b9906ff69e3..92fae706bdb2 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -20,8 +20,7 @@

 #define SEC_VF_NUM			63
 #define SEC_QUEUE_NUM_V1		4096
-#define SEC_PF_PCI_DEVICE_ID		0xa255
-#define SEC_VF_PCI_DEVICE_ID		0xa256
+#define PCI_DEVICE_ID_HUAWEI_SEC_PF	0xa255

 #define SEC_BD_ERR_CHK_EN0		0xEFFFFFFF
 #define SEC_BD_ERR_CHK_EN1		0x7ffff7fd
@@ -229,7 +228,7 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {

 static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
 {
-	return q_num_set(val, kp, SEC_PF_PCI_DEVICE_ID);
+	return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF);
 }

 static const struct kernel_param_ops sec_pf_q_num_ops = {
@@ -317,8 +316,8 @@ module_param_cb(uacce_mode, &sec_uacce_mode_ops, &uacce_mode, 0444);
 MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);

 static const struct pci_device_id sec_dev_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, sec_dev_ids);
@@ -748,7 +747,7 @@ static int sec_core_debug_init(struct hisi_qm *qm)
 	regset->base = qm->io_base;
 	regset->dev = dev;

-	if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
+	if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF)
 		debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);

 	for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
@@ -766,7 +765,7 @@ static int sec_debug_init(struct hisi_qm *qm)
 	struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
 	int i;

-	if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) {
+	if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) {
 		for (i = SEC_CLEAR_ENABLE; i < SEC_DEBUG_FILE_NUM; i++) {
 			spin_lock_init(&sec->debug.files[i].lock);
 			sec->debug.files[i].index = i;
@@ -908,7 +907,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	qm->sqe_size = SEC_SQE_SIZE;
 	qm->dev_name = sec_name;

-	qm->fun_type = (pdev->device == SEC_PF_PCI_DEVICE_ID) ?
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) ?
 			QM_HW_PF : QM_HW_VF;
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = SEC_PF_DEF_Q_BASE;
@@ -1120,6 +1119,12 @@ static struct pci_driver sec_pci_driver = {
 	.driver.pm = &sec_pm_ops,
 };

+struct pci_driver *hisi_sec_get_pf_driver(void)
+{
+	return &sec_pci_driver;
+}
+EXPORT_SYMBOL_GPL(hisi_sec_get_pf_driver);
+
 static void sec_register_debugfs(void)
 {
 	if (!debugfs_initialized())
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index 057273769f26..f7efc02b065f 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 HiSilicon Limited. */
 #include <linux/dma-mapping.h>
+#include <linux/hisi_acc_qm.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include "qm.h"

 #define HISI_ACC_SGL_SGE_NR_MIN		1
 #define HISI_ACC_SGL_NR_MAX		256
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index 517fdbdff3ea..3dfd3bac5a33 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -7,7 +7,7 @@
 #define pr_fmt(fmt)	"hisi_zip: " fmt

 #include <linux/list.h>
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>

 enum hisi_zip_error_type {
 	/* negative compression */
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 678f8b58ec42..4534e1e107d1 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -15,8 +15,7 @@
 #include <linux/uacce.h>
 #include "zip.h"

-#define PCI_DEVICE_ID_ZIP_PF		0xa250
-#define PCI_DEVICE_ID_ZIP_VF		0xa251
+#define PCI_DEVICE_ID_HUAWEI_ZIP_PF	0xa250

 #define HZIP_QUEUE_NUM_V1		4096
@@ -246,7 +245,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);

 static int pf_q_num_set(const char *val, const struct kernel_param *kp)
 {
-	return q_num_set(val, kp, PCI_DEVICE_ID_ZIP_PF);
+	return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF);
 }

 static const struct kernel_param_ops pf_q_num_ops = {
@@ -268,8 +267,8 @@ module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
 MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");

 static const struct pci_device_id hisi_zip_dev_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_VF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
@@ -838,7 +837,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 	qm->sqe_size = HZIP_SQE_SIZE;
 	qm->dev_name = hisi_zip_name;

-	qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ?
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_ZIP_PF) ?
 			QM_HW_PF : QM_HW_VF;
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = HZIP_PF_DEF_Q_BASE;
@@ -1013,6 +1012,12 @@ static struct pci_driver hisi_zip_pci_driver = {
 	.driver.pm = &hisi_zip_pm_ops,
 };

+struct pci_driver *hisi_zip_get_pf_driver(void)
+{
+	return &hisi_zip_pci_driver;
+}
+EXPORT_SYMBOL_GPL(hisi_zip_get_pf_driver);
+
 static void hisi_zip_register_debugfs(void)
 {
 	if (!debugfs_initialized())
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 3eacd8739929..fc19a0762af2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -478,6 +478,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_VHCA_STATE:
 	case MLX5_CMD_OP_MODIFY_VHCA_STATE:
 	case MLX5_CMD_OP_ALLOC_SF:
+	case MLX5_CMD_OP_SUSPEND_VHCA:
+	case MLX5_CMD_OP_RESUME_VHCA:
+	case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+	case MLX5_CMD_OP_SAVE_VHCA_STATE:
+	case MLX5_CMD_OP_LOAD_VHCA_STATE:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -675,6 +680,11 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE);
 	MLX5_COMMAND_STR_CASE(ALLOC_SF);
 	MLX5_COMMAND_STR_CASE(DEALLOC_SF);
+	MLX5_COMMAND_STR_CASE(SUSPEND_VHCA);
+	MLX5_COMMAND_STR_CASE(RESUME_VHCA);
+	MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE);
+	MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE);
+	MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bba72b220cc3..976578d1904c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1620,6 +1620,7 @@ static void remove_one(struct pci_dev *pdev)
 	struct devlink *devlink = priv_to_devlink(dev);

 	devlink_unregister(devlink);
+	mlx5_sriov_disable(pdev);
 	mlx5_crdump_disable(dev);
 	mlx5_drain_health_wq(dev);
 	mlx5_uninit_one(dev);
@@ -1882,6 +1883,50 @@ static struct pci_driver mlx5_core_driver = {
 	.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
 };

+/**
+ * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
+ *			  mlx5_core is its driver.
+ * @pdev: The associated PCI device.
+ *
+ * Upon return the interface state lock stay held to let caller uses it safely.
+ * Caller must ensure to use the returned mlx5 device for a narrow window
+ * and put it back with mlx5_vf_put_core_dev() immediately once usage was over.
+ *
+ * Return: Pointer to the associated mlx5_core_dev or NULL.
+ */
+struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
+			__acquires(&mdev->intf_state_mutex)
+{
+	struct mlx5_core_dev *mdev;
+
+	mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
+	if (IS_ERR(mdev))
+		return NULL;
+
+	mutex_lock(&mdev->intf_state_mutex);
+	if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
+		mutex_unlock(&mdev->intf_state_mutex);
+		return NULL;
+	}
+
+	return mdev;
+}
+EXPORT_SYMBOL(mlx5_vf_get_core_dev);
+
+/**
+ * mlx5_vf_put_core_dev - Put the mlx5 core device back.
+ * @mdev: The mlx5 core device.
+ *
+ * Upon return the interface state lock is unlocked and caller should not
+ * access the mdev any more.
+ */
+void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
+			__releases(&mdev->intf_state_mutex)
+{
+	mutex_unlock(&mdev->intf_state_mutex);
+}
+EXPORT_SYMBOL(mlx5_vf_put_core_dev);
+
 static void mlx5_core_verify_params(void)
 {
 	if (prof_sel >= ARRAY_SIZE(profile)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 6f8baa0f2a73..37b2805b3bf3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -164,6 +164,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
 void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+void mlx5_sriov_disable(struct pci_dev *pdev);
 int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index e8185b69ac6c..887ee0f729d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -161,7 +161,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
 	return err;
 }

-static void mlx5_sriov_disable(struct pci_dev *pdev)
+void mlx5_sriov_disable(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 	int num_vfs = pci_num_vf(dev->pdev);
@@ -205,19 +205,8 @@ int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
 			mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));

 	sriov = &dev->priv.sriov;
-
-	/* Reversed translation of PCI VF function number to the internal
-	 * function_id, which exists in the name of virtfn symlink.
-	 */
-	for (id = 0; id < pci_num_vf(pf); id++) {
-		if (!sriov->vfs_ctx[id].enabled)
-			continue;
-
-		if (vf->devfn == pci_iov_virtfn_devfn(pf, id))
-			break;
-	}
-
-	if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled)
+	id = pci_iov_vf_id(vf);
+	if (id < 0 || !sriov->vfs_ctx[id].enabled)
 		return -EINVAL;

 	return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 0267977c9f17..952217572113 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -33,6 +33,49 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
 }
 EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);

+int pci_iov_vf_id(struct pci_dev *dev)
+{
+	struct pci_dev *pf;
+
+	if (!dev->is_virtfn)
+		return -EINVAL;
+
+	pf = pci_physfn(dev);
+	return (((dev->bus->number << 8) + dev->devfn) -
+		((pf->bus->number << 8) + pf->devfn + pf->sriov->offset)) /
+	       pf->sriov->stride;
+}
+EXPORT_SYMBOL_GPL(pci_iov_vf_id);
+
+/**
+ * pci_iov_get_pf_drvdata - Return the drvdata of a PF
+ * @dev: VF pci_dev
+ * @pf_driver: Device driver required to own the PF
+ *
+ * This must be called from a context that ensures that a VF driver is attached.
+ * The value returned is invalid once the VF driver completes its remove()
+ * callback.
+ *
+ * Locking is achieved by the driver core. A VF driver cannot be probed until
+ * pci_enable_sriov() is called and pci_disable_sriov() does not return until
+ * all VF drivers have completed their remove().
+ *
+ * The PF driver must call pci_disable_sriov() before it begins to destroy the
+ * drvdata.
+ */
+void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver)
+{
+	struct pci_dev *pf_dev;
+
+	if (!dev->is_virtfn)
+		return ERR_PTR(-EINVAL);
+	pf_dev = dev->physfn;
+	if (pf_dev->driver != pf_driver)
+		return ERR_PTR(-EINVAL);
+	return pci_get_drvdata(pf_dev);
+}
+EXPORT_SYMBOL_GPL(pci_iov_get_pf_drvdata);
+
 /*
  * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
  * change when NumVFs changes.
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 860424ccda1b..4da1914425e1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -43,4 +43,9 @@ config VFIO_PCI_IGD
 	  To enable Intel IGD assignment through vfio-pci, say Y.
 endif
+
+source "drivers/vfio/pci/mlx5/Kconfig"
+
+source "drivers/vfio/pci/hisilicon/Kconfig"
+
 endif
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 349d68d242b4..7052ebd893e0 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -7,3 +7,7 @@ obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
 vfio-pci-y := vfio_pci.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
+
+obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/
+
+obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
diff --git a/drivers/vfio/pci/hisilicon/Kconfig b/drivers/vfio/pci/hisilicon/Kconfig
new file mode 100644
index 000000000000..5daa0f45d2f9
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_ACC_VFIO_PCI
+	tristate "VFIO PCI support for HiSilicon ACC devices"
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	depends on VFIO_PCI_CORE
+	depends on PCI_MSI
+	depends on CRYPTO_DEV_HISI_QM
+	depends on CRYPTO_DEV_HISI_HPRE
+	depends on CRYPTO_DEV_HISI_SEC2
+	depends on CRYPTO_DEV_HISI_ZIP
+	help
+	  This provides generic PCI support for HiSilicon ACC devices
+	  using the VFIO framework.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/hisilicon/Makefile b/drivers/vfio/pci/hisilicon/Makefile
new file mode 100644
index 000000000000..c66b3783f2f9
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisi-acc-vfio-pci.o
+hisi-acc-vfio-pci-y := hisi_acc_vfio_pci.o
+
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
new file mode 100644
index 000000000000..767b5d47631a
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -0,0 +1,1326 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, HiSilicon Ltd.
+ */
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/hisi_acc_qm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/anon_inodes.h>
+
+#include "hisi_acc_vfio_pci.h"
+
+/* return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
+static int qm_wait_dev_not_ready(struct hisi_qm *qm)
+{
+	u32 val;
+
+	return readl_relaxed_poll_timeout(qm->io_base + QM_VF_STATE,
+				val, !(val & 0x1), MB_POLL_PERIOD_US,
+				MB_POLL_TIMEOUT_US);
+}
+
+/*
+ * Each state Reg is checked 100 times,
+ * with a delay of 100 microseconds after each check
+ */
+static u32 qm_check_reg_state(struct hisi_qm *qm, u32 regs)
+{
+	int check_times = 0;
+	u32 state;
+
+	state = readl(qm->io_base + regs);
+	while (state && check_times < ERROR_CHECK_TIMEOUT) {
+		udelay(CHECK_DELAY_TIME);
+		state = readl(qm->io_base + regs);
+		check_times++;
+	}
+
+	return state;
+}
+
+static int qm_read_regs(struct hisi_qm *qm, u32 reg_addr,
+			u32 *data, u8 nums)
+{
+	int i;
+
+	if (nums < 1 || nums > QM_REGS_MAX_LEN)
+		return -EINVAL;
+
+	for (i = 0; i < nums; i++) {
+		data[i] = readl(qm->io_base + reg_addr);
+		reg_addr += QM_REG_ADDR_OFFSET;
+	}
+
+	return 0;
+}
+
+static int qm_write_regs(struct hisi_qm *qm, u32 reg,
+			 u32 *data, u8 nums)
+{
+	int i;
+
+	if (nums < 1 || nums > QM_REGS_MAX_LEN)
+		return -EINVAL;
+
+	for (i = 0; i < nums; i++)
+		writel(data[i], qm->io_base + reg + i * QM_REG_ADDR_OFFSET);
+
+	return 0;
+}
+
+static int qm_get_vft(struct hisi_qm *qm, u32 *base)
+{
+	u64 sqc_vft;
+	u32 qp_num;
+	int ret;
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+	if (ret)
+		return ret;
+
+	sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		  ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+		   QM_XQC_ADDR_OFFSET);
+	*base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+	qp_num = (QM_SQC_VFT_NUM_MASK_V2 &
+		  (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+	return qp_num;
+}
+
+static int qm_get_sqc(struct hisi_qm *qm, u64 *addr)
+{
+	int ret;
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, 0, 0, 1);
+	if (ret)
+		return ret;
+
+	*addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+		 QM_XQC_ADDR_OFFSET);
+
+	return 0;
+}
+
+static int qm_get_cqc(struct hisi_qm *qm, u64 *addr)
+{
+	int ret;
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, 0, 0, 1);
+	if (ret)
+		return ret;
+
+	*addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+		 QM_XQC_ADDR_OFFSET);
+
+	return 0;
+}
+
+static int qm_get_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+	struct device *dev = &qm->pdev->dev;
+	int ret;
+
+	ret = qm_read_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_VF_AEQ_INT_MASK\n");
+		return ret;
+	}
+
+	ret = qm_read_regs(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_VF_EQ_INT_MASK\n");
+		return ret;
+	}
+
+	ret = qm_read_regs(qm, QM_IFC_INT_SOURCE_V,
+			   &vf_data->ifc_int_source, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_IFC_INT_SOURCE_V\n");
+		return ret;
+	}
+
+	ret = qm_read_regs(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_IFC_INT_MASK\n");
+		return ret;
+	}
+
+	ret = qm_read_regs(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_IFC_INT_SET_V\n");
+		return ret;
+	}
+
+	ret = qm_read_regs(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_PAGE_SIZE\n");
+		return ret;
+	}
+
+	/* QM_EQC_DW has 7 regs */
+	ret = qm_read_regs(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+	if (ret) {
+		dev_err(dev, "failed to read QM_EQC_DW\n");
+		return ret;
+	}
+
+	/* QM_AEQC_DW has 7 regs */
+	ret = qm_read_regs(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+	if (ret) {
+		dev_err(dev, "failed to read QM_AEQC_DW\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qm_set_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+	struct device *dev = &qm->pdev->dev;
+	int ret;
+
+	/* check VF state */
+	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
+		dev_err(&qm->pdev->dev, "QM device is not ready to write\n");
+		return -EBUSY;
+	}
+
+	ret = qm_write_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_VF_AEQ_INT_MASK\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_VF_EQ_INT_MASK\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(qm, QM_IFC_INT_SOURCE_V,
+			    &vf_data->ifc_int_source, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_IFC_INT_SOURCE_V\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_IFC_INT_MASK\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_IFC_INT_SET_V\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_QUE_ISO_CFG_V\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_PAGE_SIZE\n");
+		return ret;
+	}
+
+	/* QM_EQC_DW has 7 regs */
+	ret = qm_write_regs(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+	if (ret) {
+		dev_err(dev, "failed to write QM_EQC_DW\n");
+		return ret;
+	}
+
+	/* QM_AEQC_DW has 7 regs */
+	ret = qm_write_regs(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+	if (ret) {
+		dev_err(dev, "failed to write QM_AEQC_DW\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd,
+		  u16 index, u8 priority)
+{
+	u64 doorbell;
+	u64 dbase;
+	u16 randata = 0;
+
+	if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ)
+		dbase = QM_DOORBELL_SQ_CQ_BASE_V2;
+	else
+		dbase = QM_DOORBELL_EQ_AEQ_BASE_V2;
+
+	doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) |
+		   ((u64)randata << QM_DB_RAND_SHIFT_V2) |
+		   ((u64)index << QM_DB_INDEX_SHIFT_V2) |
+		   ((u64)priority << QM_DB_PRIORITY_SHIFT_V2);
+
+	writeq(doorbell, qm->io_base + dbase);
+}
+
+static int pf_qm_get_qp_num(struct hisi_qm *qm, int vf_id, u32 *rbase)
+{
+	unsigned int val;
+	u64 sqc_vft;
+	u32 qp_num;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), MB_POLL_PERIOD_US,
+					 MB_POLL_TIMEOUT_US);
+	if (ret)
+		return ret;
+
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR);
+	/* 0 mean SQC VFT */
+	writel(0x0, qm->io_base + QM_VFT_CFG_TYPE);
+	writel(vf_id, qm->io_base + QM_VFT_CFG);
+
+	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), MB_POLL_PERIOD_US,
+					 MB_POLL_TIMEOUT_US);
+	if (ret)
+		return ret;
+
+	sqc_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) |
+		  ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) <<
+		   QM_XQC_ADDR_OFFSET);
+	*rbase = QM_SQC_VFT_BASE_MASK_V2 &
+		 (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+	qp_num = (QM_SQC_VFT_NUM_MASK_V2 &
+		  (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+	return qp_num;
+}
+
+static void qm_dev_cmd_init(struct hisi_qm *qm)
+{
+	/* Clear VF communication status registers. */
+	writel(0x1, qm->io_base + QM_IFC_INT_SOURCE_V);
+
+	/* Enable pf and vf communication. */
+	writel(0x0, qm->io_base + QM_IFC_INT_MASK);
+}
+
+static int vf_qm_cache_wb(struct hisi_qm *qm)
+{
+	unsigned int val;
+
+	writel(0x1, qm->io_base + QM_CACHE_WB_START);
+	if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+				       val, val & BIT(0), MB_POLL_PERIOD_US,
+				       MB_POLL_TIMEOUT_US)) {
+		dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void vf_qm_fun_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			    struct hisi_qm *qm)
+{
+	int i;
+
+	for (i = 0; i < qm->qp_num; i++)
+		qm_db(qm, i, QM_DOORBELL_CMD_SQ, 0, 1);
+}
+
+static int vf_qm_func_stop(struct hisi_qm *qm)
+{
+	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
+}
+
+static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			     struct hisi_acc_vf_migration_file *migf)
+{
+	struct acc_vf_data *vf_data = &migf->vf_data;
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &vf_qm->pdev->dev;
+	u32 que_iso_state;
+	int ret;
+
+	if (migf->total_length < QM_MATCH_SIZE)
+		return -EINVAL;
+
+	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
+		return -EINVAL;
+	}
+
+	if (vf_data->dev_id != hisi_acc_vdev->vf_dev->device) {
+		dev_err(dev, "failed to match VF devices\n");
+		return -EINVAL;
+	}
+
+	/* vf qp num check */
+	ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
+	if (ret <= 0) {
+		dev_err(dev, "failed to get vft qp nums\n");
+		return -EINVAL;
+	}
+
+	if (ret != vf_data->qp_num) {
+		dev_err(dev, "failed to match VF qp num\n");
+		return -EINVAL;
+	}
+
+	vf_qm->qp_num = ret;
+
+	/* vf isolation state check */
+	ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_QUE_ISO_CFG_V\n");
+		return ret;
+	}
+
+	if (vf_data->que_iso_cfg != que_iso_state) {
+		dev_err(dev, "failed to match isolation state\n");
+		return ret;
+	}
+
+	ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_VF_STATE\n");
+		return ret;
+	}
+
+	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+	return 0;
+}
+
+static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+				struct acc_vf_data *vf_data)
+{
+	struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &pf_qm->pdev->dev;
+	int vf_id = hisi_acc_vdev->vf_id;
+	int ret;
+
+	vf_data->acc_magic = ACC_DEV_MAGIC;
+	/* save device id */
+	vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
+
+	/* vf qp num save from PF */
+	ret = pf_qm_get_qp_num(pf_qm, vf_id, &vf_data->qp_base);
+	if (ret <= 0) {
+		dev_err(dev, "failed to get vft qp nums!\n");
+		return -EINVAL;
+	}
+
+	vf_data->qp_num = ret;
+
+	/* VF isolation state save from PF */
+	ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			   struct hisi_acc_vf_migration_file *migf)
+{
+	struct hisi_qm *qm = &hisi_acc_vdev->vf_qm;
+	struct device *dev = &qm->pdev->dev;
+	struct acc_vf_data *vf_data = &migf->vf_data;
+	int ret;
+
+	/* Return if only match data was transferred */
+	if (migf->total_length == QM_MATCH_SIZE)
+		return 0;
+
+	if (migf->total_length < sizeof(struct acc_vf_data))
+		return -EINVAL;
+
+	qm->eqe_dma = vf_data->eqe_dma;
+	qm->aeqe_dma = vf_data->aeqe_dma;
+	qm->sqc_dma = vf_data->sqc_dma;
+	qm->cqc_dma = vf_data->cqc_dma;
+
+	qm->qp_base = vf_data->qp_base;
+	qm->qp_num = vf_data->qp_num;
+
+	ret = qm_set_regs(qm, vf_data);
+	if (ret) {
+		dev_err(dev, "Set VF regs failed\n");
+		return ret;
+	}
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+	if (ret) {
+		dev_err(dev, "Set sqc failed\n");
+		return ret;
+	}
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+	if (ret) {
+		dev_err(dev, "Set cqc failed\n");
+		return ret;
+	}
+
+	qm_dev_cmd_init(qm);
+	return 0;
+}
+
+static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			    struct hisi_acc_vf_migration_file *migf)
+{
+	struct acc_vf_data *vf_data = &migf->vf_data;
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	struct device *dev = &vf_qm->pdev->dev;
+	int ret;
+
+	ret = vf_qm_get_match_data(hisi_acc_vdev, vf_data);
+	if (ret)
+		return ret;
+
+	if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
+		/* Update state and return with match data */
+		vf_data->vf_qm_state = QM_NOT_READY;
+		hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+		migf->total_length = QM_MATCH_SIZE;
+		return 0;
+	}
+
+	vf_data->vf_qm_state = QM_READY;
+	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+
+	ret = vf_qm_cache_wb(vf_qm);
+	if (ret) {
+		dev_err(dev, "failed to writeback QM Cache!\n");
+		return ret;
+	}
+
+	ret = qm_get_regs(vf_qm, vf_data);
+	if (ret)
+		return -EINVAL;
+
+	/* Every reg is 32 bit, the dma address is 64 bit. */
+	vf_data->eqe_dma = vf_data->qm_eqc_dw[2];
+	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+	vf_data->eqe_dma |= vf_data->qm_eqc_dw[1];
+	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[2];
+	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[1];
+
+	/* Through SQC_BT/CQC_BT to get sqc and cqc address */
+	ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
+	if (ret) {
+		dev_err(dev, "failed to read SQC addr!\n");
+		return -EINVAL;
+	}
+
+	ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
+	if (ret) {
+		dev_err(dev, "failed to read CQC addr!\n");
+		return -EINVAL;
+	}
+
+	migf->total_length = sizeof(struct acc_vf_data);
+	return 0;
+}
+
+/* Check the PF's RAS state and Function INT state */
+static int
+hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_qm *vfqm = &hisi_acc_vdev->vf_qm;
+	struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+	struct pci_dev *vf_pdev = hisi_acc_vdev->vf_dev;
+	struct device *dev = &qm->pdev->dev;
+	u32 state;
+
+	/* Check RAS state */
+	state = qm_check_reg_state(qm, QM_ABNORMAL_INT_STATUS);
+	if (state) {
+		dev_err(dev, "failed to check QM RAS state!\n");
+		return -EBUSY;
+	}
+
+	/* Check Function Communication state between PF and VF */
+	state = qm_check_reg_state(vfqm, QM_IFC_INT_STATUS);
+	if (state) {
+		dev_err(dev, "failed to check QM IFC INT state!\n");
+		return -EBUSY;
+	}
+	state = qm_check_reg_state(vfqm, QM_IFC_INT_SET_V);
+	if (state) {
+		dev_err(dev, "failed to check QM IFC INT SET state!\n");
+		return -EBUSY;
+	}
+
+	/* Check submodule task state */
+	switch (vf_pdev->device) {
+	case PCI_DEVICE_ID_HUAWEI_SEC_VF:
+		state = qm_check_reg_state(qm, SEC_CORE_INT_STATUS);
+		if (state) {
+			dev_err(dev, "failed to check QM SEC Core INT state!\n");
+			return -EBUSY;
+		}
+		return 0;
+	case PCI_DEVICE_ID_HUAWEI_HPRE_VF:
+		state = qm_check_reg_state(qm, HPRE_HAC_INT_STATUS);
+		if (state) {
+			dev_err(dev, "failed to check QM HPRE HAC INT state!\n");
+			return -EBUSY;
+		}
+		return 0;
+	case PCI_DEVICE_ID_HUAWEI_ZIP_VF:
+		state = qm_check_reg_state(qm, HZIP_CORE_INT_STATUS);
+		if (state) {
+			dev_err(dev, "failed to check QM ZIP Core INT state!\n");
+			return -EBUSY;
+		}
+		return 0;
+	default:
+		dev_err(dev, "failed to detect acc module type!\n");
+		return -EINVAL;
+	}
+}
+
+static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
+{
+	mutex_lock(&migf->lock);
+	migf->disabled = true;
+	migf->total_length = 0;
+	migf->filp->f_pos = 0;
+	mutex_unlock(&migf->lock);
+}
+
+static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	if (hisi_acc_vdev->resuming_migf) {
+		hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf);
+		fput(hisi_acc_vdev->resuming_migf->filp);
+		hisi_acc_vdev->resuming_migf = NULL;
+	}
+
+	if (hisi_acc_vdev->saving_migf) {
+		hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf);
+		fput(hisi_acc_vdev->saving_migf->filp);
+		hisi_acc_vdev->saving_migf = NULL;
+	}
+}
+
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if exists.
+ */
+static void
+hisi_acc_vf_state_mutex_unlock(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+again:
+	spin_lock(&hisi_acc_vdev->reset_lock);
+	if (hisi_acc_vdev->deferred_reset) {
+		hisi_acc_vdev->deferred_reset = false;
+		spin_unlock(&hisi_acc_vdev->reset_lock);
+		hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+		hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		goto again;
+	}
+	mutex_unlock(&hisi_acc_vdev->state_mutex);
+	spin_unlock(&hisi_acc_vdev->reset_lock);
+}
+
+static void hisi_acc_vf_start_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+
+	if (hisi_acc_vdev->vf_qm_state != QM_READY)
+		return;
+
+	vf_qm_fun_reset(hisi_acc_vdev, vf_qm);
+}
+
+static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct device *dev = &hisi_acc_vdev->vf_dev->dev;
+	struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf;
+	int ret;
+
+	/* Check dev compatibility */
+	ret = vf_qm_check_match(hisi_acc_vdev, migf);
+	if (ret) {
+		dev_err(dev, "failed to match the VF!\n");
+		return ret;
+	}
+	/* Recover data to VF */
+	ret = vf_qm_load_data(hisi_acc_vdev, migf);
+	if (ret) {
+		dev_err(dev, "failed to recover the VF!\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hisi_acc_vf_release_file(struct inode *inode, struct file *filp)
+{
+	struct hisi_acc_vf_migration_file *migf = filp->private_data;
+
+	hisi_acc_vf_disable_fd(migf);
+	mutex_destroy(&migf->lock);
+	kfree(migf);
+	return 0;
+}
+
+static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *buf,
+					size_t len, loff_t *pos)
+{
+	struct hisi_acc_vf_migration_file *migf = filp->private_data;
+	loff_t requested_length;
+	ssize_t done = 0;
+	int ret;
+
+	if (pos)
+		return -ESPIPE;
+	pos = &filp->f_pos;
+
+	if (*pos < 0 ||
+	    check_add_overflow((loff_t)len, *pos, &requested_length))
+		return -EINVAL;
+
+	if (requested_length > sizeof(struct acc_vf_data))
+		return -ENOMEM;
+
+	mutex_lock(&migf->lock);
+	if (migf->disabled) {
+		done = -ENODEV;
+		goto out_unlock;
+	}
+
+	ret = copy_from_user(&migf->vf_data, buf, len);
+	if (ret) {
+		done = -EFAULT;
+		goto out_unlock;
+	}
+	*pos += len;
+	done = len;
+	migf->total_length += len;
+out_unlock:
+	mutex_unlock(&migf->lock);
+	return done;
+}
+
+static const struct file_operations hisi_acc_vf_resume_fops = {
+	.owner = THIS_MODULE,
+	.write = hisi_acc_vf_resume_write,
+	.release = hisi_acc_vf_release_file,
+	.llseek = no_llseek,
+};
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	migf->filp = anon_inode_getfile("hisi_acc_vf_mig", &hisi_acc_vf_resume_fops, migf,
+					O_WRONLY);
+	if (IS_ERR(migf->filp)) {
+		int err = PTR_ERR(migf->filp);
+
+		kfree(migf);
+		return ERR_PTR(err);
+	}
+
+	stream_open(migf->filp->f_inode, migf->filp);
+	mutex_init(&migf->lock);
+	return migf;
+}
+
+static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len,
+				     loff_t *pos)
+{
+	struct hisi_acc_vf_migration_file *migf = filp->private_data;
+	ssize_t done = 0;
+	int ret;
+
+	if (pos)
+		return -ESPIPE;
+	pos = &filp->f_pos;
+
+	mutex_lock(&migf->lock);
+	if (*pos > migf->total_length) {
+		done = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (migf->disabled) {
+		done = -ENODEV;
+		goto out_unlock;
+	}
+
+	len = min_t(size_t, migf->total_length - *pos, len);
+	if (len) {
+		ret = copy_to_user(buf, &migf->vf_data, len);
+		if (ret) {
+			done = -EFAULT;
+			goto out_unlock;
+		}
+		*pos += len;
+		done = len;
+	}
+out_unlock:
+	mutex_unlock(&migf->lock);
+	return done;
+}
+
+static const struct file_operations hisi_acc_vf_save_fops = {
+	.owner = THIS_MODULE,
+	.read = hisi_acc_vf_save_read,
+	.release = hisi_acc_vf_release_file,
+	.llseek = no_llseek,
+};
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf;
+	int ret;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	migf->filp = anon_inode_getfile("hisi_acc_vf_mig", &hisi_acc_vf_save_fops, migf,
+					O_RDONLY);
+	if (IS_ERR(migf->filp)) {
+		int err = PTR_ERR(migf->filp);
+
+		kfree(migf);
+		return ERR_PTR(err);
+	}
+
+	stream_open(migf->filp->f_inode, migf->filp);
+	mutex_init(&migf->lock);
+
+	ret = vf_qm_state_save(hisi_acc_vdev, migf);
+	if (ret) {
+		fput(migf->filp);
+		return ERR_PTR(ret);
+	}
+
+	return migf;
+}
+
+static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct device *dev = &hisi_acc_vdev->vf_dev->dev;
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	int ret;
+
+	ret = vf_qm_func_stop(vf_qm);
+	if (ret) {
+		dev_err(dev, "failed to stop QM VF function!\n");
+		return ret;
+	}
+
+	ret = hisi_acc_check_int_state(hisi_acc_vdev);
+	if (ret) {
+		dev_err(dev, "failed to check QM INT state!\n");
+		return ret;
+	}
+	return 0;
+}
+
+static struct file *
+hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			     u32 new)
+{
+	u32 cur = hisi_acc_vdev->mig_state;
+	int ret;
+
+	if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) {
+		ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
+		if (ret)
+			return ERR_PTR(ret);
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
+		struct hisi_acc_vf_migration_file *migf;
+
+		migf = hisi_acc_vf_stop_copy(hisi_acc_vdev);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+		get_file(migf->filp);
+		hisi_acc_vdev->saving_migf = migf;
+		return migf->filp;
+	}
+
+	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
+		struct hisi_acc_vf_migration_file *migf;
+
+		migf = hisi_acc_vf_pci_resume(hisi_acc_vdev);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+		get_file(migf->filp);
+		hisi_acc_vdev->resuming_migf = migf;
+		return migf->filp;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
+		ret = hisi_acc_vf_load_state(hisi_acc_vdev);
+		if (ret)
+			return ERR_PTR(ret);
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) {
+		hisi_acc_vf_start_device(hisi_acc_vdev);
+		return NULL;
+	}
+
+	/*
+	 * vfio_mig_get_next_state() does not use arcs other than the above
+	 */
+	WARN_ON(true);
+	return ERR_PTR(-EINVAL);
+}
+
+static struct file *
+hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
+				   enum vfio_device_mig_state new_state)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
+			struct hisi_acc_vf_core_device, core_device.vdev);
+	enum vfio_device_mig_state next_state;
+	struct file *res = NULL;
+	int ret;
+
+	mutex_lock(&hisi_acc_vdev->state_mutex);
+	while (new_state != hisi_acc_vdev->mig_state) {
+		ret = vfio_mig_get_next_state(vdev,
+					      hisi_acc_vdev->mig_state,
+					      new_state, &next_state);
+		if (ret) {
+			res = ERR_PTR(-EINVAL);
+			break;
+		}
+
+		res = hisi_acc_vf_set_device_state(hisi_acc_vdev, next_state);
+		if (IS_ERR(res))
+			break;
+		hisi_acc_vdev->mig_state = next_state;
+		if (WARN_ON(res && new_state != hisi_acc_vdev->mig_state)) {
+			fput(res);
+			res = ERR_PTR(-EINVAL);
+			break;
+		}
+	}
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+	return res;
+}
+
+static int
+hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
+				   enum vfio_device_mig_state *curr_state)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
+			struct hisi_acc_vf_core_device, core_device.vdev);
+
+	mutex_lock(&hisi_acc_vdev->state_mutex);
+	*curr_state = hisi_acc_vdev->mig_state;
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+	return 0;
+}
+
+static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev);
+
+	if (hisi_acc_vdev->core_device.vdev.migration_flags !=
+				VFIO_MIGRATION_STOP_COPY)
+		return;
+
+	/*
+	 * As the higher VFIO layers are holding locks across reset and using
+	 * those same locks with the mm_lock we need to prevent ABBA deadlock
+	 * with the state_mutex and mm_lock.
+	 * In case the state_mutex was taken already we defer the cleanup work
+	 * to the unlock flow of the other running context.
+	 */
+	spin_lock(&hisi_acc_vdev->reset_lock);
+	hisi_acc_vdev->deferred_reset = true;
+	if (!mutex_trylock(&hisi_acc_vdev->state_mutex)) {
+		spin_unlock(&hisi_acc_vdev->reset_lock);
+		return;
+	}
+	spin_unlock(&hisi_acc_vdev->reset_lock);
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+}
+
+static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device;
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	struct pci_dev *vf_dev = vdev->pdev;
+
+	/*
+	 * ACC VF dev BAR2 region consists of both functional register space
+	 * and migration control register space. For migration to work, we
+	 * need access to both. Hence, we map the entire BAR2 region here.
+	 * But unnecessarily exposing the migration BAR region to the Guest
+	 * has the potential to prevent/corrupt the Guest migration. Hence,
+	 * we restrict access to the migration control space from
+	 * Guest(Please see mmap/ioctl/read/write override functions).
+	 *
+	 * Please note that it is OK to expose the entire VF BAR if migration
+	 * is not supported or required as this cannot affect the ACC PF
+	 * configurations.
+	 *
+	 * Also the HiSilicon ACC VF devices supported by this driver on
+	 * HiSilicon hardware platforms are integrated end point devices
+	 * and the platform lacks the capability to perform any PCIe P2P
+	 * between these devices.
+ */ + + vf_qm->io_base = + ioremap(pci_resource_start(vf_dev, VFIO_PCI_BAR2_REGION_INDEX), + pci_resource_len(vf_dev, VFIO_PCI_BAR2_REGION_INDEX)); + if (!vf_qm->io_base) + return -EIO; + + vf_qm->fun_type = QM_HW_VF; + vf_qm->pdev = vf_dev; + mutex_init(&vf_qm->mailbox_lock); + + return 0; +} + +static struct hisi_qm *hisi_acc_get_pf_qm(struct pci_dev *pdev) +{ + struct hisi_qm *pf_qm; + struct pci_driver *pf_driver; + + if (!pdev->is_virtfn) + return NULL; + + switch (pdev->device) { + case PCI_DEVICE_ID_HUAWEI_SEC_VF: + pf_driver = hisi_sec_get_pf_driver(); + break; + case PCI_DEVICE_ID_HUAWEI_HPRE_VF: + pf_driver = hisi_hpre_get_pf_driver(); + break; + case PCI_DEVICE_ID_HUAWEI_ZIP_VF: + pf_driver = hisi_zip_get_pf_driver(); + break; + default: + return NULL; + } + + if (!pf_driver) + return NULL; + + pf_qm = pci_iov_get_pf_drvdata(pdev, pf_driver); + + return !IS_ERR(pf_qm) ? pf_qm : NULL; +} + +static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev, + size_t count, loff_t *ppos, + size_t *new_count) +{ + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + + if (index == VFIO_PCI_BAR2_REGION_INDEX) { + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + resource_size_t end = pci_resource_len(vdev->pdev, index) / 2; + + /* Check if access is for migration control region */ + if (pos >= end) + return -EINVAL; + + *new_count = min(count, (size_t)(end - pos)); + } + + return 0; +} + +static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma) +{ + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + unsigned int index; + + index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (index == VFIO_PCI_BAR2_REGION_INDEX) { + u64 req_len, pgoff, req_start; + resource_size_t end = pci_resource_len(vdev->pdev, index) / 2; + + req_len = vma->vm_end - vma->vm_start; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (req_start + req_len > end) + return -EINVAL; + } + + return vfio_pci_core_mmap(core_vdev, vma); +} + +static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev, + const char __user *buf, size_t count, + loff_t *ppos) +{ + size_t new_count = count; + int ret; + + ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count); + if (ret) + return ret; + + return vfio_pci_core_write(core_vdev, buf, new_count, ppos); +} + +static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev, + char __user *buf, size_t count, + loff_t *ppos) +{ + size_t new_count = count; + int ret; + + ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count); + if (ret) + return ret; + + return vfio_pci_core_read(core_vdev, buf, new_count, ppos); +} + +static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg) +{ + if (cmd == VFIO_DEVICE_GET_REGION_INFO) { + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + struct pci_dev *pdev = vdev->pdev; + struct vfio_region_info info; + unsigned long minsz; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index == VFIO_PCI_BAR2_REGION_INDEX) { + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + + /* + * ACC VF dev BAR2 
region consists of both functional + * register space and migration control register space. + * Report only the functional region to Guest. + */ + info.size = pci_resource_len(pdev, info.index) / 2; + + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP; + + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; + } + } + return vfio_pci_core_ioctl(core_vdev, cmd, arg); +} + +static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) +{ + struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev, + struct hisi_acc_vf_core_device, core_device.vdev); + struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + if (core_vdev->ops->migration_set_state) { + ret = hisi_acc_vf_qm_init(hisi_acc_vdev); + if (ret) { + vfio_pci_core_disable(vdev); + return ret; + } + hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + } + + vfio_pci_core_finish_enable(vdev); + return 0; +} + +static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) +{ + struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev, + struct hisi_acc_vf_core_device, core_device.vdev); + struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; + + iounmap(vf_qm->io_base); + vfio_pci_core_close_device(core_vdev); +} + +static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { + .name = "hisi-acc-vfio-pci-migration", + .open_device = hisi_acc_vfio_pci_open_device, + .close_device = hisi_acc_vfio_pci_close_device, + .ioctl = hisi_acc_vfio_pci_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = hisi_acc_vfio_pci_read, + .write = hisi_acc_vfio_pci_write, + .mmap = hisi_acc_vfio_pci_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .migration_set_state = hisi_acc_vfio_pci_set_device_state, + .migration_get_state = hisi_acc_vfio_pci_get_device_state, +}; + +static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { + .name = "hisi-acc-vfio-pci", + .open_device = hisi_acc_vfio_pci_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, +}; + +static int +hisi_acc_vfio_pci_migrn_init(struct hisi_acc_vf_core_device *hisi_acc_vdev, + struct pci_dev *pdev, struct hisi_qm *pf_qm) +{ + int vf_id; + + vf_id = pci_iov_vf_id(pdev); + if (vf_id < 0) + return vf_id; + + hisi_acc_vdev->vf_id = vf_id + 1; + hisi_acc_vdev->core_device.vdev.migration_flags = + VFIO_MIGRATION_STOP_COPY; + hisi_acc_vdev->pf_qm = pf_qm; + hisi_acc_vdev->vf_dev = pdev; + mutex_init(&hisi_acc_vdev->state_mutex); + + return 0; +} + +static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct hisi_acc_vf_core_device *hisi_acc_vdev; + struct hisi_qm *pf_qm; + int ret; + + hisi_acc_vdev = kzalloc(sizeof(*hisi_acc_vdev), GFP_KERNEL); + if (!hisi_acc_vdev) + return -ENOMEM; + + pf_qm = hisi_acc_get_pf_qm(pdev); + if (pf_qm && pf_qm->ver >= QM_HW_V3) { + ret = hisi_acc_vfio_pci_migrn_init(hisi_acc_vdev, pdev, pf_qm); + if (!ret) { + vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, + &hisi_acc_vfio_pci_migrn_ops); + } else { + pci_warn(pdev, "migration support failed, continue with generic interface\n"); + 
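/*
 * Annotation (editorial, not part of the patch): when migration init
 * fails the device is still registered, but through the plain
 * hisi_acc_vfio_pci_ops table below, which has no migration_set_state/
 * migration_get_state callbacks; the VFIO_DEVICE_FEATURE_MIGRATION
 * query added in the vfio.c hunks further down then returns -ENOTTY,
 * so userspace sees an ordinary vfio-pci device without migration.
 */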
vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, + &hisi_acc_vfio_pci_ops); + } + } else { + vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, + &hisi_acc_vfio_pci_ops); + } + + ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device); + if (ret) + goto out_free; + + dev_set_drvdata(&pdev->dev, hisi_acc_vdev); + return 0; + +out_free: + vfio_pci_core_uninit_device(&hisi_acc_vdev->core_device); + kfree(hisi_acc_vdev); + return ret; +} + +static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev) +{ + struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev); + + vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device); + vfio_pci_core_uninit_device(&hisi_acc_vdev->core_device); + kfree(hisi_acc_vdev); +} + +static const struct pci_device_id hisi_acc_vfio_pci_table[] = { + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) }, + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) }, + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) }, + { } +}; + +MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table); + +static const struct pci_error_handlers hisi_acc_vf_err_handlers = { + .reset_done = hisi_acc_vf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + +static struct pci_driver hisi_acc_vfio_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = hisi_acc_vfio_pci_table, + .probe = hisi_acc_vfio_pci_probe, + .remove = hisi_acc_vfio_pci_remove, + .err_handler = &hisi_acc_vf_err_handlers, +}; + +module_pci_driver(hisi_acc_vfio_pci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Liu Longfang <liulongfang@huawei.com>"); +MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>"); +MODULE_DESCRIPTION("HiSilicon VFIO PCI - VFIO PCI driver with live migration support for HiSilicon ACC device family"); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h new file mode 100644 index 000000000000..5494f4983bbe --- /dev/null +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 HiSilicon Ltd. 
*/ + +#ifndef HISI_ACC_VFIO_PCI_H +#define HISI_ACC_VFIO_PCI_H + +#include <linux/hisi_acc_qm.h> + +#define MB_POLL_PERIOD_US 10 +#define MB_POLL_TIMEOUT_US 1000 +#define QM_CACHE_WB_START 0x204 +#define QM_CACHE_WB_DONE 0x208 +#define QM_MB_CMD_PAUSE_QM 0xe +#define QM_ABNORMAL_INT_STATUS 0x100008 +#define QM_IFC_INT_STATUS 0x0028 +#define SEC_CORE_INT_STATUS 0x301008 +#define HPRE_HAC_INT_STATUS 0x301800 +#define HZIP_CORE_INT_STATUS 0x3010AC +#define QM_QUE_ISO_CFG 0x301154 + +#define QM_VFT_CFG_RDY 0x10006c +#define QM_VFT_CFG_OP_WR 0x100058 +#define QM_VFT_CFG_TYPE 0x10005c +#define QM_VFT_CFG 0x100060 +#define QM_VFT_CFG_OP_ENABLE 0x100054 +#define QM_VFT_CFG_DATA_L 0x100064 +#define QM_VFT_CFG_DATA_H 0x100068 + +#define ERROR_CHECK_TIMEOUT 100 +#define CHECK_DELAY_TIME 100 + +#define QM_SQC_VFT_BASE_SHIFT_V2 28 +#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) +#define QM_SQC_VFT_NUM_SHIFT_V2 45 +#define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0) + +/* RW regs */ +#define QM_REGS_MAX_LEN 7 +#define QM_REG_ADDR_OFFSET 0x0004 + +#define QM_XQC_ADDR_OFFSET 32U +#define QM_VF_AEQ_INT_MASK 0x0004 +#define QM_VF_EQ_INT_MASK 0x000c +#define QM_IFC_INT_SOURCE_V 0x0020 +#define QM_IFC_INT_MASK 0x0024 +#define QM_IFC_INT_SET_V 0x002c +#define QM_QUE_ISO_CFG_V 0x0030 +#define QM_PAGE_SIZE 0x0034 + +#define QM_EQC_DW0 0X8000 +#define QM_AEQC_DW0 0X8020 + +struct acc_vf_data { +#define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state) + /* QM match information */ +#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC + u64 acc_magic; + u32 qp_num; + u32 dev_id; + u32 que_iso_cfg; + u32 qp_base; + u32 vf_qm_state; + /* QM reserved match information */ + u32 qm_rsv_state[3]; + + /* QM RW regs */ + u32 aeq_int_mask; + u32 eq_int_mask; + u32 ifc_int_source; + u32 ifc_int_mask; + u32 ifc_int_set; + u32 page_size; + + /* QM_EQC_DW has 7 regs */ + u32 qm_eqc_dw[7]; + + /* QM_AEQC_DW has 7 regs */ + u32 qm_aeqc_dw[7]; + + /* QM reserved 5 regs */ + u32 qm_rsv_regs[5]; + u32 padding; + /* qm memory init information */ + u64 eqe_dma; + u64 aeqe_dma; + u64 sqc_dma; + u64 cqc_dma; +}; + +struct hisi_acc_vf_migration_file { + struct file *filp; + struct mutex lock; + bool disabled; + + struct acc_vf_data vf_data; + size_t total_length; +}; + +struct hisi_acc_vf_core_device { + struct vfio_pci_core_device core_device; + u8 deferred_reset:1; + /* for migration state */ + struct mutex state_mutex; + enum vfio_device_mig_state mig_state; + struct pci_dev *pf_dev; + struct pci_dev *vf_dev; + struct hisi_qm *pf_qm; + struct hisi_qm vf_qm; + u32 vf_qm_state; + int vf_id; + /* for reset handler */ + spinlock_t reset_lock; + struct hisi_acc_vf_migration_file *resuming_migf; + struct hisi_acc_vf_migration_file *saving_migf; +}; +#endif /* HISI_ACC_VFIO_PCI_H */ diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig new file mode 100644 index 000000000000..29ba9c504a75 --- /dev/null +++ b/drivers/vfio/pci/mlx5/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config MLX5_VFIO_PCI + tristate "VFIO support for MLX5 PCI devices" + depends on MLX5_CORE + depends on VFIO_PCI_CORE + help + This provides migration support for MLX5 devices using the VFIO + framework. + + If you don't know what to do here, say N. 
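
Editorial aside: the help text above is deliberately terse. Concretely, "migration support ... using the VFIO framework" means the new VFIO_DEVICE_FEATURE migration uAPI wired up in the vfio.c hunks near the end of this diff. Below is a minimal userspace sketch of the discovery step, assuming an already-open VFIO device fd; the function name and error handling are illustrative only:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Ask a VFIO device fd whether it implements the v2 migration uAPI. */
static int query_migration(int device_fd)
{
	/* VFIO_DEVICE_FEATURE takes a header plus a trailing payload. */
	__u64 buf[(sizeof(struct vfio_device_feature) +
		   sizeof(struct vfio_device_feature_migration)) / sizeof(__u64)] = {};
	struct vfio_device_feature *feature = (void *)buf;
	struct vfio_device_feature_migration *mig = (void *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -1;	/* -ENOTTY: driver has no migration callbacks */

	printf("STOP_COPY:%d P2P:%d\n",
	       !!(mig->flags & VFIO_MIGRATION_STOP_COPY),
	       !!(mig->flags & VFIO_MIGRATION_P2P));
	return 0;
}

On success the reported flags mirror the migration_flags each driver sets at probe time: STOP_COPY | P2P for mlx5 (when MLX5_CAP_GEN(mdev, migration) is set), STOP_COPY alone for the HiSilicon ACC driver.
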
diff --git a/drivers/vfio/pci/mlx5/Makefile b/drivers/vfio/pci/mlx5/Makefile new file mode 100644 index 000000000000..689627da7ff5 --- /dev/null +++ b/drivers/vfio/pci/mlx5/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5-vfio-pci.o +mlx5-vfio-pci-y := main.o cmd.o + diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c new file mode 100644 index 000000000000..5c9f9218cc1d --- /dev/null +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#include "cmd.h" + +int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod) +{ + struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {}; + int ret; + + if (!mdev) + return -ENOTCONN; + + MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA); + MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id); + MLX5_SET(suspend_vhca_in, in, op_mod, op_mod); + + ret = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out); + mlx5_vf_put_core_dev(mdev); + return ret; +} + +int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod) +{ + struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {}; + int ret; + + if (!mdev) + return -ENOTCONN; + + MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA); + MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id); + MLX5_SET(resume_vhca_in, in, op_mod, op_mod); + + ret = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out); + mlx5_vf_put_core_dev(mdev); + return ret; +} + +int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id, + size_t *state_size) +{ + struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; + int ret; + + if (!mdev) + return -ENOTCONN; + + MLX5_SET(query_vhca_migration_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE); + MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id); + MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0); + + ret = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out); + if (ret) + goto end; + + *state_size = MLX5_GET(query_vhca_migration_state_out, out, + required_umem_size); + +end: + mlx5_vf_put_core_dev(mdev); + return ret; +} + +int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id) +{ + struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int ret; + + if (!mdev) + return -ENOTCONN; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) { + ret = -ENOMEM; + goto end; + } + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, 1); + MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | + HCA_CAP_OPMOD_GET_CUR); + + ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + if (ret) + goto err_exec; + + *vhca_id = MLX5_GET(query_hca_cap_out, out, + capability.cmd_hca_cap.vhca_id); + +err_exec: + kfree(out); +end: + mlx5_vf_put_core_dev(mdev); + return ret; +} + +static int 
_create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_vf_migration_file *migf, u32 *mkey) +{ + size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE); + struct sg_dma_page_iter dma_iter; + int err = 0, inlen; + __be64 *mtt; + void *mkc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*mtt) * round_up(npages, 2); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + DIV_ROUND_UP(npages, 2)); + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + + for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0) + *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter)); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, rr, 1); + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); + MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2)); + MLX5_SET64(mkc, mkc, len, migf->total_length); + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + kvfree(in); + return err; +} + +int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id, + struct mlx5_vf_migration_file *migf) +{ + struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; + u32 pdn, mkey; + int err; + + if (!mdev) + return -ENOTCONN; + + err = mlx5_core_alloc_pd(mdev, &pdn); + if (err) + goto end; + + err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, + 0); + if (err) + goto err_dma_map; + + err = _create_state_mkey(mdev, pdn, migf, &mkey); + if (err) + goto err_create_mkey; + + MLX5_SET(save_vhca_state_in, in, opcode, + MLX5_CMD_OP_SAVE_VHCA_STATE); + MLX5_SET(save_vhca_state_in, in, op_mod, 0); + MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id); + MLX5_SET(save_vhca_state_in, in, mkey, mkey); + MLX5_SET(save_vhca_state_in, in, size, migf->total_length); + + err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out); + if (err) + goto err_exec; + + migf->total_length = + MLX5_GET(save_vhca_state_out, out, actual_image_size); + + mlx5_core_destroy_mkey(mdev, mkey); + mlx5_core_dealloc_pd(mdev, pdn); + dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); + mlx5_vf_put_core_dev(mdev); + + return 0; + +err_exec: + mlx5_core_destroy_mkey(mdev, mkey); +err_create_mkey: + dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); +err_dma_map: + mlx5_core_dealloc_pd(mdev, pdn); +end: + mlx5_vf_put_core_dev(mdev); + return err; +} + +int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id, + struct mlx5_vf_migration_file *migf) +{ + struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev); + u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; + u32 pdn, mkey; + int err; + + if (!mdev) + return -ENOTCONN; + + mutex_lock(&migf->lock); + if (!migf->total_length) { + err = -EINVAL; + goto end; + } + + err = mlx5_core_alloc_pd(mdev, &pdn); + if (err) + goto end; + + err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); + if (err) + goto err_reg; + + err = _create_state_mkey(mdev, pdn, migf, &mkey); + if (err) + goto err_mkey; + + MLX5_SET(load_vhca_state_in, in, opcode, + 
MLX5_CMD_OP_LOAD_VHCA_STATE); + MLX5_SET(load_vhca_state_in, in, op_mod, 0); + MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id); + MLX5_SET(load_vhca_state_in, in, mkey, mkey); + MLX5_SET(load_vhca_state_in, in, size, migf->total_length); + + err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out); + + mlx5_core_destroy_mkey(mdev, mkey); +err_mkey: + dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); +err_reg: + mlx5_core_dealloc_pd(mdev, pdn); +end: + mlx5_vf_put_core_dev(mdev); + mutex_unlock(&migf->lock); + return err; +} diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h new file mode 100644 index 000000000000..1392a11a9cc0 --- /dev/null +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + */ + +#ifndef MLX5_VFIO_CMD_H +#define MLX5_VFIO_CMD_H + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> + +struct mlx5_vf_migration_file { + struct file *filp; + struct mutex lock; + bool disabled; + + struct sg_append_table table; + size_t total_length; + size_t allocated_length; + + /* Optimize mlx5vf_get_migration_page() for sequential access */ + struct scatterlist *last_offset_sg; + unsigned int sg_last_entry; + unsigned long last_offset; +}; + +int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod); +int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod); +int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id, + size_t *state_size); +int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id); +int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id, + struct mlx5_vf_migration_file *migf); +int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id, + struct mlx5_vf_migration_file *migf); +#endif /* MLX5_VFIO_CMD_H */ diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c new file mode 100644 index 000000000000..bbec5d288fee --- /dev/null +++ b/drivers/vfio/pci/mlx5/main.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved + */ + +#include <linux/device.h> +#include <linux/eventfd.h> +#include <linux/file.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/pm_runtime.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> +#include <linux/sched/mm.h> +#include <linux/vfio_pci_core.h> +#include <linux/anon_inodes.h> + +#include "cmd.h" + +/* Arbitrary to prevent userspace from consuming endless memory */ +#define MAX_MIGRATION_SIZE (512*1024*1024) + +struct mlx5vf_pci_core_device { + struct vfio_pci_core_device core_device; + u16 vhca_id; + u8 migrate_cap:1; + u8 deferred_reset:1; + /* protect migration state */ + struct mutex state_mutex; + enum vfio_device_mig_state mig_state; + /* protect the reset_done flow */ + spinlock_t reset_lock; + struct mlx5_vf_migration_file *resuming_migf; + struct mlx5_vf_migration_file *saving_migf; +}; + +static struct page * +mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, + unsigned long offset) +{ + unsigned long cur_offset = 0; + struct scatterlist *sg; + unsigned int i; + + /* All accesses are sequential */ + if (offset < migf->last_offset || !migf->last_offset_sg) { + migf->last_offset = 0; + migf->last_offset_sg = migf->table.sgt.sgl; + migf->sg_last_entry = 0; + } + + cur_offset = migf->last_offset; + + for_each_sg(migf->last_offset_sg, sg, + migf->table.sgt.orig_nents - migf->sg_last_entry, i) { + if (offset < sg->length + cur_offset) { + migf->last_offset_sg = sg; + migf->sg_last_entry += i; + migf->last_offset = cur_offset; + return nth_page(sg_page(sg), + (offset - cur_offset) / PAGE_SIZE); + } + cur_offset += sg->length; + } + return NULL; +} + +static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf, + unsigned int npages) +{ + unsigned int to_alloc = npages; + struct page **page_list; + unsigned long filled; + unsigned int to_fill; + int ret; + + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + do { + filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list); + if (!filled) { + ret = -ENOMEM; + goto err; + } + to_alloc -= filled; + ret = sg_alloc_append_table_from_pages( + &migf->table, page_list, filled, 0, + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, + GFP_KERNEL); + + if (ret) + goto err; + migf->allocated_length += filled * PAGE_SIZE; + /* clean input for another bulk allocation */ + memset(page_list, 0, filled * sizeof(*page_list)); + to_fill = min_t(unsigned int, to_alloc, + PAGE_SIZE / sizeof(*page_list)); + } while (to_alloc > 0); + + kvfree(page_list); + return 0; + +err: + kvfree(page_list); + return ret; +} + +static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) +{ + struct sg_page_iter sg_iter; + + mutex_lock(&migf->lock); + /* Undo alloc_pages_bulk_array() */ + for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0) + __free_page(sg_page_iter_page(&sg_iter)); + sg_free_append_table(&migf->table); + migf->disabled = true; + migf->total_length = 0; + migf->allocated_length = 0; + migf->filp->f_pos = 0; + mutex_unlock(&migf->lock); +} + +static int mlx5vf_release_file(struct inode *inode, struct file *filp) +{ + struct mlx5_vf_migration_file *migf = filp->private_data; + + mlx5vf_disable_fd(migf); + mutex_destroy(&migf->lock); + kfree(migf); + return 0; +} + +static ssize_t 
mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + struct mlx5_vf_migration_file *migf = filp->private_data; + ssize_t done = 0; + + if (pos) + return -ESPIPE; + pos = &filp->f_pos; + + mutex_lock(&migf->lock); + if (*pos > migf->total_length) { + done = -EINVAL; + goto out_unlock; + } + if (migf->disabled) { + done = -ENODEV; + goto out_unlock; + } + + len = min_t(size_t, migf->total_length - *pos, len); + while (len) { + size_t page_offset; + struct page *page; + size_t page_len; + u8 *from_buff; + int ret; + + page_offset = (*pos) % PAGE_SIZE; + page = mlx5vf_get_migration_page(migf, *pos - page_offset); + if (!page) { + if (done == 0) + done = -EINVAL; + goto out_unlock; + } + + page_len = min_t(size_t, len, PAGE_SIZE - page_offset); + from_buff = kmap_local_page(page); + ret = copy_to_user(buf, from_buff + page_offset, page_len); + kunmap_local(from_buff); + if (ret) { + done = -EFAULT; + goto out_unlock; + } + *pos += page_len; + len -= page_len; + done += page_len; + buf += page_len; + } + +out_unlock: + mutex_unlock(&migf->lock); + return done; +} + +static const struct file_operations mlx5vf_save_fops = { + .owner = THIS_MODULE, + .read = mlx5vf_save_read, + .release = mlx5vf_release_file, + .llseek = no_llseek, +}; + +static struct mlx5_vf_migration_file * +mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) +{ + struct mlx5_vf_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf, + O_RDONLY); + if (IS_ERR(migf->filp)) { + int err = PTR_ERR(migf->filp); + + kfree(migf); + return ERR_PTR(err); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + + ret = mlx5vf_cmd_query_vhca_migration_state( + mvdev->core_device.pdev, mvdev->vhca_id, &migf->total_length); + if (ret) + goto out_free; + + ret = mlx5vf_add_migration_pages( + migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE)); + if (ret) + goto out_free; + + ret = mlx5vf_cmd_save_vhca_state(mvdev->core_device.pdev, + mvdev->vhca_id, migf); + if (ret) + goto out_free; + return migf; +out_free: + fput(migf->filp); + return ERR_PTR(ret); +} + +static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct mlx5_vf_migration_file *migf = filp->private_data; + loff_t requested_length; + ssize_t done = 0; + + if (pos) + return -ESPIPE; + pos = &filp->f_pos; + + if (*pos < 0 || + check_add_overflow((loff_t)len, *pos, &requested_length)) + return -EINVAL; + + if (requested_length > MAX_MIGRATION_SIZE) + return -ENOMEM; + + mutex_lock(&migf->lock); + if (migf->disabled) { + done = -ENODEV; + goto out_unlock; + } + + if (migf->allocated_length < requested_length) { + done = mlx5vf_add_migration_pages( + migf, + DIV_ROUND_UP(requested_length - migf->allocated_length, + PAGE_SIZE)); + if (done) + goto out_unlock; + } + + while (len) { + size_t page_offset; + struct page *page; + size_t page_len; + u8 *to_buff; + int ret; + + page_offset = (*pos) % PAGE_SIZE; + page = mlx5vf_get_migration_page(migf, *pos - page_offset); + if (!page) { + if (done == 0) + done = -EINVAL; + goto out_unlock; + } + + page_len = min_t(size_t, len, PAGE_SIZE - page_offset); + to_buff = kmap_local_page(page); + ret = copy_from_user(to_buff + page_offset, buf, page_len); + kunmap_local(to_buff); + if (ret) { + done = -EFAULT; + goto out_unlock; + } + *pos += page_len; + len -= page_len; + done += 
page_len; + buf += page_len; + migf->total_length += page_len; + } +out_unlock: + mutex_unlock(&migf->lock); + return done; +} + +static const struct file_operations mlx5vf_resume_fops = { + .owner = THIS_MODULE, + .write = mlx5vf_resume_write, + .release = mlx5vf_release_file, + .llseek = no_llseek, +}; + +static struct mlx5_vf_migration_file * +mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) +{ + struct mlx5_vf_migration_file *migf; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf, + O_WRONLY); + if (IS_ERR(migf->filp)) { + int err = PTR_ERR(migf->filp); + + kfree(migf); + return ERR_PTR(err); + } + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + return migf; +} + +static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) +{ + if (mvdev->resuming_migf) { + mlx5vf_disable_fd(mvdev->resuming_migf); + fput(mvdev->resuming_migf->filp); + mvdev->resuming_migf = NULL; + } + if (mvdev->saving_migf) { + mlx5vf_disable_fd(mvdev->saving_migf); + fput(mvdev->saving_migf->filp); + mvdev->saving_migf = NULL; + } +} + +static struct file * +mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, + u32 new) +{ + u32 cur = mvdev->mig_state; + int ret; + + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) { + ret = mlx5vf_cmd_suspend_vhca( + mvdev->core_device.pdev, mvdev->vhca_id, + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) { + ret = mlx5vf_cmd_resume_vhca( + mvdev->core_device.pdev, mvdev->vhca_id, + MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) { + ret = mlx5vf_cmd_suspend_vhca( + mvdev->core_device.pdev, mvdev->vhca_id, + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) { + ret = mlx5vf_cmd_resume_vhca( + mvdev->core_device.pdev, mvdev->vhca_id, + MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { + struct mlx5_vf_migration_file *migf; + + migf = mlx5vf_pci_save_device_data(mvdev); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + mvdev->saving_migf = migf; + return migf->filp; + } + + if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) { + mlx5vf_disable_fds(mvdev); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { + struct mlx5_vf_migration_file *migf; + + migf = mlx5vf_pci_resume_device_data(mvdev); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + mvdev->resuming_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { + ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev, + mvdev->vhca_id, + mvdev->resuming_migf); + if (ret) + return ERR_PTR(ret); + mlx5vf_disable_fds(mvdev); + return NULL; + } + + /* + * vfio_mig_get_next_state() does not use arcs other than the above + */ + WARN_ON(true); + return ERR_PTR(-EINVAL); +} + +/* + * This function is called in all state_mutex unlock cases to 
+ * handle a 'deferred_reset' if exists. + */ +static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) +{ +again: + spin_lock(&mvdev->reset_lock); + if (mvdev->deferred_reset) { + mvdev->deferred_reset = false; + spin_unlock(&mvdev->reset_lock); + mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + mlx5vf_disable_fds(mvdev); + goto again; + } + mutex_unlock(&mvdev->state_mutex); + spin_unlock(&mvdev->reset_lock); +} + +static struct file * +mlx5vf_pci_set_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state new_state) +{ + struct mlx5vf_pci_core_device *mvdev = container_of( + vdev, struct mlx5vf_pci_core_device, core_device.vdev); + enum vfio_device_mig_state next_state; + struct file *res = NULL; + int ret; + + mutex_lock(&mvdev->state_mutex); + while (new_state != mvdev->mig_state) { + ret = vfio_mig_get_next_state(vdev, mvdev->mig_state, + new_state, &next_state); + if (ret) { + res = ERR_PTR(ret); + break; + } + res = mlx5vf_pci_step_device_state_locked(mvdev, next_state); + if (IS_ERR(res)) + break; + mvdev->mig_state = next_state; + if (WARN_ON(res && new_state != mvdev->mig_state)) { + fput(res); + res = ERR_PTR(-EINVAL); + break; + } + } + mlx5vf_state_mutex_unlock(mvdev); + return res; +} + +static int mlx5vf_pci_get_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state *curr_state) +{ + struct mlx5vf_pci_core_device *mvdev = container_of( + vdev, struct mlx5vf_pci_core_device, core_device.vdev); + + mutex_lock(&mvdev->state_mutex); + *curr_state = mvdev->mig_state; + mlx5vf_state_mutex_unlock(mvdev); + return 0; +} + +static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); + + if (!mvdev->migrate_cap) + return; + + /* + * As the higher VFIO layers are holding locks across reset and using + * those same locks with the mm_lock we need to prevent ABBA deadlock + * with the state_mutex and mm_lock. + * In case the state_mutex was taken already we defer the cleanup work + * to the unlock flow of the other running context. 
+ */ + spin_lock(&mvdev->reset_lock); + mvdev->deferred_reset = true; + if (!mutex_trylock(&mvdev->state_mutex)) { + spin_unlock(&mvdev->reset_lock); + return; + } + spin_unlock(&mvdev->reset_lock); + mlx5vf_state_mutex_unlock(mvdev); +} + +static int mlx5vf_pci_open_device(struct vfio_device *core_vdev) +{ + struct mlx5vf_pci_core_device *mvdev = container_of( + core_vdev, struct mlx5vf_pci_core_device, core_device.vdev); + struct vfio_pci_core_device *vdev = &mvdev->core_device; + int vf_id; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + if (!mvdev->migrate_cap) { + vfio_pci_core_finish_enable(vdev); + return 0; + } + + vf_id = pci_iov_vf_id(vdev->pdev); + if (vf_id < 0) { + ret = vf_id; + goto out_disable; + } + + ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1, &mvdev->vhca_id); + if (ret) + goto out_disable; + + mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + vfio_pci_core_finish_enable(vdev); + return 0; +out_disable: + vfio_pci_core_disable(vdev); + return ret; +} + +static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) +{ + struct mlx5vf_pci_core_device *mvdev = container_of( + core_vdev, struct mlx5vf_pci_core_device, core_device.vdev); + + mlx5vf_disable_fds(mvdev); + vfio_pci_core_close_device(core_vdev); +} + +static const struct vfio_device_ops mlx5vf_pci_ops = { + .name = "mlx5-vfio-pci", + .open_device = mlx5vf_pci_open_device, + .close_device = mlx5vf_pci_close_device, + .ioctl = vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .migration_set_state = mlx5vf_pci_set_device_state, + .migration_get_state = mlx5vf_pci_get_device_state, +}; + +static int mlx5vf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct mlx5vf_pci_core_device *mvdev; + int ret; + + mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL); + if (!mvdev) + return -ENOMEM; + vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops); + + if (pdev->is_virtfn) { + struct mlx5_core_dev *mdev = + mlx5_vf_get_core_dev(pdev); + + if (mdev) { + if (MLX5_CAP_GEN(mdev, migration)) { + mvdev->migrate_cap = 1; + mvdev->core_device.vdev.migration_flags = + VFIO_MIGRATION_STOP_COPY | + VFIO_MIGRATION_P2P; + mutex_init(&mvdev->state_mutex); + spin_lock_init(&mvdev->reset_lock); + } + mlx5_vf_put_core_dev(mdev); + } + } + + ret = vfio_pci_core_register_device(&mvdev->core_device); + if (ret) + goto out_free; + + dev_set_drvdata(&pdev->dev, mvdev); + return 0; + +out_free: + vfio_pci_core_uninit_device(&mvdev->core_device); + kfree(mvdev); + return ret; +} + +static void mlx5vf_pci_remove(struct pci_dev *pdev) +{ + struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev); + + vfio_pci_core_unregister_device(&mvdev->core_device); + vfio_pci_core_uninit_device(&mvdev->core_device); + kfree(mvdev); +} + +static const struct pci_device_id mlx5vf_pci_table[] = { + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_MELLANOX, 0x101e) }, /* ConnectX Family mlx5Gen Virtual Function */ + {} +}; + +MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table); + +static const struct pci_error_handlers mlx5vf_err_handlers = { + .reset_done = mlx5vf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + +static struct pci_driver mlx5vf_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = mlx5vf_pci_table, + .probe = mlx5vf_pci_probe, + .remove = mlx5vf_pci_remove, + 
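/*
 * Annotation (editorial, not part of the patch): hooking reset_done
 * here is what makes the deferred_reset scheme above work. After any
 * PCI reset, mig_state is forced back to RUNNING and both migration
 * files are invalidated, either immediately or from the next
 * state_mutex unlock in mlx5vf_state_mutex_unlock().
 */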
.err_handler = &mlx5vf_err_handlers, +}; + +static void __exit mlx5vf_pci_cleanup(void) +{ + pci_unregister_driver(&mlx5vf_pci_driver); +} + +static int __init mlx5vf_pci_init(void) +{ + return pci_register_driver(&mlx5vf_pci_driver); +} + +module_init(mlx5vf_pci_init); +module_exit(mlx5vf_pci_cleanup); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Max Gurtovoy <mgurtovoy@nvidia.com>"); +MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>"); +MODULE_DESCRIPTION( + "MLX5 VFIO PCI - User Level meta-driver for MLX5 device family"); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index a5ce92beb655..2b047469e02f 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -130,6 +130,7 @@ static const struct vfio_device_ops vfio_pci_ops = { .open_device = vfio_pci_open_device, .close_device = vfio_pci_core_close_device, .ioctl = vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, .read = vfio_pci_core_read, .write = vfio_pci_core_write, .mmap = vfio_pci_core_mmap, diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index f948e6cd2993..b7bb16f92ac6 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -228,6 +228,19 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat if (!ret) { /* D3 might be unsupported via quirk, skip unless in D3 */ if (needs_save && pdev->current_state >= PCI_D3hot) { + /* + * The current PCI state will be saved locally in + * 'pm_save' during the D3hot transition. When the + * device state is changed to D0 again with the current + * function, then pci_store_saved_state() will restore + * the state and will free the memory pointed by + * 'pm_save'. There are few cases where the PCI power + * state can be changed to D0 without the involvement + * of the driver. For these cases, free the earlier + * allocated memory first before overwriting 'pm_save' + * to prevent the memory leak. + */ + kfree(vdev->pm_save); vdev->pm_save = pci_store_saved_state(pdev); } else if (needs_restore) { pci_load_and_free_saved_state(pdev, &vdev->pm_save); @@ -322,6 +335,17 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) /* For needs_reset */ lockdep_assert_held(&vdev->vdev.dev_set->lock); + /* + * This function can be invoked while the power state is non-D0. + * This function calls __pci_reset_function_locked() which internally + * can use pci_pm_reset() for the function reset. pci_pm_reset() will + * fail if the power state is non-D0. Also, for the devices which + * have NoSoftRst-, the reset function can cause the PCI config space + * reset without restoring the original state (saved locally in + * 'vdev->pm_save'). + */ + vfio_pci_set_power_state(vdev, PCI_D0); + /* Stop the device from further DMA */ pci_clear_master(pdev); @@ -921,6 +945,19 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, return -EINVAL; vfio_pci_zap_and_down_write_memory_lock(vdev); + + /* + * This function can be invoked while the power state is non-D0. + * If pci_try_reset_function() has been called while the power + * state is non-D0, then pci_try_reset_function() will + * internally set the power state to D0 without vfio driver + * involvement. For the devices which have NoSoftRst-, the + * reset function can cause the PCI config space reset without + * restoring the original state (saved locally in + * 'vdev->pm_save'). 
+ */ + vfio_pci_set_power_state(vdev, PCI_D0); + ret = pci_try_reset_function(vdev->pdev); up_write(&vdev->memory_lock); @@ -1114,70 +1151,50 @@ hot_reset_release: return vfio_pci_ioeventfd(vdev, ioeventfd.offset, ioeventfd.data, count, ioeventfd.fd); - } else if (cmd == VFIO_DEVICE_FEATURE) { - struct vfio_device_feature feature; - uuid_t uuid; - - minsz = offsetofend(struct vfio_device_feature, flags); - - if (copy_from_user(&feature, (void __user *)arg, minsz)) - return -EFAULT; - - if (feature.argsz < minsz) - return -EINVAL; - - /* Check unknown flags */ - if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK | - VFIO_DEVICE_FEATURE_SET | - VFIO_DEVICE_FEATURE_GET | - VFIO_DEVICE_FEATURE_PROBE)) - return -EINVAL; - - /* GET & SET are mutually exclusive except with PROBE */ - if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && - (feature.flags & VFIO_DEVICE_FEATURE_SET) && - (feature.flags & VFIO_DEVICE_FEATURE_GET)) - return -EINVAL; - - switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { - case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN: - if (!vdev->vf_token) - return -ENOTTY; - - /* - * We do not support GET of the VF Token UUID as this - * could expose the token of the previous device user. - */ - if (feature.flags & VFIO_DEVICE_FEATURE_GET) - return -EINVAL; - - if (feature.flags & VFIO_DEVICE_FEATURE_PROBE) - return 0; + } + return -ENOTTY; +} +EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl); - /* Don't SET unless told to do so */ - if (!(feature.flags & VFIO_DEVICE_FEATURE_SET)) - return -EINVAL; +static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz) +{ + struct vfio_pci_core_device *vdev = + container_of(device, struct vfio_pci_core_device, vdev); + uuid_t uuid; + int ret; - if (feature.argsz < minsz + sizeof(uuid)) - return -EINVAL; + if (!vdev->vf_token) + return -ENOTTY; + /* + * We do not support GET of the VF Token UUID as this could + * expose the token of the previous device user. 
+ */ + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, + sizeof(uuid)); + if (ret != 1) + return ret; - if (copy_from_user(&uuid, (void __user *)(arg + minsz), - sizeof(uuid))) - return -EFAULT; + if (copy_from_user(&uuid, arg, sizeof(uuid))) + return -EFAULT; - mutex_lock(&vdev->vf_token->lock); - uuid_copy(&vdev->vf_token->uuid, &uuid); - mutex_unlock(&vdev->vf_token->lock); + mutex_lock(&vdev->vf_token->lock); + uuid_copy(&vdev->vf_token->uuid, &uuid); + mutex_unlock(&vdev->vf_token->lock); + return 0; +} - return 0; - default: - return -ENOTTY; - } +int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, + void __user *arg, size_t argsz) +{ + switch (flags & VFIO_DEVICE_FEATURE_MASK) { + case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN: + return vfio_pci_core_feature_token(device, flags, arg, argsz); + default: + return -ENOTTY; } - - return -ENOTTY; } -EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl); +EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl_feature); static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) @@ -1891,8 +1908,8 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) } EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device); -static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, - pci_channel_state_t state) +pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) { struct vfio_pci_core_device *vdev; struct vfio_device *device; @@ -1914,6 +1931,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_CAN_RECOVER; } +EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected); int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { @@ -1936,7 +1954,7 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); const struct pci_error_handlers vfio_pci_core_err_handlers = { - .error_detected = vfio_pci_aer_err_detected, + .error_detected = vfio_pci_core_aer_err_detected, }; EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers); @@ -2055,6 +2073,18 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, } cur_mem = NULL; + /* + * The pci_reset_bus() will reset all the devices in the bus. + * The power state can be non-D0 for some of the devices in the bus. + * For these devices, the pci_reset_bus() will internally set + * the power state to D0 without vfio driver involvement. + * For the devices which have NoSoftRst-, the reset function can + * cause the PCI config space reset without restoring the original + * state (saved locally in 'vdev->pm_save'). + */ + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(cur, PCI_D0); + ret = pci_reset_bus(pdev); err_undo: @@ -2108,6 +2138,18 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) if (!pdev) return false; + /* + * The pci_reset_bus() will reset all the devices in the bus. + * The power state can be non-D0 for some of the devices in the bus. + * For these devices, the pci_reset_bus() will internally set + * the power state to D0 without vfio driver involvement. + * For the devices which have NoSoftRst-, the reset function can + * cause the PCI config space reset without restoring the original + * state (saved locally in 'vdev->pm_save'). 
+ */ + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(cur, PCI_D0); + ret = pci_reset_bus(pdev); if (ret) return false; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 57d3b2cbbd8e..82ac1569deb0 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -288,6 +288,7 @@ out: return done; } +#ifdef CONFIG_VFIO_PCI_VGA ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite) { @@ -355,6 +356,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, return done; } +#endif static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, bool test_mem) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 735d1d344af9..a4555014bd1e 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1557,15 +1557,303 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) return 0; } +/* + * vfio_mig_get_next_state - Compute the next step in the FSM + * @cur_fsm - The current state the device is in + * @new_fsm - The target state to reach + * @next_fsm - Pointer to the next step to get to new_fsm + * + * Return 0 upon success, otherwise -errno + * Upon success the next step in the state progression between cur_fsm and + * new_fsm will be set in next_fsm. + * + * This breaks down requests for combination transitions into smaller steps and + * returns the next step to get to new_fsm. The function may need to be called + * multiple times before reaching new_fsm. + * + */ +int vfio_mig_get_next_state(struct vfio_device *device, + enum vfio_device_mig_state cur_fsm, + enum vfio_device_mig_state new_fsm, + enum vfio_device_mig_state *next_fsm) +{ + enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; + /* + * The coding in this table requires the driver to implement the + * following FSM arcs: + * RESUMING -> STOP + * STOP -> RESUMING + * STOP -> STOP_COPY + * STOP_COPY -> STOP + * + * If P2P is supported then the driver must also implement these FSM + * arcs: + * RUNNING -> RUNNING_P2P + * RUNNING_P2P -> RUNNING + * RUNNING_P2P -> STOP + * STOP -> RUNNING_P2P + * Without P2P the driver must implement: + * RUNNING -> STOP + * STOP -> RUNNING + * + * The coding will step through multiple states for some combination + * transitions; if all optional features are supported, this means the + * following ones: + * RESUMING -> STOP -> RUNNING_P2P + * RESUMING -> STOP -> RUNNING_P2P -> RUNNING + * RESUMING -> STOP -> STOP_COPY + * RUNNING -> RUNNING_P2P -> STOP + * RUNNING -> RUNNING_P2P -> STOP -> RESUMING + * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY + * RUNNING_P2P -> STOP -> RESUMING + * RUNNING_P2P -> STOP -> STOP_COPY + * STOP -> RUNNING_P2P -> RUNNING + * STOP_COPY -> STOP -> RESUMING + * STOP_COPY -> STOP -> RUNNING_P2P + * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING + */ + static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { + [VFIO_DEVICE_STATE_STOP] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_RUNNING] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, + 
[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_STOP_COPY] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_RESUMING] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_RUNNING_P2P] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_ERROR] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + }; + + static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, + [VFIO_DEVICE_STATE_RUNNING_P2P] = + VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, + [VFIO_DEVICE_STATE_ERROR] = ~0U, + }; + + if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || + (state_flags_table[cur_fsm] & device->migration_flags) != + state_flags_table[cur_fsm])) + return -EINVAL; + + if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || + (state_flags_table[new_fsm] & device->migration_flags) != + state_flags_table[new_fsm]) + return -EINVAL; + + /* + * Arcs touching optional and unsupported states are skipped over. The + * driver will instead see an arc from the original state to the next + * logical state, as per the above comment. + */ + *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; + while ((state_flags_table[*next_fsm] & device->migration_flags) != + state_flags_table[*next_fsm]) + *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; + + return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 
+
+/*
+ * Convert the driver's struct file into an FD number and return it to userspace
+ */
+static int vfio_ioctl_mig_return_fd(struct file *filp, void __user *arg,
+				    struct vfio_device_feature_mig_state *mig)
+{
+	int ret;
+	int fd;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		ret = fd;
+		goto out_fput;
+	}
+
+	mig->data_fd = fd;
+	if (copy_to_user(arg, mig, sizeof(*mig))) {
+		ret = -EFAULT;
+		goto out_put_unused;
+	}
+	fd_install(fd, filp);
+	return 0;
+
+out_put_unused:
+	put_unused_fd(fd);
+out_fput:
+	fput(filp);
+	return ret;
+}
+
+static int
+vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
+					   u32 flags, void __user *arg,
+					   size_t argsz)
+{
+	size_t minsz =
+		offsetofend(struct vfio_device_feature_mig_state, data_fd);
+	struct vfio_device_feature_mig_state mig;
+	struct file *filp = NULL;
+	int ret;
+
+	if (!device->ops->migration_set_state ||
+	    !device->ops->migration_get_state)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz,
+				 VFIO_DEVICE_FEATURE_SET |
+				 VFIO_DEVICE_FEATURE_GET,
+				 sizeof(mig));
+	if (ret != 1)
+		return ret;
+
+	if (copy_from_user(&mig, arg, minsz))
+		return -EFAULT;
+
+	if (flags & VFIO_DEVICE_FEATURE_GET) {
+		enum vfio_device_mig_state curr_state;
+
+		ret = device->ops->migration_get_state(device, &curr_state);
+		if (ret)
+			return ret;
+		mig.device_state = curr_state;
+		goto out_copy;
+	}
+
+	/* Handle the VFIO_DEVICE_FEATURE_SET */
+	filp = device->ops->migration_set_state(device, mig.device_state);
+	if (IS_ERR(filp) || !filp)
+		goto out_copy;
+
+	return vfio_ioctl_mig_return_fd(filp, arg, &mig);
+out_copy:
+	mig.data_fd = -1;
+	if (copy_to_user(arg, &mig, sizeof(mig)))
+		return -EFAULT;
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+	return 0;
+}
+
+static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
+					       u32 flags, void __user *arg,
+					       size_t argsz)
+{
+	struct vfio_device_feature_migration mig = {
+		.flags = device->migration_flags,
+	};
+	int ret;
+
+	if (!device->ops->migration_set_state ||
+	    !device->ops->migration_get_state)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
+				 sizeof(mig));
+	if (ret != 1)
+		return ret;
+	if (copy_to_user(arg, &mig, sizeof(mig)))
+		return -EFAULT;
+	return 0;
+}
+
+static int vfio_ioctl_device_feature(struct vfio_device *device,
+				     struct vfio_device_feature __user *arg)
+{
+	size_t minsz = offsetofend(struct vfio_device_feature, flags);
+	struct vfio_device_feature feature;
+
+	if (copy_from_user(&feature, arg, minsz))
+		return -EFAULT;
+
+	if (feature.argsz < minsz)
+		return -EINVAL;
+
+	/* Check unknown flags */
+	if (feature.flags &
+	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
+	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
+		return -EINVAL;
+
+	/* GET & SET are mutually exclusive except with PROBE */
+	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
+	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
+	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
+		return -EINVAL;
+
+	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
+	case VFIO_DEVICE_FEATURE_MIGRATION:
+		return vfio_ioctl_device_feature_migration(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
+		return vfio_ioctl_device_feature_mig_device_state(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
+	default:
+		if (unlikely(!device->ops->device_feature))
+			return -EINVAL;
+		return device->ops->device_feature(device, feature.flags,
+						   arg->data,
+						   feature.argsz - minsz);
+	}
+}
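From userspace, the new state machine is driven entirely through VFIO_DEVICE_FEATURE. The sketch below is illustrative only (the query_mig_state() name and error handling are not from this patch): it performs a GET of the current device state; a SET that enters STOP_COPY or RESUMING would additionally receive the migration data_fd back in the same structure.

#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Illustrative only: print the device's current migration state. */
static int query_mig_state(int device_fd)
{
	/* vfio_device_feature is a header followed by feature-specific data. */
	__u8 buf[sizeof(struct vfio_device_feature) +
		 sizeof(struct vfio_device_feature_mig_state)]
		__attribute__((aligned(8))) = {};
	struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
	struct vfio_device_feature_mig_state *mig =
		(struct vfio_device_feature_mig_state *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_GET |
			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -errno;
	printf("device_state: %u\n", mig->device_state);
	return 0;
}

Note that argsz covers the whole buffer, so the core's vfio_check_feature() sees argsz - minsz >= sizeof(struct vfio_device_feature_mig_state) and accepts the request.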
+
 static long vfio_device_fops_unl_ioctl(struct file *filep,
 				       unsigned int cmd, unsigned long arg)
 {
 	struct vfio_device *device = filep->private_data;
 
-	if (unlikely(!device->ops->ioctl))
-		return -EINVAL;
-
-	return device->ops->ioctl(device, cmd, arg);
+	switch (cmd) {
+	case VFIO_DEVICE_FEATURE:
+		return vfio_ioctl_device_feature(device, (void __user *)arg);
+	default:
+		if (unlikely(!device->ops->ioctl))
+			return -EINVAL;
+		return device->ops->ioctl(device, cmd, arg);
+	}
 }
 
 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
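With this dispatch in place, the core handles VFIO_DEVICE_FEATURE before the driver's ioctl op is consulted, so feature discovery works uniformly across drivers. As a final illustrative sketch (the helper name is an assumption, not part of the patch), userspace can probe whether a device implements the v2 migration protocol, and which optional flags it carries:

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Illustrative only: returns the VFIO_MIGRATION_* mask, or -errno. */
static long long query_migration_flags(int device_fd)
{
	__u8 buf[sizeof(struct vfio_device_feature) +
		 sizeof(struct vfio_device_feature_migration)]
		__attribute__((aligned(8))) = {};
	struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
	struct vfio_device_feature_migration *mig =
		(struct vfio_device_feature_migration *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;
	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -errno;	/* -ENOTTY: no v2 migration support */
	return (long long)mig->flags;	/* VFIO_MIGRATION_STOP_COPY, optionally | VFIO_MIGRATION_P2P */
}

A device whose driver does not implement both migration ops fails the GET with -ENOTTY, which is how userspace distinguishes migratable from non-migratable devices.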