summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2022-03-24 12:35:59 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-03-24 12:35:59 -0700
commit 7403e6d8263937dea206dd201fed1ceed190ca18 (patch)
tree 72e84c7bc56998c9998e95a4f14ebdc252dded41 /drivers
parent 66711cfea642a162bc99abdefe1645b26dbd778f (diff)
parent f621eb13facb7681a79f4fec8ec6553ae160da76 (diff)
download linux-rpi-7403e6d8263937dea206dd201fed1ceed190ca18.tar.gz
linux-rpi-7403e6d8263937dea206dd201fed1ceed190ca18.tar.bz2
linux-rpi-7403e6d8263937dea206dd201fed1ceed190ca18.zip
Merge tag 'vfio-v5.18-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:

 - Introduce new device migration uAPI and implement device specific
   mlx5 vfio-pci variant driver supporting new protocol (Jason
   Gunthorpe, Yishai Hadas, Leon Romanovsky)

 - New HiSilicon acc vfio-pci variant driver, also supporting migration
   interface (Shameer Kolothum, Longfang Liu)

 - D3hot fixes for vfio-pci-core (Abhishek Sahu)

 - Document new vfio-pci variant driver acceptance criteria
   (Alex Williamson)

 - Fix UML build unresolved ioport_{un}map() functions
   (Alex Williamson)

 - Fix MAINTAINERS due to header movement (Lukas Bulwahn)

* tag 'vfio-v5.18-rc1' of https://github.com/awilliam/linux-vfio: (31 commits)
  vfio-pci: Provide reviewers and acceptance criteria for variant drivers
  MAINTAINERS: adjust entry for header movement in hisilicon qm driver
  hisi_acc_vfio_pci: Use its own PCI reset_done error handler
  hisi_acc_vfio_pci: Add support for VFIO live migration
  crypto: hisilicon/qm: Set the VF QM state register
  hisi_acc_vfio_pci: Add helper to retrieve the struct pci_driver
  hisi_acc_vfio_pci: Restrict access to VF dev BAR2 migration region
  hisi_acc_vfio_pci: add new vfio_pci driver for HiSilicon ACC devices
  hisi_acc_qm: Move VF PCI device IDs to common header
  crypto: hisilicon/qm: Move few definitions to common header
  crypto: hisilicon/qm: Move the QM header to include/linux
  vfio/mlx5: Fix to not use 0 as NULL pointer
  PCI/IOV: Fix wrong kernel-doc identifier
  vfio/mlx5: Use its own PCI reset_done error handler
  vfio/pci: Expose vfio_pci_core_aer_err_detected()
  vfio/mlx5: Implement vfio_pci driver for mlx5 devices
  vfio/mlx5: Expose migration commands over mlx5 device
  vfio: Remove migration protocol v1 documentation
  vfio: Extend the device migration protocol with RUNNING_P2P
  vfio: Define device migration protocol v2
  ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre.h2
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c19
-rw-r--r--drivers/crypto/hisilicon/qm.c68
-rw-r--r--drivers/crypto/hisilicon/qm.h441
-rw-r--r--drivers/crypto/hisilicon/sec2/sec.h2
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c21
-rw-r--r--drivers/crypto/hisilicon/sgl.c2
-rw-r--r--drivers/crypto/hisilicon/zip/zip.h2
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c17
-rw-r--r--drivers/pci/iov.c43
-rw-r--r--drivers/vfio/pci/Kconfig5
-rw-r--r--drivers/vfio/pci/Makefile4
-rw-r--r--drivers/vfio/pci/hisilicon/Kconfig15
-rw-r--r--drivers/vfio/pci/hisilicon/Makefile4
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c1326
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h116
-rw-r--r--drivers/vfio/pci/mlx5/Kconfig10
-rw-r--r--drivers/vfio/pci/mlx5/Makefile4
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c259
-rw-r--r--drivers/vfio/pci/mlx5/cmd.h36
-rw-r--r--drivers/vfio/pci/mlx5/main.c676
-rw-r--r--drivers/vfio/pci/vfio_pci.c1
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c162
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c2
-rw-r--r--drivers/vfio/vfio.c296
29 files changed, 3021 insertions, 585 deletions
diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h
index e0b4a1982ee9..9a0558ed82f9 100644
--- a/drivers/crypto/hisilicon/hpre/hpre.h
+++ b/drivers/crypto/hisilicon/hpre/hpre.h
@@ -4,7 +4,7 @@
#define __HISI_HPRE_H
#include <linux/list.h>
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
#define HPRE_SQE_SIZE sizeof(struct hpre_sqe)
#define HPRE_PF_DEF_Q_NUM 64
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index ebfab3e14499..36ab30e9e654 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -68,8 +68,7 @@
#define HPRE_REG_RD_INTVRL_US 10
#define HPRE_REG_RD_TMOUT_US 1000
#define HPRE_DBGFS_VAL_MAX_LEN 20
-#define HPRE_PCI_DEVICE_ID 0xa258
-#define HPRE_PCI_VF_DEVICE_ID 0xa259
+#define PCI_DEVICE_ID_HUAWEI_HPRE_PF 0xa258
#define HPRE_QM_USR_CFG_MASK GENMASK(31, 1)
#define HPRE_QM_AXI_CFG_MASK GENMASK(15, 0)
#define HPRE_QM_VFG_AX_MASK GENMASK(7, 0)
@@ -111,8 +110,8 @@
static const char hpre_name[] = "hisi_hpre";
static struct dentry *hpre_debugfs_root;
static const struct pci_device_id hpre_dev_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_DEVICE_ID) },
- { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, HPRE_PCI_VF_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_PF) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
{ 0, }
};
@@ -242,7 +241,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);
static int pf_q_num_set(const char *val, const struct kernel_param *kp)
{
- return q_num_set(val, kp, HPRE_PCI_DEVICE_ID);
+ return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF);
}
static const struct kernel_param_ops hpre_pf_q_num_ops = {
@@ -921,7 +920,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
hisi_qm_debug_init(qm);
- if (qm->pdev->device == HPRE_PCI_DEVICE_ID) {
+ if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) {
ret = hpre_ctrl_debug_init(qm);
if (ret)
goto failed_to_create;
@@ -958,7 +957,7 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->sqe_size = HPRE_SQE_SIZE;
qm->dev_name = hpre_name;
- qm->fun_type = (pdev->device == HPRE_PCI_DEVICE_ID) ?
+ qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) ?
QM_HW_PF : QM_HW_VF;
if (qm->fun_type == QM_HW_PF) {
qm->qp_base = HPRE_PF_DEF_Q_BASE;
@@ -1191,6 +1190,12 @@ static struct pci_driver hpre_pci_driver = {
.driver.pm = &hpre_pm_ops,
};
+struct pci_driver *hisi_hpre_get_pf_driver(void)
+{
+ return &hpre_pci_driver;
+}
+EXPORT_SYMBOL_GPL(hisi_hpre_get_pf_driver);
+
static void hpre_register_debugfs(void)
{
if (!debugfs_initialized())
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 453390044181..009132333d2b 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -15,7 +15,7 @@
#include <linux/uacce.h>
#include <linux/uaccess.h>
#include <uapi/misc/uacce/hisi_qm.h>
-#include "qm.h"
+#include <linux/hisi_acc_qm.h>
/* eq/aeq irq enable */
#define QM_VF_AEQ_INT_SOURCE 0x0
@@ -33,23 +33,6 @@
#define QM_ABNORMAL_EVENT_IRQ_VECTOR 3
/* mailbox */
-#define QM_MB_CMD_SQC 0x0
-#define QM_MB_CMD_CQC 0x1
-#define QM_MB_CMD_EQC 0x2
-#define QM_MB_CMD_AEQC 0x3
-#define QM_MB_CMD_SQC_BT 0x4
-#define QM_MB_CMD_CQC_BT 0x5
-#define QM_MB_CMD_SQC_VFT_V2 0x6
-#define QM_MB_CMD_STOP_QP 0x8
-#define QM_MB_CMD_SRC 0xc
-#define QM_MB_CMD_DST 0xd
-
-#define QM_MB_CMD_SEND_BASE 0x300
-#define QM_MB_EVENT_SHIFT 8
-#define QM_MB_BUSY_SHIFT 13
-#define QM_MB_OP_SHIFT 14
-#define QM_MB_CMD_DATA_ADDR_L 0x304
-#define QM_MB_CMD_DATA_ADDR_H 0x308
#define QM_MB_PING_ALL_VFS 0xffff
#define QM_MB_CMD_DATA_SHIFT 32
#define QM_MB_CMD_DATA_MASK GENMASK(31, 0)
@@ -103,19 +86,12 @@
#define QM_DB_CMD_SHIFT_V1 16
#define QM_DB_INDEX_SHIFT_V1 32
#define QM_DB_PRIORITY_SHIFT_V1 48
-#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000
-#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000
#define QM_QUE_ISO_CFG_V 0x0030
#define QM_PAGE_SIZE 0x0034
#define QM_QUE_ISO_EN 0x100154
#define QM_CAPBILITY 0x100158
#define QM_QP_NUN_MASK GENMASK(10, 0)
#define QM_QP_DB_INTERVAL 0x10000
-#define QM_QP_MAX_NUM_SHIFT 11
-#define QM_DB_CMD_SHIFT_V2 12
-#define QM_DB_RAND_SHIFT_V2 16
-#define QM_DB_INDEX_SHIFT_V2 32
-#define QM_DB_PRIORITY_SHIFT_V2 48
#define QM_MEM_START_INIT 0x100040
#define QM_MEM_INIT_DONE 0x100044
@@ -693,7 +669,7 @@ static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
}
/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
-static int qm_wait_mb_ready(struct hisi_qm *qm)
+int hisi_qm_wait_mb_ready(struct hisi_qm *qm)
{
u32 val;
@@ -701,6 +677,7 @@ static int qm_wait_mb_ready(struct hisi_qm *qm)
val, !((val >> QM_MB_BUSY_SHIFT) &
0x1), POLL_PERIOD, POLL_TIMEOUT);
}
+EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready);
/* 128 bit should be written to hardware at one time to trigger a mailbox */
static void qm_mb_write(struct hisi_qm *qm, const void *src)
@@ -726,14 +703,14 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
{
- if (unlikely(qm_wait_mb_ready(qm))) {
+ if (unlikely(hisi_qm_wait_mb_ready(qm))) {
dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
goto mb_busy;
}
qm_mb_write(qm, mailbox);
- if (unlikely(qm_wait_mb_ready(qm))) {
+ if (unlikely(hisi_qm_wait_mb_ready(qm))) {
dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
goto mb_busy;
}
@@ -745,8 +722,8 @@ mb_busy:
return -EBUSY;
}
-static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
- bool op)
+int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+ bool op)
{
struct qm_mailbox mailbox;
int ret;
@@ -762,6 +739,7 @@ static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
return ret;
}
+EXPORT_SYMBOL_GPL(hisi_qm_mb);
static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
{
@@ -1351,7 +1329,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
u64 sqc_vft;
int ret;
- ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
if (ret)
return ret;
@@ -1725,12 +1703,12 @@ static int dump_show(struct hisi_qm *qm, void *info,
static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
{
- return qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
+ return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
}
static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
{
- return qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
+ return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
}
static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
@@ -1842,7 +1820,7 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
if (IS_ERR(xeqc))
return PTR_ERR(xeqc);
- ret = qm_mb(qm, cmd, xeqc_dma, 0, 1);
+ ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
if (ret)
goto err_free_ctx;
@@ -2495,7 +2473,7 @@ unlock:
static int qm_stop_qp(struct hisi_qp *qp)
{
- return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
+ return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
}
static int qm_set_msi(struct hisi_qm *qm, bool set)
@@ -2763,7 +2741,7 @@ static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
return -ENOMEM;
}
- ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE);
kfree(sqc);
@@ -2804,7 +2782,7 @@ static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
return -ENOMEM;
}
- ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE);
kfree(cqc);
@@ -3514,6 +3492,12 @@ static void hisi_qm_pci_uninit(struct hisi_qm *qm)
pci_disable_device(pdev);
}
+static void hisi_qm_set_state(struct hisi_qm *qm, u8 state)
+{
+ if (qm->ver > QM_HW_V2 && qm->fun_type == QM_HW_VF)
+ writel(state, qm->io_base + QM_VF_STATE);
+}
+
/**
* hisi_qm_uninit() - Uninitialize qm.
* @qm: The qm needed uninit.
@@ -3542,6 +3526,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
dma_free_coherent(dev, qm->qdma.size,
qm->qdma.va, qm->qdma.dma);
}
+ hisi_qm_set_state(qm, QM_NOT_READY);
up_write(&qm->qps_lock);
qm_irq_unregister(qm);
@@ -3655,7 +3640,7 @@ static int qm_eq_ctx_cfg(struct hisi_qm *qm)
return -ENOMEM;
}
- ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE);
kfree(eqc);
@@ -3684,7 +3669,7 @@ static int qm_aeq_ctx_cfg(struct hisi_qm *qm)
return -ENOMEM;
}
- ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE);
kfree(aeqc);
@@ -3723,11 +3708,11 @@ static int __hisi_qm_start(struct hisi_qm *qm)
if (ret)
return ret;
- ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
if (ret)
return ret;
- ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+ ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
if (ret)
return ret;
@@ -3767,6 +3752,7 @@ int hisi_qm_start(struct hisi_qm *qm)
if (!ret)
atomic_set(&qm->status.flags, QM_START);
+ hisi_qm_set_state(qm, QM_READY);
err_unlock:
up_write(&qm->qps_lock);
return ret;
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
deleted file mode 100644
index 3068093229a5..000000000000
--- a/drivers/crypto/hisilicon/qm.h
+++ /dev/null
@@ -1,441 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2019 HiSilicon Limited. */
-#ifndef HISI_ACC_QM_H
-#define HISI_ACC_QM_H
-
-#include <linux/bitfield.h>
-#include <linux/debugfs.h>
-#include <linux/iopoll.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-
-#define QM_QNUM_V1 4096
-#define QM_QNUM_V2 1024
-#define QM_MAX_VFS_NUM_V2 63
-
-/* qm user domain */
-#define QM_ARUSER_M_CFG_1 0x100088
-#define AXUSER_SNOOP_ENABLE BIT(30)
-#define AXUSER_CMD_TYPE GENMASK(14, 12)
-#define AXUSER_CMD_SMMU_NORMAL 1
-#define AXUSER_NS BIT(6)
-#define AXUSER_NO BIT(5)
-#define AXUSER_FP BIT(4)
-#define AXUSER_SSV BIT(0)
-#define AXUSER_BASE (AXUSER_SNOOP_ENABLE | \
- FIELD_PREP(AXUSER_CMD_TYPE, \
- AXUSER_CMD_SMMU_NORMAL) | \
- AXUSER_NS | AXUSER_NO | AXUSER_FP)
-#define QM_ARUSER_M_CFG_ENABLE 0x100090
-#define ARUSER_M_CFG_ENABLE 0xfffffffe
-#define QM_AWUSER_M_CFG_1 0x100098
-#define QM_AWUSER_M_CFG_ENABLE 0x1000a0
-#define AWUSER_M_CFG_ENABLE 0xfffffffe
-#define QM_WUSER_M_CFG_ENABLE 0x1000a8
-#define WUSER_M_CFG_ENABLE 0xffffffff
-
-/* qm cache */
-#define QM_CACHE_CTL 0x100050
-#define SQC_CACHE_ENABLE BIT(0)
-#define CQC_CACHE_ENABLE BIT(1)
-#define SQC_CACHE_WB_ENABLE BIT(4)
-#define SQC_CACHE_WB_THRD GENMASK(10, 5)
-#define CQC_CACHE_WB_ENABLE BIT(11)
-#define CQC_CACHE_WB_THRD GENMASK(17, 12)
-#define QM_AXI_M_CFG 0x1000ac
-#define AXI_M_CFG 0xffff
-#define QM_AXI_M_CFG_ENABLE 0x1000b0
-#define AM_CFG_SINGLE_PORT_MAX_TRANS 0x300014
-#define AXI_M_CFG_ENABLE 0xffffffff
-#define QM_PEH_AXUSER_CFG 0x1000cc
-#define QM_PEH_AXUSER_CFG_ENABLE 0x1000d0
-#define PEH_AXUSER_CFG 0x401001
-#define PEH_AXUSER_CFG_ENABLE 0xffffffff
-
-#define QM_AXI_RRESP BIT(0)
-#define QM_AXI_BRESP BIT(1)
-#define QM_ECC_MBIT BIT(2)
-#define QM_ECC_1BIT BIT(3)
-#define QM_ACC_GET_TASK_TIMEOUT BIT(4)
-#define QM_ACC_DO_TASK_TIMEOUT BIT(5)
-#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6)
-#define QM_SQ_CQ_VF_INVALID BIT(7)
-#define QM_CQ_VF_INVALID BIT(8)
-#define QM_SQ_VF_INVALID BIT(9)
-#define QM_DB_TIMEOUT BIT(10)
-#define QM_OF_FIFO_OF BIT(11)
-#define QM_DB_RANDOM_INVALID BIT(12)
-#define QM_MAILBOX_TIMEOUT BIT(13)
-#define QM_FLR_TIMEOUT BIT(14)
-
-#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \
- QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \
- QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \
- QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT)
-#define QM_BASE_CE QM_ECC_1BIT
-
-#define QM_Q_DEPTH 1024
-#define QM_MIN_QNUM 2
-#define HISI_ACC_SGL_SGE_NR_MAX 255
-#define QM_SHAPER_CFG 0x100164
-#define QM_SHAPER_ENABLE BIT(30)
-#define QM_SHAPER_TYPE1_OFFSET 10
-
-/* page number for queue file region */
-#define QM_DOORBELL_PAGE_NR 1
-
-/* uacce mode of the driver */
-#define UACCE_MODE_NOUACCE 0 /* don't use uacce */
-#define UACCE_MODE_SVA 1 /* use uacce sva mode */
-#define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce"
-
-enum qm_stop_reason {
- QM_NORMAL,
- QM_SOFT_RESET,
- QM_FLR,
-};
-
-enum qm_state {
- QM_INIT = 0,
- QM_START,
- QM_CLOSE,
- QM_STOP,
-};
-
-enum qp_state {
- QP_INIT = 1,
- QP_START,
- QP_STOP,
- QP_CLOSE,
-};
-
-enum qm_hw_ver {
- QM_HW_UNKNOWN = -1,
- QM_HW_V1 = 0x20,
- QM_HW_V2 = 0x21,
- QM_HW_V3 = 0x30,
-};
-
-enum qm_fun_type {
- QM_HW_PF,
- QM_HW_VF,
-};
-
-enum qm_debug_file {
- CURRENT_QM,
- CURRENT_Q,
- CLEAR_ENABLE,
- DEBUG_FILE_NUM,
-};
-
-struct qm_dfx {
- atomic64_t err_irq_cnt;
- atomic64_t aeq_irq_cnt;
- atomic64_t abnormal_irq_cnt;
- atomic64_t create_qp_err_cnt;
- atomic64_t mb_err_cnt;
-};
-
-struct debugfs_file {
- enum qm_debug_file index;
- struct mutex lock;
- struct qm_debug *debug;
-};
-
-struct qm_debug {
- u32 curr_qm_qp_num;
- u32 sqe_mask_offset;
- u32 sqe_mask_len;
- struct qm_dfx dfx;
- struct dentry *debug_root;
- struct dentry *qm_d;
- struct debugfs_file files[DEBUG_FILE_NUM];
-};
-
-struct qm_shaper_factor {
- u32 func_qos;
- u64 cir_b;
- u64 cir_u;
- u64 cir_s;
- u64 cbs_s;
-};
-
-struct qm_dma {
- void *va;
- dma_addr_t dma;
- size_t size;
-};
-
-struct hisi_qm_status {
- u32 eq_head;
- bool eqc_phase;
- u32 aeq_head;
- bool aeqc_phase;
- atomic_t flags;
- int stop_reason;
-};
-
-struct hisi_qm;
-
-struct hisi_qm_err_info {
- char *acpi_rst;
- u32 msi_wr_port;
- u32 ecc_2bits_mask;
- u32 dev_ce_mask;
- u32 ce;
- u32 nfe;
- u32 fe;
-};
-
-struct hisi_qm_err_status {
- u32 is_qm_ecc_mbit;
- u32 is_dev_ecc_mbit;
-};
-
-struct hisi_qm_err_ini {
- int (*hw_init)(struct hisi_qm *qm);
- void (*hw_err_enable)(struct hisi_qm *qm);
- void (*hw_err_disable)(struct hisi_qm *qm);
- u32 (*get_dev_hw_err_status)(struct hisi_qm *qm);
- void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
- void (*open_axi_master_ooo)(struct hisi_qm *qm);
- void (*close_axi_master_ooo)(struct hisi_qm *qm);
- void (*open_sva_prefetch)(struct hisi_qm *qm);
- void (*close_sva_prefetch)(struct hisi_qm *qm);
- void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
- void (*err_info_init)(struct hisi_qm *qm);
-};
-
-struct hisi_qm_list {
- struct mutex lock;
- struct list_head list;
- int (*register_to_crypto)(struct hisi_qm *qm);
- void (*unregister_from_crypto)(struct hisi_qm *qm);
-};
-
-struct hisi_qm {
- enum qm_hw_ver ver;
- enum qm_fun_type fun_type;
- const char *dev_name;
- struct pci_dev *pdev;
- void __iomem *io_base;
- void __iomem *db_io_base;
- u32 sqe_size;
- u32 qp_base;
- u32 qp_num;
- u32 qp_in_used;
- u32 ctrl_qp_num;
- u32 max_qp_num;
- u32 vfs_num;
- u32 db_interval;
- struct list_head list;
- struct hisi_qm_list *qm_list;
-
- struct qm_dma qdma;
- struct qm_sqc *sqc;
- struct qm_cqc *cqc;
- struct qm_eqe *eqe;
- struct qm_aeqe *aeqe;
- dma_addr_t sqc_dma;
- dma_addr_t cqc_dma;
- dma_addr_t eqe_dma;
- dma_addr_t aeqe_dma;
-
- struct hisi_qm_status status;
- const struct hisi_qm_err_ini *err_ini;
- struct hisi_qm_err_info err_info;
- struct hisi_qm_err_status err_status;
- unsigned long misc_ctl; /* driver removing and reset sched */
-
- struct rw_semaphore qps_lock;
- struct idr qp_idr;
- struct hisi_qp *qp_array;
-
- struct mutex mailbox_lock;
-
- const struct hisi_qm_hw_ops *ops;
-
- struct qm_debug debug;
-
- u32 error_mask;
-
- struct workqueue_struct *wq;
- struct work_struct work;
- struct work_struct rst_work;
- struct work_struct cmd_process;
-
- const char *algs;
- bool use_sva;
- bool is_frozen;
-
- /* doorbell isolation enable */
- bool use_db_isolation;
- resource_size_t phys_base;
- resource_size_t db_phys_base;
- struct uacce_device *uacce;
- int mode;
- struct qm_shaper_factor *factor;
- u32 mb_qos;
- u32 type_rate;
-};
-
-struct hisi_qp_status {
- atomic_t used;
- u16 sq_tail;
- u16 cq_head;
- bool cqc_phase;
- atomic_t flags;
-};
-
-struct hisi_qp_ops {
- int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm);
-};
-
-struct hisi_qp {
- u32 qp_id;
- u8 alg_type;
- u8 req_type;
-
- struct qm_dma qdma;
- void *sqe;
- struct qm_cqe *cqe;
- dma_addr_t sqe_dma;
- dma_addr_t cqe_dma;
-
- struct hisi_qp_status qp_status;
- struct hisi_qp_ops *hw_ops;
- void *qp_ctx;
- void (*req_cb)(struct hisi_qp *qp, void *data);
- void (*event_cb)(struct hisi_qp *qp);
-
- struct hisi_qm *qm;
- bool is_resetting;
- bool is_in_kernel;
- u16 pasid;
- struct uacce_queue *uacce_q;
-};
-
-static inline int q_num_set(const char *val, const struct kernel_param *kp,
- unsigned int device)
-{
- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
- device, NULL);
- u32 n, q_num;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- if (!pdev) {
- q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
- pr_info("No device found currently, suppose queue number is %u\n",
- q_num);
- } else {
- if (pdev->revision == QM_HW_V1)
- q_num = QM_QNUM_V1;
- else
- q_num = QM_QNUM_V2;
- }
-
- ret = kstrtou32(val, 10, &n);
- if (ret || n < QM_MIN_QNUM || n > q_num)
- return -EINVAL;
-
- return param_set_int(val, kp);
-}
-
-static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
-{
- u32 n;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- ret = kstrtou32(val, 10, &n);
- if (ret < 0)
- return ret;
-
- if (n > QM_MAX_VFS_NUM_V2)
- return -EINVAL;
-
- return param_set_int(val, kp);
-}
-
-static inline int mode_set(const char *val, const struct kernel_param *kp)
-{
- u32 n;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- ret = kstrtou32(val, 10, &n);
- if (ret != 0 || (n != UACCE_MODE_SVA &&
- n != UACCE_MODE_NOUACCE))
- return -EINVAL;
-
- return param_set_int(val, kp);
-}
-
-static inline int uacce_mode_set(const char *val, const struct kernel_param *kp)
-{
- return mode_set(val, kp);
-}
-
-static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
-{
- INIT_LIST_HEAD(&qm_list->list);
- mutex_init(&qm_list->lock);
-}
-
-int hisi_qm_init(struct hisi_qm *qm);
-void hisi_qm_uninit(struct hisi_qm *qm);
-int hisi_qm_start(struct hisi_qm *qm);
-int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
-struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type);
-int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
-int hisi_qm_stop_qp(struct hisi_qp *qp);
-void hisi_qm_release_qp(struct hisi_qp *qp);
-int hisi_qp_send(struct hisi_qp *qp, const void *msg);
-int hisi_qm_get_free_qp_num(struct hisi_qm *qm);
-int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
-void hisi_qm_debug_init(struct hisi_qm *qm);
-enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
-void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
-int hisi_qm_sriov_enable(struct pci_dev *pdev, int max_vfs);
-int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
-int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
-void hisi_qm_dev_err_init(struct hisi_qm *qm);
-void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
-pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
- pci_channel_state_t state);
-pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev);
-void hisi_qm_reset_prepare(struct pci_dev *pdev);
-void hisi_qm_reset_done(struct pci_dev *pdev);
-
-struct hisi_acc_sgl_pool;
-struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
- struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool,
- u32 index, dma_addr_t *hw_sgl_dma);
-void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
- struct hisi_acc_hw_sgl *hw_sgl);
-struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
- u32 count, u32 sge_nr);
-void hisi_acc_free_sgl_pool(struct device *dev,
- struct hisi_acc_sgl_pool *pool);
-int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
- u8 alg_type, int node, struct hisi_qp **qps);
-void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
-void hisi_qm_dev_shutdown(struct pci_dev *pdev);
-void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-int hisi_qm_resume(struct device *dev);
-int hisi_qm_suspend(struct device *dev);
-void hisi_qm_pm_uninit(struct hisi_qm *qm);
-void hisi_qm_pm_init(struct hisi_qm *qm);
-int hisi_qm_get_dfx_access(struct hisi_qm *qm);
-void hisi_qm_put_dfx_access(struct hisi_qm *qm);
-void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
-#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index d97cf02b1df7..c2e9b01187a7 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -4,7 +4,7 @@
#ifndef __HISI_SEC_V2_H
#define __HISI_SEC_V2_H
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
#include "sec_crypto.h"
/* Algorithm resource per hardware SEC queue */
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 0b9906ff69e3..92fae706bdb2 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -20,8 +20,7 @@
#define SEC_VF_NUM 63
#define SEC_QUEUE_NUM_V1 4096
-#define SEC_PF_PCI_DEVICE_ID 0xa255
-#define SEC_VF_PCI_DEVICE_ID 0xa256
+#define PCI_DEVICE_ID_HUAWEI_SEC_PF 0xa255
#define SEC_BD_ERR_CHK_EN0 0xEFFFFFFF
#define SEC_BD_ERR_CHK_EN1 0x7ffff7fd
@@ -229,7 +228,7 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp)
{
- return q_num_set(val, kp, SEC_PF_PCI_DEVICE_ID);
+ return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF);
}
static const struct kernel_param_ops sec_pf_q_num_ops = {
@@ -317,8 +316,8 @@ module_param_cb(uacce_mode, &sec_uacce_mode_ops, &uacce_mode, 0444);
MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);
static const struct pci_device_id sec_dev_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) },
- { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_PF) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, sec_dev_ids);
@@ -748,7 +747,7 @@ static int sec_core_debug_init(struct hisi_qm *qm)
regset->base = qm->io_base;
regset->dev = dev;
- if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID)
+ if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF)
debugfs_create_file("regs", 0444, tmp_d, regset, &sec_regs_fops);
for (i = 0; i < ARRAY_SIZE(sec_dfx_labels); i++) {
@@ -766,7 +765,7 @@ static int sec_debug_init(struct hisi_qm *qm)
struct sec_dev *sec = container_of(qm, struct sec_dev, qm);
int i;
- if (qm->pdev->device == SEC_PF_PCI_DEVICE_ID) {
+ if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) {
for (i = SEC_CLEAR_ENABLE; i < SEC_DEBUG_FILE_NUM; i++) {
spin_lock_init(&sec->debug.files[i].lock);
sec->debug.files[i].index = i;
@@ -908,7 +907,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->sqe_size = SEC_SQE_SIZE;
qm->dev_name = sec_name;
- qm->fun_type = (pdev->device == SEC_PF_PCI_DEVICE_ID) ?
+ qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_PF) ?
QM_HW_PF : QM_HW_VF;
if (qm->fun_type == QM_HW_PF) {
qm->qp_base = SEC_PF_DEF_Q_BASE;
@@ -1120,6 +1119,12 @@ static struct pci_driver sec_pci_driver = {
.driver.pm = &sec_pm_ops,
};
+struct pci_driver *hisi_sec_get_pf_driver(void)
+{
+ return &sec_pci_driver;
+}
+EXPORT_SYMBOL_GPL(hisi_sec_get_pf_driver);
+
static void sec_register_debugfs(void)
{
if (!debugfs_initialized())
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index 057273769f26..f7efc02b065f 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 HiSilicon Limited. */
#include <linux/dma-mapping.h>
+#include <linux/hisi_acc_qm.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include "qm.h"
#define HISI_ACC_SGL_SGE_NR_MIN 1
#define HISI_ACC_SGL_NR_MAX 256
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
index 517fdbdff3ea..3dfd3bac5a33 100644
--- a/drivers/crypto/hisilicon/zip/zip.h
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -7,7 +7,7 @@
#define pr_fmt(fmt) "hisi_zip: " fmt
#include <linux/list.h>
-#include "../qm.h"
+#include <linux/hisi_acc_qm.h>
enum hisi_zip_error_type {
/* negative compression */
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 678f8b58ec42..4534e1e107d1 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -15,8 +15,7 @@
#include <linux/uacce.h>
#include "zip.h"
-#define PCI_DEVICE_ID_ZIP_PF 0xa250
-#define PCI_DEVICE_ID_ZIP_VF 0xa251
+#define PCI_DEVICE_ID_HUAWEI_ZIP_PF 0xa250
#define HZIP_QUEUE_NUM_V1 4096
@@ -246,7 +245,7 @@ MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC);
static int pf_q_num_set(const char *val, const struct kernel_param *kp)
{
- return q_num_set(val, kp, PCI_DEVICE_ID_ZIP_PF);
+ return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF);
}
static const struct kernel_param_ops pf_q_num_ops = {
@@ -268,8 +267,8 @@ module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
static const struct pci_device_id hisi_zip_dev_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) },
- { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_VF) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_PF) },
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
@@ -838,7 +837,7 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->sqe_size = HZIP_SQE_SIZE;
qm->dev_name = hisi_zip_name;
- qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ?
+ qm->fun_type = (pdev->device == PCI_DEVICE_ID_HUAWEI_ZIP_PF) ?
QM_HW_PF : QM_HW_VF;
if (qm->fun_type == QM_HW_PF) {
qm->qp_base = HZIP_PF_DEF_Q_BASE;
@@ -1013,6 +1012,12 @@ static struct pci_driver hisi_zip_pci_driver = {
.driver.pm = &hisi_zip_pm_ops,
};
+struct pci_driver *hisi_zip_get_pf_driver(void)
+{
+ return &hisi_zip_pci_driver;
+}
+EXPORT_SYMBOL_GPL(hisi_zip_get_pf_driver);
+
static void hisi_zip_register_debugfs(void)
{
if (!debugfs_initialized())
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 3eacd8739929..fc19a0762af2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -478,6 +478,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VHCA_STATE:
case MLX5_CMD_OP_MODIFY_VHCA_STATE:
case MLX5_CMD_OP_ALLOC_SF:
+ case MLX5_CMD_OP_SUSPEND_VHCA:
+ case MLX5_CMD_OP_RESUME_VHCA:
+ case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
+ case MLX5_CMD_OP_SAVE_VHCA_STATE:
+ case MLX5_CMD_OP_LOAD_VHCA_STATE:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -675,6 +680,11 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE);
MLX5_COMMAND_STR_CASE(ALLOC_SF);
MLX5_COMMAND_STR_CASE(DEALLOC_SF);
+ MLX5_COMMAND_STR_CASE(SUSPEND_VHCA);
+ MLX5_COMMAND_STR_CASE(RESUME_VHCA);
+ MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE);
+ MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE);
+ MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE);
default: return "unknown command opcode";
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bba72b220cc3..976578d1904c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1620,6 +1620,7 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
devlink_unregister(devlink);
+ mlx5_sriov_disable(pdev);
mlx5_crdump_disable(dev);
mlx5_drain_health_wq(dev);
mlx5_uninit_one(dev);
@@ -1882,6 +1883,50 @@ static struct pci_driver mlx5_core_driver = {
.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
};
+/**
+ * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
+ * mlx5_core is its driver.
+ * @pdev: The associated PCI device.
+ *
+ * Upon successful return the interface state lock stays held so that the
+ * caller can use the device safely. The caller must use the returned mlx5
+ * device only for a narrow window and put it back with mlx5_vf_put_core_dev()
+ * immediately once usage is over.
+ *
+ * Return: Pointer to the associated mlx5_core_dev, or NULL if the PF drvdata
+ * lookup fails or the interface is not in the UP state. The lock is not held
+ * when NULL is returned.
+ */
+struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
+ __acquires(&mdev->intf_state_mutex)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
+ if (IS_ERR(mdev))
+ return NULL;
+
+ mutex_lock(&mdev->intf_state_mutex);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
+ mutex_unlock(&mdev->intf_state_mutex);
+ return NULL;
+ }
+
+ return mdev;
+}
+EXPORT_SYMBOL(mlx5_vf_get_core_dev);
+
+/**
+ * mlx5_vf_put_core_dev - Put the mlx5 core device back.
+ * @mdev: The mlx5 core device.
+ *
+ * Releases the interface state lock of @mdev. Upon return the lock is
+ * unlocked and the caller should not access the mdev any more.
+ */
+void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
+ __releases(&mdev->intf_state_mutex)
+{
+ mutex_unlock(&mdev->intf_state_mutex);
+}
+EXPORT_SYMBOL(mlx5_vf_put_core_dev);
+
static void mlx5_core_verify_params(void)
{
if (prof_sel >= ARRAY_SIZE(profile)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 6f8baa0f2a73..37b2805b3bf3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -164,6 +164,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+void mlx5_sriov_disable(struct pci_dev *pdev);
int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index e8185b69ac6c..887ee0f729d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -161,7 +161,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
return err;
}
-static void mlx5_sriov_disable(struct pci_dev *pdev)
+void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int num_vfs = pci_num_vf(dev->pdev);
@@ -205,19 +205,8 @@ int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
sriov = &dev->priv.sriov;
-
- /* Reversed translation of PCI VF function number to the internal
- * function_id, which exists in the name of virtfn symlink.
- */
- for (id = 0; id < pci_num_vf(pf); id++) {
- if (!sriov->vfs_ctx[id].enabled)
- continue;
-
- if (vf->devfn == pci_iov_virtfn_devfn(pf, id))
- break;
- }
-
- if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled)
+ id = pci_iov_vf_id(vf);
+ if (id < 0 || !sriov->vfs_ctx[id].enabled)
return -EINVAL;
return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 0267977c9f17..952217572113 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -33,6 +33,49 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
}
EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
+/**
+ * pci_iov_vf_id - Compute the VF index of a VF device
+ * @dev: VF pci_dev
+ *
+ * The index is derived from the distance between the VF's bus/devfn and the
+ * PF's bus/devfn plus the SR-IOV offset, divided by the SR-IOV stride.
+ *
+ * Return: the computed VF index, or -EINVAL if @dev is not a VF.
+ */
+int pci_iov_vf_id(struct pci_dev *dev)
+{
+	unsigned int vf_rid, first_vf_rid;
+	struct pci_dev *pf;
+
+	if (!dev->is_virtfn)
+		return -EINVAL;
+
+	pf = pci_physfn(dev);
+	vf_rid = (dev->bus->number << 8) + dev->devfn;
+	first_vf_rid = (pf->bus->number << 8) + pf->devfn + pf->sriov->offset;
+
+	return (vf_rid - first_vf_rid) / pf->sriov->stride;
+}
+EXPORT_SYMBOL_GPL(pci_iov_vf_id);
+
+/**
+ * pci_iov_get_pf_drvdata - Return the drvdata of a PF
+ * @dev: VF pci_dev
+ * @pf_driver: Device driver required to own the PF
+ *
+ * This must be called from a context that ensures that a VF driver is attached.
+ * The value returned is invalid once the VF driver completes its remove()
+ * callback.
+ *
+ * Locking is achieved by the driver core. A VF driver cannot be probed until
+ * pci_enable_sriov() is called and pci_disable_sriov() does not return until
+ * all VF drivers have completed their remove().
+ *
+ * The PF driver must call pci_disable_sriov() before it begins to destroy the
+ * drvdata.
+ *
+ * Return: the drvdata of the PF, or ERR_PTR(-EINVAL) if @dev is not a VF or
+ * the PF is not bound to @pf_driver.
+ */
+void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver)
+{
+ struct pci_dev *pf_dev;
+
+ if (!dev->is_virtfn)
+ return ERR_PTR(-EINVAL);
+ pf_dev = dev->physfn;
+ if (pf_dev->driver != pf_driver)
+ return ERR_PTR(-EINVAL);
+ return pci_get_drvdata(pf_dev);
+}
+EXPORT_SYMBOL_GPL(pci_iov_get_pf_drvdata);
+
/*
* Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
* change when NumVFs changes.
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 860424ccda1b..4da1914425e1 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -43,4 +43,9 @@ config VFIO_PCI_IGD
To enable Intel IGD assignment through vfio-pci, say Y.
endif
+
+source "drivers/vfio/pci/mlx5/Kconfig"
+
+source "drivers/vfio/pci/hisilicon/Kconfig"
+
endif
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 349d68d242b4..7052ebd893e0 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -7,3 +7,7 @@ obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
vfio-pci-y := vfio_pci.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
+
+obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/
+
+obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
diff --git a/drivers/vfio/pci/hisilicon/Kconfig b/drivers/vfio/pci/hisilicon/Kconfig
new file mode 100644
index 000000000000..5daa0f45d2f9
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_ACC_VFIO_PCI
+ tristate "VFIO PCI support for HiSilicon ACC devices"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on VFIO_PCI_CORE
+ depends on PCI_MSI
+ depends on CRYPTO_DEV_HISI_QM
+ depends on CRYPTO_DEV_HISI_HPRE
+ depends on CRYPTO_DEV_HISI_SEC2
+ depends on CRYPTO_DEV_HISI_ZIP
+ help
+ This provides generic PCI support for HiSilicon ACC devices
+ using the VFIO framework.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/hisilicon/Makefile b/drivers/vfio/pci/hisilicon/Makefile
new file mode 100644
index 000000000000..c66b3783f2f9
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisi-acc-vfio-pci.o
+hisi-acc-vfio-pci-y := hisi_acc_vfio_pci.o
+
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
new file mode 100644
index 000000000000..767b5d47631a
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -0,0 +1,1326 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, HiSilicon Ltd.
+ */
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/hisi_acc_qm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/anon_inodes.h>
+
+#include "hisi_acc_vfio_pci.h"
+
+/*
+ * Poll QM_VF_STATE until bit 0 reads back as clear.
+ *
+ * Returns 0 once the bit is clear (the caller treats this as the VM acc
+ * device being ready), or -ETIMEDOUT if the hardware does not get there
+ * within the poll timeout.
+ */
+static int qm_wait_dev_not_ready(struct hisi_qm *qm)
+{
+	void __iomem *state_reg = qm->io_base + QM_VF_STATE;
+	u32 state;
+
+	return readl_relaxed_poll_timeout(state_reg, state, !(state & 0x1),
+					  MB_POLL_PERIOD_US,
+					  MB_POLL_TIMEOUT_US);
+}
+
+/*
+ * Read a state register and, while it is non-zero, re-read it up to
+ * ERROR_CHECK_TIMEOUT times with a CHECK_DELAY_TIME microsecond delay
+ * before each re-read. Returns the last value read.
+ */
+static u32 qm_check_reg_state(struct hisi_qm *qm, u32 regs)
+{
+	u32 state = readl(qm->io_base + regs);
+	int tries;
+
+	for (tries = 0; state && tries < ERROR_CHECK_TIMEOUT; tries++) {
+		udelay(CHECK_DELAY_TIME);
+		state = readl(qm->io_base + regs);
+	}
+
+	return state;
+}
+
+/*
+ * Read @nums consecutive registers, QM_REG_ADDR_OFFSET apart, starting at
+ * @reg_addr into @data. Returns -EINVAL if @nums is 0 or exceeds
+ * QM_REGS_MAX_LEN, otherwise 0.
+ */
+static int qm_read_regs(struct hisi_qm *qm, u32 reg_addr,
+			u32 *data, u8 nums)
+{
+	int idx;
+
+	if (!nums || nums > QM_REGS_MAX_LEN)
+		return -EINVAL;
+
+	for (idx = 0; idx < nums; idx++, reg_addr += QM_REG_ADDR_OFFSET)
+		data[idx] = readl(qm->io_base + reg_addr);
+
+	return 0;
+}
+
+/*
+ * Write @nums values from @data to consecutive registers, QM_REG_ADDR_OFFSET
+ * apart, starting at @reg. Returns -EINVAL if @nums is 0 or exceeds
+ * QM_REGS_MAX_LEN, otherwise 0.
+ */
+static int qm_write_regs(struct hisi_qm *qm, u32 reg,
+			 u32 *data, u8 nums)
+{
+	void __iomem *addr = qm->io_base + reg;
+	int idx;
+
+	if (!nums || nums > QM_REGS_MAX_LEN)
+		return -EINVAL;
+
+	for (idx = 0; idx < nums; idx++) {
+		writel(data[idx], addr);
+		addr += QM_REG_ADDR_OFFSET;
+	}
+
+	return 0;
+}
+
+/*
+ * Query the SQC VFT through the mailbox and decode it.
+ *
+ * On success the queue base is stored in @base and the queue pair count is
+ * returned. A non-zero mailbox result is returned unchanged.
+ */
+static int qm_get_vft(struct hisi_qm *qm, u32 *base)
+{
+	u64 vft;
+	int err;
+
+	err = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+	if (err)
+		return err;
+
+	/* The 64-bit VFT entry is returned in two 32-bit data registers. */
+	vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+	      ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+	       QM_XQC_ADDR_OFFSET);
+
+	*base = QM_SQC_VFT_BASE_MASK_V2 & (vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+
+	/* The hardware field holds the queue pair count minus one. */
+	return (u32)((QM_SQC_VFT_NUM_MASK_V2 &
+		      (vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1);
+}
+
+/*
+ * Read back the SQC base table address through the mailbox.
+ * Stores the 64-bit address in @addr; returns 0 or the mailbox error.
+ */
+static int qm_get_sqc(struct hisi_qm *qm, u64 *addr)
+{
+	int err = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, 0, 0, 1);
+
+	if (err)
+		return err;
+
+	/* Low and high halves come from two 32-bit mailbox data registers. */
+	*addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+		 QM_XQC_ADDR_OFFSET);
+
+	return 0;
+}
+
+/*
+ * Read back the CQC base table address through the mailbox.
+ * Stores the 64-bit address in @addr; returns 0 or the mailbox error.
+ */
+static int qm_get_cqc(struct hisi_qm *qm, u64 *addr)
+{
+	int err = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, 0, 0, 1);
+
+	if (err)
+		return err;
+
+	/* Low and high halves come from two 32-bit mailbox data registers. */
+	*addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+		 QM_XQC_ADDR_OFFSET);
+
+	return 0;
+}
+
+/*
+ * Snapshot the VF QM registers that travel with the migration data into
+ * @vf_data. Stops at the first failing read, logs which register failed and
+ * returns that error; returns 0 when every register was read.
+ */
+static int qm_get_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+	struct device *dev = &qm->pdev->dev;
+	int err;
+
+	err = qm_read_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+	if (err) {
+		dev_err(dev, "failed to read QM_VF_AEQ_INT_MASK\n");
+		return err;
+	}
+
+	err = qm_read_regs(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+	if (err) {
+		dev_err(dev, "failed to read QM_VF_EQ_INT_MASK\n");
+		return err;
+	}
+
+	err = qm_read_regs(qm, QM_IFC_INT_SOURCE_V,
+			   &vf_data->ifc_int_source, 1);
+	if (err) {
+		dev_err(dev, "failed to read QM_IFC_INT_SOURCE_V\n");
+		return err;
+	}
+
+	err = qm_read_regs(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+	if (err) {
+		dev_err(dev, "failed to read QM_IFC_INT_MASK\n");
+		return err;
+	}
+
+	err = qm_read_regs(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+	if (err) {
+		dev_err(dev, "failed to read QM_IFC_INT_SET_V\n");
+		return err;
+	}
+
+	err = qm_read_regs(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+	if (err) {
+		dev_err(dev, "failed to read QM_PAGE_SIZE\n");
+		return err;
+	}
+
+	/* The EQC context spans 7 consecutive registers. */
+	err = qm_read_regs(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+	if (err) {
+		dev_err(dev, "failed to read QM_EQC_DW\n");
+		return err;
+	}
+
+	/* The AEQC context spans 7 consecutive registers. */
+	err = qm_read_regs(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+	if (err) {
+		dev_err(dev, "failed to read QM_AEQC_DW\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Restore the VF QM registers from @vf_data.
+ *
+ * Returns -EBUSY if the mailbox does not become ready first, the error of
+ * the first failing register write, or 0 when everything was written.
+ */
+static int qm_set_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+	struct device *dev = &qm->pdev->dev;
+	int err;
+
+	/* The VF must be able to accept mailbox commands before we write. */
+	if (unlikely(hisi_qm_wait_mb_ready(qm))) {
+		dev_err(dev, "QM device is not ready to write\n");
+		return -EBUSY;
+	}
+
+	err = qm_write_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_VF_AEQ_INT_MASK\n");
+		return err;
+	}
+
+	err = qm_write_regs(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_VF_EQ_INT_MASK\n");
+		return err;
+	}
+
+	err = qm_write_regs(qm, QM_IFC_INT_SOURCE_V,
+			    &vf_data->ifc_int_source, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_IFC_INT_SOURCE_V\n");
+		return err;
+	}
+
+	err = qm_write_regs(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_IFC_INT_MASK\n");
+		return err;
+	}
+
+	err = qm_write_regs(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_IFC_INT_SET_V\n");
+		return err;
+	}
+
+	err = qm_write_regs(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_QUE_ISO_CFG_V\n");
+		return err;
+	}
+
+	err = qm_write_regs(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+	if (err) {
+		dev_err(dev, "failed to write QM_PAGE_SIZE\n");
+		return err;
+	}
+
+	/* The EQC context spans 7 consecutive registers. */
+	err = qm_write_regs(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+	if (err) {
+		dev_err(dev, "failed to write QM_EQC_DW\n");
+		return err;
+	}
+
+	/* The AEQC context spans 7 consecutive registers. */
+	err = qm_write_regs(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+	if (err) {
+		dev_err(dev, "failed to write QM_AEQC_DW\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Ring a doorbell: pack queue number, command, index and priority into one
+ * 64-bit word and write it to the SQ/CQ or EQ/AEQ doorbell register,
+ * depending on @cmd. The random-data field is always written as 0.
+ */
+static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd,
+		  u16 index, u8 priority)
+{
+	const u16 randata = 0;
+	u64 db_offset;
+	u64 db_val;
+
+	db_offset = (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ) ?
+		    QM_DOORBELL_SQ_CQ_BASE_V2 : QM_DOORBELL_EQ_AEQ_BASE_V2;
+
+	db_val = qn;
+	db_val |= (u64)cmd << QM_DB_CMD_SHIFT_V2;
+	db_val |= (u64)randata << QM_DB_RAND_SHIFT_V2;
+	db_val |= (u64)index << QM_DB_INDEX_SHIFT_V2;
+	db_val |= (u64)priority << QM_DB_PRIORITY_SHIFT_V2;
+
+	writeq(db_val, qm->io_base + db_offset);
+}
+
+/*
+ * Read the SQC VFT entry of VF @vf_id through the PF's VFT config registers.
+ *
+ * Stores the queue base in @rbase and returns the queue pair count, or the
+ * poll error if the VFT config interface does not become ready.
+ */
+static int pf_qm_get_qp_num(struct hisi_qm *qm, int vf_id, u32 *rbase)
+{
+	void __iomem *rdy_reg = qm->io_base + QM_VFT_CFG_RDY;
+	unsigned int rdy;
+	u64 vft;
+	int err;
+
+	/* Wait for any previous VFT operation to finish. */
+	err = readl_relaxed_poll_timeout(rdy_reg, rdy, rdy & BIT(0),
+					 MB_POLL_PERIOD_US,
+					 MB_POLL_TIMEOUT_US);
+	if (err)
+		return err;
+
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR);
+	/* Type 0 selects the SQC VFT. */
+	writel(0x0, qm->io_base + QM_VFT_CFG_TYPE);
+	writel(vf_id, qm->io_base + QM_VFT_CFG);
+
+	/* Clear the ready flag, then kick off the operation. */
+	writel(0x0, rdy_reg);
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+	err = readl_relaxed_poll_timeout(rdy_reg, rdy, rdy & BIT(0),
+					 MB_POLL_PERIOD_US,
+					 MB_POLL_TIMEOUT_US);
+	if (err)
+		return err;
+
+	vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) |
+	      ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) <<
+	       QM_XQC_ADDR_OFFSET);
+
+	*rbase = QM_SQC_VFT_BASE_MASK_V2 & (vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+
+	/* The hardware field holds the queue pair count minus one. */
+	return (u32)((QM_SQC_VFT_NUM_MASK_V2 &
+		      (vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1);
+}
+
+/* Reset the PF<->VF communication interrupt state of the VF. */
+static void qm_dev_cmd_init(struct hisi_qm *qm)
+{
+	void __iomem *base = qm->io_base;
+
+	/* Clear the VF communication status register. */
+	writel(0x1, base + QM_IFC_INT_SOURCE_V);
+
+	/* Unmask PF/VF communication. */
+	writel(0x0, base + QM_IFC_INT_MASK);
+}
+
+/*
+ * Trigger a QM cache writeback and wait for the done bit.
+ * Returns 0 on completion, -EINVAL if the done bit does not show up in time.
+ */
+static int vf_qm_cache_wb(struct hisi_qm *qm)
+{
+	unsigned int done;
+	int err;
+
+	writel(0x1, qm->io_base + QM_CACHE_WB_START);
+
+	err = readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+					 done, done & BIT(0),
+					 MB_POLL_PERIOD_US,
+					 MB_POLL_TIMEOUT_US);
+	if (!err)
+		return 0;
+
+	dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail\n");
+	return -EINVAL;
+}
+
+/* Ring the SQ doorbell (index 0, priority 1) of every queue pair of the VF. */
+static void vf_qm_fun_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			    struct hisi_qm *qm)
+{
+	int qn = 0;
+
+	while (qn < qm->qp_num) {
+		qm_db(qm, qn, QM_DOORBELL_CMD_SQ, 0, 1);
+		qn++;
+	}
+}
+
+/* Send the PAUSE_QM mailbox command to the VF; returns the mailbox result. */
+static int vf_qm_func_stop(struct hisi_qm *qm)
+{
+	int err = hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
+
+	return err;
+}
+
+/*
+ * Validate received migration data against this VF before loading it.
+ *
+ * Checks, in order: that at least QM_MATCH_SIZE bytes were received, the
+ * magic value, the PCI device id, the queue pair count reported by the VF's
+ * VFT, and the queue isolation configuration read from the PF. When all of
+ * them match, the saved VF QM state is written to QM_VF_STATE and cached in
+ * @hisi_acc_vdev.
+ *
+ * Return: 0 on match, -EINVAL on any mismatch, or the error of a failing
+ * register helper.
+ */
+static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			     struct hisi_acc_vf_migration_file *migf)
+{
+	struct acc_vf_data *vf_data = &migf->vf_data;
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &vf_qm->pdev->dev;
+	u32 que_iso_state;
+	int ret;
+
+	if (migf->total_length < QM_MATCH_SIZE)
+		return -EINVAL;
+
+	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
+		return -EINVAL;
+	}
+
+	if (vf_data->dev_id != hisi_acc_vdev->vf_dev->device) {
+		dev_err(dev, "failed to match VF devices\n");
+		return -EINVAL;
+	}
+
+	/* vf qp num check */
+	ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
+	if (ret <= 0) {
+		dev_err(dev, "failed to get vft qp nums\n");
+		return -EINVAL;
+	}
+
+	if (ret != vf_data->qp_num) {
+		dev_err(dev, "failed to match VF qp num\n");
+		return -EINVAL;
+	}
+
+	vf_qm->qp_num = ret;
+
+	/* vf isolation state check */
+	ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1);
+	if (ret) {
+		dev_err(dev, "failed to read QM_QUE_ISO_CFG_V\n");
+		return ret;
+	}
+
+	if (vf_data->que_iso_cfg != que_iso_state) {
+		dev_err(dev, "failed to match isolation state\n");
+		/* ret is 0 here; a mismatch must not be reported as success. */
+		return -EINVAL;
+	}
+
+	ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
+	if (ret) {
+		dev_err(dev, "failed to write QM_VF_STATE\n");
+		return ret;
+	}
+
+	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+	return 0;
+}
+
+/*
+ * Fill the "match" part of the migration data: magic, PCI device id, queue
+ * pair base/count and the queue isolation config, the latter two read
+ * through the PF.
+ *
+ * Returns 0 on success, -EINVAL if the queue pair count cannot be obtained,
+ * or the error from reading QM_QUE_ISO_CFG_V.
+ */
+static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+				struct acc_vf_data *vf_data)
+{
+	struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &pf_qm->pdev->dev;
+	int qp_num;
+	int err;
+
+	vf_data->acc_magic = ACC_DEV_MAGIC;
+	vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
+
+	/* The VF's queue pair layout is looked up through the PF. */
+	qp_num = pf_qm_get_qp_num(pf_qm, hisi_acc_vdev->vf_id,
+				  &vf_data->qp_base);
+	if (qp_num <= 0) {
+		dev_err(dev, "failed to get vft qp nums!\n");
+		return -EINVAL;
+	}
+	vf_data->qp_num = qp_num;
+
+	/* So is the VF isolation state. */
+	err = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+	if (err)
+		dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n");
+
+	return err;
+}
+
+/*
+ * Program the VF from the received migration data.
+ *
+ * Nothing is done when only the match data was transferred. Otherwise the
+ * DMA addresses and queue layout are copied into the VF qm, the registers
+ * are restored and the SQC/CQC base tables are set through the mailbox.
+ *
+ * Returns 0 on success, -EINVAL if the data is shorter than
+ * struct acc_vf_data, or the error of the failing step.
+ */
+static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			   struct hisi_acc_vf_migration_file *migf)
+{
+	struct acc_vf_data *vf_data = &migf->vf_data;
+	struct hisi_qm *qm = &hisi_acc_vdev->vf_qm;
+	struct device *dev = &qm->pdev->dev;
+	int err;
+
+	/* Match data only: the source device had nothing more to save. */
+	if (migf->total_length == QM_MATCH_SIZE)
+		return 0;
+
+	if (migf->total_length < sizeof(struct acc_vf_data))
+		return -EINVAL;
+
+	qm->eqe_dma = vf_data->eqe_dma;
+	qm->aeqe_dma = vf_data->aeqe_dma;
+	qm->sqc_dma = vf_data->sqc_dma;
+	qm->cqc_dma = vf_data->cqc_dma;
+
+	qm->qp_base = vf_data->qp_base;
+	qm->qp_num = vf_data->qp_num;
+
+	err = qm_set_regs(qm, vf_data);
+	if (err) {
+		dev_err(dev, "Set VF regs failed\n");
+		return err;
+	}
+
+	err = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+	if (err) {
+		dev_err(dev, "Set sqc failed\n");
+		return err;
+	}
+
+	err = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+	if (err) {
+		dev_err(dev, "Set cqc failed\n");
+		return err;
+	}
+
+	qm_dev_cmd_init(qm);
+	return 0;
+}
+
+/*
+ * Capture the VF state into the migration file.
+ *
+ * The match data is always collected. If the VF does not report ready, only
+ * the match data is provided (total_length = QM_MATCH_SIZE) and the state is
+ * recorded as QM_NOT_READY. Otherwise the cache is written back, the
+ * registers and the EQE/AEQE/SQC/CQC DMA addresses are saved, and the full
+ * struct acc_vf_data is provided.
+ */
+static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			    struct hisi_acc_vf_migration_file *migf)
+{
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	struct acc_vf_data *vf_data = &migf->vf_data;
+	struct device *dev = &vf_qm->pdev->dev;
+	int err;
+
+	err = vf_qm_get_match_data(hisi_acc_vdev, vf_data);
+	if (err)
+		return err;
+
+	if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
+		/* Record the state and hand back the match data only. */
+		vf_data->vf_qm_state = QM_NOT_READY;
+		hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+		migf->total_length = QM_MATCH_SIZE;
+		return 0;
+	}
+
+	vf_data->vf_qm_state = QM_READY;
+	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+
+	err = vf_qm_cache_wb(vf_qm);
+	if (err) {
+		dev_err(dev, "failed to writeback QM Cache!\n");
+		return err;
+	}
+
+	if (qm_get_regs(vf_qm, vf_data))
+		return -EINVAL;
+
+	/*
+	 * Registers are 32 bit wide while the DMA addresses are 64 bit:
+	 * DW2 carries the upper half and DW1 the lower half.
+	 */
+	vf_data->eqe_dma = vf_data->qm_eqc_dw[2];
+	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+	vf_data->eqe_dma |= vf_data->qm_eqc_dw[1];
+	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[2];
+	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[1];
+
+	/* The SQC and CQC addresses are queried via SQC_BT/CQC_BT. */
+	if (qm_get_sqc(vf_qm, &vf_data->sqc_dma)) {
+		dev_err(dev, "failed to read SQC addr!\n");
+		return -EINVAL;
+	}
+
+	if (qm_get_cqc(vf_qm, &vf_data->cqc_dma)) {
+		dev_err(dev, "failed to read CQC addr!\n");
+		return -EINVAL;
+	}
+
+	migf->total_length = sizeof(struct acc_vf_data);
+	return 0;
+}
+
+/*
+ * Check the PF's RAS state, the PF/VF communication interrupt state and the
+ * per-module (SEC/HPRE/ZIP) interrupt state.
+ *
+ * Returns 0 if all checked registers read back as zero, -EBUSY if one of
+ * them stays non-zero, or -EINVAL for an unknown VF device id.
+ */
+static int
+hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct pci_dev *vf_pdev = hisi_acc_vdev->vf_dev;
+	struct hisi_qm *vfqm = &hisi_acc_vdev->vf_qm;
+	struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+	struct device *dev = &qm->pdev->dev;
+
+	/* RAS state of the PF */
+	if (qm_check_reg_state(qm, QM_ABNORMAL_INT_STATUS)) {
+		dev_err(dev, "failed to check QM RAS state!\n");
+		return -EBUSY;
+	}
+
+	/* Function communication state between PF and VF */
+	if (qm_check_reg_state(vfqm, QM_IFC_INT_STATUS)) {
+		dev_err(dev, "failed to check QM IFC INT state!\n");
+		return -EBUSY;
+	}
+	if (qm_check_reg_state(vfqm, QM_IFC_INT_SET_V)) {
+		dev_err(dev, "failed to check QM IFC INT SET state!\n");
+		return -EBUSY;
+	}
+
+	/* Submodule task state, selected by the VF device id */
+	switch (vf_pdev->device) {
+	case PCI_DEVICE_ID_HUAWEI_SEC_VF:
+		if (qm_check_reg_state(qm, SEC_CORE_INT_STATUS)) {
+			dev_err(dev, "failed to check QM SEC Core INT state!\n");
+			return -EBUSY;
+		}
+		break;
+	case PCI_DEVICE_ID_HUAWEI_HPRE_VF:
+		if (qm_check_reg_state(qm, HPRE_HAC_INT_STATUS)) {
+			dev_err(dev, "failed to check QM HPRE HAC INT state!\n");
+			return -EBUSY;
+		}
+		break;
+	case PCI_DEVICE_ID_HUAWEI_ZIP_VF:
+		if (qm_check_reg_state(qm, HZIP_CORE_INT_STATUS)) {
+			dev_err(dev, "failed to check QM ZIP Core INT state!\n");
+			return -EBUSY;
+		}
+		break;
+	default:
+		dev_err(dev, "failed to detect acc module type!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Mark a migration file as disabled and drop its contents: under the file
+ * lock the length and file position are reset and the disabled flag is set.
+ */
+static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
+{
+	struct file *filp = migf->filp;
+
+	mutex_lock(&migf->lock);
+	filp->f_pos = 0;
+	migf->total_length = 0;
+	migf->disabled = true;
+	mutex_unlock(&migf->lock);
+}
+
+/*
+ * Disable and release the device's references to the resuming and saving
+ * migration files, if present.
+ */
+static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf;
+
+	migf = hisi_acc_vdev->resuming_migf;
+	if (migf) {
+		hisi_acc_vf_disable_fd(migf);
+		fput(migf->filp);
+		hisi_acc_vdev->resuming_migf = NULL;
+	}
+
+	migf = hisi_acc_vdev->saving_migf;
+	if (migf) {
+		hisi_acc_vf_disable_fd(migf);
+		fput(migf->filp);
+		hisi_acc_vdev->saving_migf = NULL;
+	}
+}
+
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if exists.
+ *
+ * The caller must hold state_mutex. While deferred_reset is found set (checked
+ * under reset_lock), the flag is cleared, the VF QM state is put back to
+ * QM_NOT_READY, the migration state to VFIO_DEVICE_STATE_RUNNING, the
+ * migration files are disabled, and the check is repeated.
+ *
+ * state_mutex is released while reset_lock is still held, so a reset that
+ * sets deferred_reset under reset_lock is either seen by this loop or finds
+ * state_mutex free.
+ */
+static void
+hisi_acc_vf_state_mutex_unlock(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+again:
+ spin_lock(&hisi_acc_vdev->reset_lock);
+ if (hisi_acc_vdev->deferred_reset) {
+ hisi_acc_vdev->deferred_reset = false;
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+ hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+ hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
+ goto again;
+ }
+ mutex_unlock(&hisi_acc_vdev->state_mutex);
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+}
+
+/* Restart the VF queues, but only if the VF QM was recorded as QM_READY. */
+static void hisi_acc_vf_start_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	if (hisi_acc_vdev->vf_qm_state == QM_READY)
+		vf_qm_fun_reset(hisi_acc_vdev, &hisi_acc_vdev->vf_qm);
+}
+
+/*
+ * Apply the data written to the resuming migration file to the VF: first
+ * verify that it matches this device, then load it.
+ * Returns 0 on success or the error of the failing step.
+ */
+static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf;
+	struct device *dev = &hisi_acc_vdev->vf_dev->dev;
+	int err;
+
+	/* Device compatibility check */
+	err = vf_qm_check_match(hisi_acc_vdev, migf);
+	if (err) {
+		dev_err(dev, "failed to match the VF!\n");
+		return err;
+	}
+
+	/* Restore the data into the VF */
+	err = vf_qm_load_data(hisi_acc_vdev, migf);
+	if (err)
+		dev_err(dev, "failed to recover the VF!\n");
+
+	return err;
+}
+
+/* ->release() of the migration files: disable the file and free its state. */
+static int hisi_acc_vf_release_file(struct inode *inode, struct file *filp)
+{
+	struct hisi_acc_vf_migration_file *mig_file = filp->private_data;
+
+	hisi_acc_vf_disable_fd(mig_file);
+	mutex_destroy(&mig_file->lock);
+	kfree(mig_file);
+
+	return 0;
+}
+
+/*
+ * ->write() of the resuming migration file: append user data to the
+ * migration buffer at the current file position.
+ *
+ * The file is a stream, so an explicit position (@pos != NULL) is rejected
+ * with -ESPIPE and filp->f_pos is used instead.
+ *
+ * Return: number of bytes consumed, -EINVAL on a negative or overflowing
+ * position, -ENOMEM if the write would exceed struct acc_vf_data, -ENODEV if
+ * the file was disabled, or -EFAULT if the user buffer cannot be read.
+ */
+static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *buf,
+					size_t len, loff_t *pos)
+{
+	struct hisi_acc_vf_migration_file *migf = filp->private_data;
+	u8 *vf_data = (u8 *)&migf->vf_data;
+	loff_t requested_length;
+	ssize_t done = 0;
+
+	if (pos)
+		return -ESPIPE;
+	pos = &filp->f_pos;
+
+	/*
+	 * The position is validated under the lock: the copy below is offset
+	 * by *pos, so the check and the copy must see the same value.
+	 */
+	mutex_lock(&migf->lock);
+	if (*pos < 0 ||
+	    check_add_overflow((loff_t)len, *pos, &requested_length)) {
+		done = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (requested_length > sizeof(struct acc_vf_data)) {
+		done = -ENOMEM;
+		goto out_unlock;
+	}
+
+	if (migf->disabled) {
+		done = -ENODEV;
+		goto out_unlock;
+	}
+
+	/* Continue where the previous write stopped, not at the start. */
+	if (copy_from_user(vf_data + *pos, buf, len)) {
+		done = -EFAULT;
+		goto out_unlock;
+	}
+	*pos += len;
+	done = len;
+	migf->total_length += len;
+out_unlock:
+	mutex_unlock(&migf->lock);
+	return done;
+}
+
+/* File operations of the write-only stream used to feed in resume data. */
+static const struct file_operations hisi_acc_vf_resume_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.write		= hisi_acc_vf_resume_write,
+	.release	= hisi_acc_vf_release_file,
+};
+
+/*
+ * Allocate a migration file backed by a write-only anonymous inode for
+ * receiving resume data. Returns the new file state or an ERR_PTR().
+ */
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf;
+	struct file *filp;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	filp = anon_inode_getfile("hisi_acc_vf_mig", &hisi_acc_vf_resume_fops,
+				  migf, O_WRONLY);
+	migf->filp = filp;
+	if (IS_ERR(filp)) {
+		kfree(migf);
+		return ERR_CAST(filp);
+	}
+
+	stream_open(filp->f_inode, filp);
+	mutex_init(&migf->lock);
+
+	return migf;
+}
+
+/*
+ * ->read() of the saving migration file: copy saved device data to user
+ * space, starting at the current file position.
+ *
+ * The file is a stream, so an explicit position (@pos != NULL) is rejected
+ * with -ESPIPE and filp->f_pos is used instead.
+ *
+ * Return: number of bytes copied (0 at end of data), -EINVAL if the position
+ * is beyond the data, -ENODEV if the file was disabled, or -EFAULT if the
+ * user buffer cannot be written.
+ */
+static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len,
+				     loff_t *pos)
+{
+	struct hisi_acc_vf_migration_file *migf = filp->private_data;
+	u8 *vf_data = (u8 *)&migf->vf_data;
+	ssize_t done = 0;
+
+	if (pos)
+		return -ESPIPE;
+	pos = &filp->f_pos;
+
+	mutex_lock(&migf->lock);
+	if (*pos > migf->total_length) {
+		done = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (migf->disabled) {
+		done = -ENODEV;
+		goto out_unlock;
+	}
+
+	len = min_t(size_t, migf->total_length - *pos, len);
+	if (len) {
+		/* Continue where the previous read stopped, not at the start. */
+		if (copy_to_user(buf, vf_data + *pos, len)) {
+			done = -EFAULT;
+			goto out_unlock;
+		}
+		*pos += len;
+		done = len;
+	}
+out_unlock:
+	mutex_unlock(&migf->lock);
+	return done;
+}
+
+/* File operations of the read-only stream used to hand out saved data. */
+static const struct file_operations hisi_acc_vf_save_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.read		= hisi_acc_vf_save_read,
+	.release	= hisi_acc_vf_release_file,
+};
+
+/*
+ * Allocate a migration file backed by a read-only anonymous inode and fill
+ * it with the saved VF state. Returns the new file state or an ERR_PTR().
+ */
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf;
+	struct file *filp;
+	int err;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	filp = anon_inode_getfile("hisi_acc_vf_mig", &hisi_acc_vf_save_fops,
+				  migf, O_RDONLY);
+	migf->filp = filp;
+	if (IS_ERR(filp)) {
+		kfree(migf);
+		return ERR_CAST(filp);
+	}
+
+	stream_open(filp->f_inode, filp);
+	mutex_init(&migf->lock);
+
+	err = vf_qm_state_save(hisi_acc_vdev, migf);
+	if (err) {
+		/* Dropping the file reference frees migf via ->release(). */
+		fput(filp);
+		return ERR_PTR(err);
+	}
+
+	return migf;
+}
+
+/*
+ * Stop the VF: pause the VF QM, then verify the interrupt state.
+ * Returns 0 on success or the error of the failing step.
+ */
+static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct device *dev = &hisi_acc_vdev->vf_dev->dev;
+	int err;
+
+	err = vf_qm_func_stop(&hisi_acc_vdev->vf_qm);
+	if (err) {
+		dev_err(dev, "failed to stop QM VF function!\n");
+		return err;
+	}
+
+	err = hisi_acc_check_int_state(hisi_acc_vdev);
+	if (err)
+		dev_err(dev, "failed to check QM INT state!\n");
+
+	return err;
+}
+
+/*
+ * Perform one migration state transition from the current state to @new.
+ *
+ * Returns the migration file for transitions that create one (STOP ->
+ * STOP_COPY, STOP -> RESUMING), NULL for the other supported transitions,
+ * or an ERR_PTR() on failure or for an unsupported arc.
+ */
+static struct file *
+hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			     u32 new)
+{
+	struct hisi_acc_vf_migration_file *migf;
+	u32 cur = hisi_acc_vdev->mig_state;
+	int err;
+
+	if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) {
+		err = hisi_acc_vf_stop_device(hisi_acc_vdev);
+		return err ? ERR_PTR(err) : NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
+		migf = hisi_acc_vf_stop_copy(hisi_acc_vdev);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+		/* One reference for the device, one for the caller. */
+		get_file(migf->filp);
+		hisi_acc_vdev->saving_migf = migf;
+		return migf->filp;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
+		migf = hisi_acc_vf_pci_resume(hisi_acc_vdev);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+		/* One reference for the device, one for the caller. */
+		get_file(migf->filp);
+		hisi_acc_vdev->resuming_migf = migf;
+		return migf->filp;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
+		err = hisi_acc_vf_load_state(hisi_acc_vdev);
+		if (err)
+			return ERR_PTR(err);
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) {
+		hisi_acc_vf_start_device(hisi_acc_vdev);
+		return NULL;
+	}
+
+	/*
+	 * vfio_mig_get_next_state() does not use arcs other than the above
+	 */
+	WARN_ON(true);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Walk the device from its current migration state to @new_state, one arc
+ * at a time as chosen by vfio_mig_get_next_state(), under state_mutex.
+ *
+ * Returns the file produced by the final arc (if any), NULL, or an
+ * ERR_PTR() if an arc fails or cannot be determined.
+ */
+static struct file *
+hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
+				   enum vfio_device_mig_state new_state)
+{
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
+			struct hisi_acc_vf_core_device, core_device.vdev);
+	enum vfio_device_mig_state step;
+	struct file *filp = NULL;
+
+	mutex_lock(&hisi_acc_vdev->state_mutex);
+	while (hisi_acc_vdev->mig_state != new_state) {
+		if (vfio_mig_get_next_state(vdev, hisi_acc_vdev->mig_state,
+					    new_state, &step)) {
+			filp = ERR_PTR(-EINVAL);
+			break;
+		}
+
+		filp = hisi_acc_vf_set_device_state(hisi_acc_vdev, step);
+		if (IS_ERR(filp))
+			break;
+
+		hisi_acc_vdev->mig_state = step;
+
+		/* A file may only be produced by the last arc of the walk. */
+		if (WARN_ON(filp && new_state != hisi_acc_vdev->mig_state)) {
+			fput(filp);
+			filp = ERR_PTR(-EINVAL);
+			break;
+		}
+	}
+	hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+
+	return filp;
+}
+
+/* Report the current migration state in @curr_state; always returns 0. */
+static int
+hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
+				   enum vfio_device_mig_state *curr_state)
+{
+	struct hisi_acc_vf_core_device *acc_vdev;
+
+	acc_vdev = container_of(vdev, struct hisi_acc_vf_core_device,
+				core_device.vdev);
+
+	mutex_lock(&acc_vdev->state_mutex);
+	*curr_state = acc_vdev->mig_state;
+	hisi_acc_vf_state_mutex_unlock(acc_vdev);
+
+	return 0;
+}
+
+static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = dev_get_drvdata(&pdev->dev);
+
+ if (hisi_acc_vdev->core_device.vdev.migration_flags !=
+ VFIO_MIGRATION_STOP_COPY)
+ return;
+
+ /*
+ * As the higher VFIO layers are holding locks across reset and using
+ * those same locks with the mm_lock we need to prevent ABBA deadlock
+ * with the state_mutex and mm_lock.
+ * In case the state_mutex was taken already we defer the cleanup work
+ * to the unlock flow of the other running context.
+ *
+ * deferred_reset is set and the trylock is attempted under reset_lock.
+ * If the trylock succeeds, hisi_acc_vf_state_mutex_unlock() performs
+ * the cleanup right away and releases state_mutex again.
+ */
+ spin_lock(&hisi_acc_vdev->reset_lock);
+ hisi_acc_vdev->deferred_reset = true;
+ if (!mutex_trylock(&hisi_acc_vdev->state_mutex)) {
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+ return;
+ }
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+ hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+}
+
+/*
+ * Set up the driver's private hisi_qm for the VF: map the whole of BAR2 and
+ * initialise the fields used by the migration code.
+ * Returns 0 on success or -EIO if the BAR cannot be mapped.
+ */
+static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct pci_dev *vf_dev = hisi_acc_vdev->core_device.pdev;
+	struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+	void __iomem *base;
+
+	/*
+	 * ACC VF dev BAR2 region consists of both functional register space
+	 * and migration control register space. For migration to work, we
+	 * need access to both. Hence, we map the entire BAR2 region here.
+	 * But unnecessarily exposing the migration BAR region to the Guest
+	 * has the potential to prevent/corrupt the Guest migration. Hence,
+	 * we restrict access to the migration control space from the Guest
+	 * (please see the mmap/ioctl/read/write override functions).
+	 *
+	 * Please note that it is OK to expose the entire VF BAR if migration
+	 * is not supported or required as this cannot affect the ACC PF
+	 * configurations.
+	 *
+	 * Also the HiSilicon ACC VF devices supported by this driver on
+	 * HiSilicon hardware platforms are integrated end point devices
+	 * and the platform lacks the capability to perform any PCIe P2P
+	 * between these devices.
+	 */
+	base = ioremap(pci_resource_start(vf_dev, VFIO_PCI_BAR2_REGION_INDEX),
+		       pci_resource_len(vf_dev, VFIO_PCI_BAR2_REGION_INDEX));
+	vf_qm->io_base = base;
+	if (!base)
+		return -EIO;
+
+	vf_qm->fun_type = QM_HW_VF;
+	vf_qm->pdev = vf_dev;
+	mutex_init(&vf_qm->mailbox_lock);
+
+	return 0;
+}
+
+/*
+ * Look up the QM of the PF that owns VF @pdev.
+ *
+ * Returns NULL when @pdev is not a VF, its device ID is not one of the
+ * SEC/HPRE/ZIP VF IDs, no PF driver is available, or
+ * pci_iov_get_pf_drvdata() reports an error.
+ */
+static struct hisi_qm *hisi_acc_get_pf_qm(struct pci_dev *pdev)
+{
+	struct pci_driver *pf_driver = NULL;
+	struct hisi_qm *pf_qm;
+
+	if (!pdev->is_virtfn)
+		return NULL;
+
+	if (pdev->device == PCI_DEVICE_ID_HUAWEI_SEC_VF)
+		pf_driver = hisi_sec_get_pf_driver();
+	else if (pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_VF)
+		pf_driver = hisi_hpre_get_pf_driver();
+	else if (pdev->device == PCI_DEVICE_ID_HUAWEI_ZIP_VF)
+		pf_driver = hisi_zip_get_pf_driver();
+
+	/* Unknown device ID or accelerator without a PF driver. */
+	if (!pf_driver)
+		return NULL;
+
+	pf_qm = pci_iov_get_pf_drvdata(pdev, pf_driver);
+	if (IS_ERR(pf_qm))
+		return NULL;
+
+	return pf_qm;
+}
+
+/*
+ * Validate a read/write against the Guest-visible part of BAR2.
+ *
+ * Accesses to other regions pass unchanged. For BAR2 only the first half
+ * of the BAR is accessible: an access starting at or beyond that point is
+ * rejected with -EINVAL, and *new_count is clamped so the access ends
+ * inside the first half.
+ */
+static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev,
+					size_t count, loff_t *ppos,
+					size_t *new_count)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	resource_size_t end;
+	loff_t pos;
+
+	if (index != VFIO_PCI_BAR2_REGION_INDEX)
+		return 0;
+
+	pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	end = pci_resource_len(vdev->pdev, index) / 2;
+
+	/* Check if access is for migration control region */
+	if (pos >= end)
+		return -EINVAL;
+
+	*new_count = min(count, (size_t)(end - pos));
+
+	return 0;
+}
+
+/*
+ * mmap override: a BAR2 mapping must fit entirely within the first half
+ * of the BAR, otherwise -EINVAL is returned. Everything that passes the
+ * check is handed to vfio_pci_core_mmap().
+ */
+static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev,
+				  struct vm_area_struct *vma)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	unsigned int index;
+	u64 req_len, pgoff, req_start;
+	resource_size_t end;
+
+	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+	if (index != VFIO_PCI_BAR2_REGION_INDEX)
+		return vfio_pci_core_mmap(core_vdev, vma);
+
+	end = pci_resource_len(vdev->pdev, index) / 2;
+
+	/* Byte range requested inside the region. */
+	req_len = vma->vm_end - vma->vm_start;
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	req_start = pgoff << PAGE_SHIFT;
+
+	if (req_start + req_len > end)
+		return -EINVAL;
+
+	return vfio_pci_core_mmap(core_vdev, vma);
+}
+
+/*
+ * write override: apply the BAR2 access check, then forward the (possibly
+ * shortened) request to vfio_pci_core_write().
+ */
+static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev,
+				       const char __user *buf, size_t count,
+				       loff_t *ppos)
+{
+	size_t allowed = count;
+	int err;
+
+	err = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &allowed);
+	if (!err)
+		return vfio_pci_core_write(core_vdev, buf, allowed, ppos);
+
+	return err;
+}
+
+/*
+ * read override: apply the BAR2 access check, then forward the (possibly
+ * shortened) request to vfio_pci_core_read().
+ */
+static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev,
+				      char __user *buf, size_t count,
+				      loff_t *ppos)
+{
+	size_t allowed = count;
+	int err;
+
+	err = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &allowed);
+	if (!err)
+		return vfio_pci_core_read(core_vdev, buf, allowed, ppos);
+
+	return err;
+}
+
+/*
+ * ioctl override: answer VFIO_DEVICE_GET_REGION_INFO for BAR2 locally,
+ * reporting only half of the BAR; every other request is forwarded to
+ * vfio_pci_core_ioctl().
+ *
+ * Returns -EFAULT when the user buffer cannot be copied and -EINVAL when
+ * argsz is smaller than the fixed part of struct vfio_region_info.
+ */
+static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+				    unsigned long arg)
+{
+	struct vfio_pci_core_device *vdev;
+	struct vfio_region_info info;
+	unsigned long minsz;
+
+	if (cmd != VFIO_DEVICE_GET_REGION_INFO)
+		return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+
+	minsz = offsetofend(struct vfio_region_info, offset);
+
+	if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	if (info.index != VFIO_PCI_BAR2_REGION_INDEX)
+		return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+
+	vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+	info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+
+	/*
+	 * ACC VF dev BAR2 region consists of both functional
+	 * register space and migration control register space.
+	 * Report only the functional region to Guest.
+	 */
+	info.size = pci_resource_len(vdev->pdev, info.index) / 2;
+
+	info.flags = VFIO_REGION_INFO_FLAG_READ |
+		     VFIO_REGION_INFO_FLAG_WRITE |
+		     VFIO_REGION_INFO_FLAG_MMAP;
+
+	if (copy_to_user((void __user *)arg, &info, minsz))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * open_device callback shared by both ops tables.
+ *
+ * Enables the vfio-pci core device. When the active ops table provides
+ * migration_set_state, the VF QM is also initialised and the migration
+ * state starts at VFIO_DEVICE_STATE_RUNNING. A QM init failure disables
+ * the core device again and is returned to the caller.
+ */
+static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
+{
+	struct hisi_acc_vf_core_device *acc_vdev =
+		container_of(core_vdev, struct hisi_acc_vf_core_device,
+			     core_device.vdev);
+	struct vfio_pci_core_device *vdev = &acc_vdev->core_device;
+	int err;
+
+	err = vfio_pci_core_enable(vdev);
+	if (err)
+		return err;
+
+	if (core_vdev->ops->migration_set_state) {
+		err = hisi_acc_vf_qm_init(acc_vdev);
+		if (err)
+			goto err_disable;
+		acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+	}
+
+	vfio_pci_core_finish_enable(vdev);
+	return 0;
+
+err_disable:
+	vfio_pci_core_disable(vdev);
+	return err;
+}
+
+/*
+ * close_device callback of the migration ops table: drop the BAR2 mapping
+ * held in the VF QM, then let the vfio-pci core close the device.
+ */
+static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
+{
+	struct hisi_acc_vf_core_device *acc_vdev =
+		container_of(core_vdev, struct hisi_acc_vf_core_device,
+			     core_device.vdev);
+
+	iounmap(acc_vdev->vf_qm.io_base);
+	vfio_pci_core_close_device(core_vdev);
+}
+
+/*
+ * Ops table installed by probe when migration setup succeeded: uses the
+ * driver's own open/close, ioctl, read, write and mmap handlers and adds
+ * the migration state callbacks; the rest comes from the vfio-pci core.
+ */
+static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
+ .name = "hisi-acc-vfio-pci-migration",
+ .open_device = hisi_acc_vfio_pci_open_device,
+ .close_device = hisi_acc_vfio_pci_close_device,
+ .ioctl = hisi_acc_vfio_pci_ioctl,
+ .device_feature = vfio_pci_core_ioctl_feature,
+ .read = hisi_acc_vfio_pci_read,
+ .write = hisi_acc_vfio_pci_write,
+ .mmap = hisi_acc_vfio_pci_mmap,
+ .request = vfio_pci_core_request,
+ .match = vfio_pci_core_match,
+ .migration_set_state = hisi_acc_vfio_pci_set_device_state,
+ .migration_get_state = hisi_acc_vfio_pci_get_device_state,
+};
+
+/*
+ * Generic ops table installed by probe when migration is not set up.
+ * Only open_device is driver specific; it skips the QM setup because this
+ * table has no migration_set_state callback.
+ */
+static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
+ .name = "hisi-acc-vfio-pci",
+ .open_device = hisi_acc_vfio_pci_open_device,
+ .close_device = vfio_pci_core_close_device,
+ .ioctl = vfio_pci_core_ioctl,
+ .device_feature = vfio_pci_core_ioctl_feature,
+ .read = vfio_pci_core_read,
+ .write = vfio_pci_core_write,
+ .mmap = vfio_pci_core_mmap,
+ .request = vfio_pci_core_request,
+ .match = vfio_pci_core_match,
+};
+
+/*
+ * Set up the migration-related fields of @hisi_acc_vdev.
+ *
+ * Records the VF index (pci_iov_vf_id() + 1), the owning PF QM and the VF
+ * PCI device, advertises VFIO_MIGRATION_STOP_COPY and initialises the
+ * locks used by the migration state machine and the reset_done handler.
+ *
+ * Returns 0 on success or the negative error from pci_iov_vf_id().
+ */
+static int
+hisi_acc_vfio_pci_migrn_init(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+			     struct pci_dev *pdev, struct hisi_qm *pf_qm)
+{
+	int vf_id;
+
+	vf_id = pci_iov_vf_id(pdev);
+	if (vf_id < 0)
+		return vf_id;
+
+	hisi_acc_vdev->vf_id = vf_id + 1;
+	hisi_acc_vdev->core_device.vdev.migration_flags =
+		VFIO_MIGRATION_STOP_COPY;
+	hisi_acc_vdev->pf_qm = pf_qm;
+	hisi_acc_vdev->vf_dev = pdev;
+	mutex_init(&hisi_acc_vdev->state_mutex);
+	/*
+	 * reset_lock is taken by the reset_done handler; a zeroed spinlock
+	 * is not a validly initialised one when spinlock debugging or
+	 * lockdep is enabled, so initialise it explicitly.
+	 */
+	spin_lock_init(&hisi_acc_vdev->reset_lock);
+
+	return 0;
+}
+
+/*
+ * PCI probe: allocate the per-device state, choose an ops table and
+ * register the device with the vfio-pci core.
+ *
+ * The migration ops are used only when a PF QM is found, its version is
+ * at least QM_HW_V3 and hisi_acc_vfio_pci_migrn_init() succeeds; in every
+ * other case the generic ops are used. Returns 0 or a negative errno.
+ */
+static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	const struct vfio_device_ops *ops = &hisi_acc_vfio_pci_ops;
+	struct hisi_acc_vf_core_device *acc_vdev;
+	struct hisi_qm *pf_qm;
+	int err;
+
+	acc_vdev = kzalloc(sizeof(*acc_vdev), GFP_KERNEL);
+	if (!acc_vdev)
+		return -ENOMEM;
+
+	pf_qm = hisi_acc_get_pf_qm(pdev);
+	if (pf_qm && pf_qm->ver >= QM_HW_V3) {
+		err = hisi_acc_vfio_pci_migrn_init(acc_vdev, pdev, pf_qm);
+		if (err)
+			pci_warn(pdev, "migration support failed, continue with generic interface\n");
+		else
+			ops = &hisi_acc_vfio_pci_migrn_ops;
+	}
+
+	vfio_pci_core_init_device(&acc_vdev->core_device, pdev, ops);
+
+	err = vfio_pci_core_register_device(&acc_vdev->core_device);
+	if (err) {
+		vfio_pci_core_uninit_device(&acc_vdev->core_device);
+		kfree(acc_vdev);
+		return err;
+	}
+
+	dev_set_drvdata(&pdev->dev, acc_vdev);
+	return 0;
+}
+
+/*
+ * PCI remove: unregister and uninitialise the vfio-pci core device, then
+ * free the per-device state allocated in probe.
+ */
+static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
+{
+	struct hisi_acc_vf_core_device *acc_vdev = dev_get_drvdata(&pdev->dev);
+	struct vfio_pci_core_device *core = &acc_vdev->core_device;
+
+	vfio_pci_core_unregister_device(core);
+	vfio_pci_core_uninit_device(core);
+	kfree(acc_vdev);
+}
+
+/* Huawei SEC, HPRE and ZIP VF device IDs this driver can be bound to. */
+static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);
+
+/*
+ * PCI error handlers: the driver's own reset_done callback plus the
+ * vfio-pci core error_detected handler.
+ */
+static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
+ .reset_done = hisi_acc_vf_pci_aer_reset_done,
+ .error_detected = vfio_pci_core_aer_err_detected,
+};
+
+/* PCI driver definition registered through module_pci_driver(). */
+static struct pci_driver hisi_acc_vfio_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = hisi_acc_vfio_pci_table,
+ .probe = hisi_acc_vfio_pci_probe,
+ .remove = hisi_acc_vfio_pci_remove,
+ .err_handler = &hisi_acc_vf_err_handlers,
+};
+
+module_pci_driver(hisi_acc_vfio_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Liu Longfang <liulongfang@huawei.com>");
+MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
+MODULE_DESCRIPTION("HiSilicon VFIO PCI - VFIO PCI driver with live migration support for HiSilicon ACC device family");
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
new file mode 100644
index 000000000000..5494f4983bbe
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 HiSilicon Ltd. */
+
+#ifndef HISI_ACC_VFIO_PCI_H
+#define HISI_ACC_VFIO_PCI_H
+
+#include <linux/hisi_acc_qm.h>
+
+/* Polling period/timeout; the _US suffix suggests microseconds - confirm at the users */
+#define MB_POLL_PERIOD_US		10
+#define MB_POLL_TIMEOUT_US		1000
+#define QM_CACHE_WB_START		0x204
+#define QM_CACHE_WB_DONE		0x208
+#define QM_MB_CMD_PAUSE_QM		0xe
+#define QM_ABNORMAL_INT_STATUS		0x100008
+#define QM_IFC_INT_STATUS		0x0028
+#define SEC_CORE_INT_STATUS		0x301008
+#define HPRE_HAC_INT_STATUS		0x301800
+#define HZIP_CORE_INT_STATUS		0x3010AC
+#define QM_QUE_ISO_CFG			0x301154
+
+#define QM_VFT_CFG_RDY			0x10006c
+#define QM_VFT_CFG_OP_WR		0x100058
+#define QM_VFT_CFG_TYPE			0x10005c
+#define QM_VFT_CFG			0x100060
+#define QM_VFT_CFG_OP_ENABLE		0x100054
+#define QM_VFT_CFG_DATA_L		0x100064
+#define QM_VFT_CFG_DATA_H		0x100068
+
+#define ERROR_CHECK_TIMEOUT		100
+#define CHECK_DELAY_TIME		100
+
+#define QM_SQC_VFT_BASE_SHIFT_V2	28
+#define QM_SQC_VFT_BASE_MASK_V2		GENMASK(15, 0)
+#define QM_SQC_VFT_NUM_SHIFT_V2		45
+#define QM_SQC_VFT_NUM_MASK_V2		GENMASK(9, 0)
+
+/* RW regs */
+#define QM_REGS_MAX_LEN			7
+#define QM_REG_ADDR_OFFSET		0x0004
+
+#define QM_XQC_ADDR_OFFSET		32U
+#define QM_VF_AEQ_INT_MASK		0x0004
+#define QM_VF_EQ_INT_MASK		0x000c
+#define QM_IFC_INT_SOURCE_V		0x0020
+#define QM_IFC_INT_MASK			0x0024
+#define QM_IFC_INT_SET_V		0x002c
+#define QM_QUE_ISO_CFG_V		0x0030
+#define QM_PAGE_SIZE			0x0034
+
+/* Lower-case 0x prefix, matching the other constants in this header */
+#define QM_EQC_DW0			0x8000
+#define QM_AEQC_DW0			0x8020
+
+/*
+ * Device state record embedded in struct hisi_acc_vf_migration_file.
+ * QM_MATCH_SIZE spans the leading fields up to and including
+ * qm_rsv_state. NOTE(review): presumably this is the layout exchanged
+ * between migration source and destination, so field order and sizes
+ * should not be changed - confirm against the save/load code.
+ */
+struct acc_vf_data {
+#define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
+ /* QM match information */
+#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC
+ u64 acc_magic;
+ u32 qp_num;
+ u32 dev_id;
+ u32 que_iso_cfg;
+ u32 qp_base;
+ u32 vf_qm_state;
+ /* QM reserved match information */
+ u32 qm_rsv_state[3];
+
+ /* QM RW regs */
+ u32 aeq_int_mask;
+ u32 eq_int_mask;
+ u32 ifc_int_source;
+ u32 ifc_int_mask;
+ u32 ifc_int_set;
+ u32 page_size;
+
+ /* QM_EQC_DW has 7 regs */
+ u32 qm_eqc_dw[7];
+
+ /* QM_AEQC_DW has 7 regs */
+ u32 qm_aeqc_dw[7];
+
+ /* QM reserved 5 regs */
+ u32 qm_rsv_regs[5];
+ u32 padding;
+ /* qm memory init information */
+ u64 eqe_dma;
+ u64 aeqe_dma;
+ u64 sqc_dma;
+ u64 cqc_dma;
+};
+
+/*
+ * Per-file migration context: the struct file handed out for the
+ * transfer plus the device state record it carries.
+ * NOTE(review): lock, disabled and total_length mirror the fields of the
+ * mlx5 migration file; their exact use here is outside this header -
+ * confirm in the .c file.
+ */
+struct hisi_acc_vf_migration_file {
+ struct file *filp;
+ struct mutex lock;
+ bool disabled;
+
+ struct acc_vf_data vf_data;
+ size_t total_length;
+};
+
+/*
+ * Per-VF driver state. core_device embeds the vfio-pci core device and
+ * the driver recovers this structure from it with container_of().
+ */
+struct hisi_acc_vf_core_device {
+ struct vfio_pci_core_device core_device;
+ /* set by the reset_done handler while holding reset_lock */
+ u8 deferred_reset:1;
+ /* for migration state */
+ struct mutex state_mutex;
+ enum vfio_device_mig_state mig_state;
+ struct pci_dev *pf_dev;
+ struct pci_dev *vf_dev;
+ struct hisi_qm *pf_qm;
+ struct hisi_qm vf_qm;
+ u32 vf_qm_state;
+ /* pci_iov_vf_id() of the VF plus one */
+ int vf_id;
+ /* for reset handler */
+ spinlock_t reset_lock;
+ struct hisi_acc_vf_migration_file *resuming_migf;
+ struct hisi_acc_vf_migration_file *saving_migf;
+};
+#endif /* HISI_ACC_VFIO_PCI_H */
diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig
new file mode 100644
index 000000000000..29ba9c504a75
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MLX5_VFIO_PCI
+ tristate "VFIO support for MLX5 PCI devices"
+ depends on MLX5_CORE
+ depends on VFIO_PCI_CORE
+ help
+ This provides migration support for MLX5 devices using the VFIO
+ framework.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/mlx5/Makefile b/drivers/vfio/pci/mlx5/Makefile
new file mode 100644
index 000000000000..689627da7ff5
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5-vfio-pci.o
+mlx5-vfio-pci-y := main.o cmd.o
+
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
new file mode 100644
index 000000000000..5c9f9218cc1d
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "cmd.h"
+
+/*
+ * Issue SUSPEND_VHCA for @vhca_id with the given @op_mod.
+ *
+ * Returns -ENOTCONN when no mlx5 core device can be obtained for @pdev,
+ * otherwise the result of the firmware command.
+ */
+int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
+{
+	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
+	struct mlx5_core_dev *mdev;
+	int err;
+
+	mdev = mlx5_vf_get_core_dev(pdev);
+	if (!mdev)
+		return -ENOTCONN;
+
+	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
+	MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
+	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
+
+	err = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out);
+
+	mlx5_vf_put_core_dev(mdev);
+	return err;
+}
+
+/*
+ * Issue RESUME_VHCA for @vhca_id with the given @op_mod.
+ *
+ * Returns -ENOTCONN when no mlx5 core device can be obtained for @pdev,
+ * otherwise the result of the firmware command.
+ */
+int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
+{
+	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
+	struct mlx5_core_dev *mdev;
+	int err;
+
+	mdev = mlx5_vf_get_core_dev(pdev);
+	if (!mdev)
+		return -ENOTCONN;
+
+	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
+	MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id);
+	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
+
+	err = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out);
+
+	mlx5_vf_put_core_dev(mdev);
+	return err;
+}
+
+/*
+ * Query the migration state size required for @vhca_id.
+ *
+ * On success *state_size receives the required_umem_size reported by the
+ * device; on failure it is left untouched. Returns -ENOTCONN when no mlx5
+ * core device can be obtained, otherwise the command result.
+ */
+int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+					  size_t *state_size)
+{
+	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
+	struct mlx5_core_dev *mdev;
+	int err;
+
+	mdev = mlx5_vf_get_core_dev(pdev);
+	if (!mdev)
+		return -ENOTCONN;
+
+	MLX5_SET(query_vhca_migration_state_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
+	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id);
+	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
+
+	err = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out);
+	if (!err)
+		*state_size = MLX5_GET(query_vhca_migration_state_out, out,
+				       required_umem_size);
+
+	mlx5_vf_put_core_dev(mdev);
+	return err;
+}
+
+/*
+ * Read the vhca_id of function @function_id through QUERY_HCA_CAP.
+ *
+ * On success *vhca_id is filled in. Returns -ENOTCONN when no mlx5 core
+ * device can be obtained, -ENOMEM when the output buffer cannot be
+ * allocated, otherwise the command result.
+ */
+int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)
+{
+	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+	struct mlx5_core_dev *mdev;
+	int out_size;
+	void *out;
+	int err;
+
+	mdev = mlx5_vf_get_core_dev(pdev);
+	if (!mdev)
+		return -ENOTCONN;
+
+	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	out = kzalloc(out_size, GFP_KERNEL);
+	if (!out) {
+		err = -ENOMEM;
+		goto put_dev;
+	}
+
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, other_function, 1);
+	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
+	MLX5_SET(query_hca_cap_in, in, op_mod,
+		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
+		 HCA_CAP_OPMOD_GET_CUR);
+
+	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+	if (!err)
+		*vhca_id = MLX5_GET(query_hca_cap_out, out,
+				    capability.cmd_hca_cap.vhca_id);
+
+	kfree(out);
+put_dev:
+	mlx5_vf_put_core_dev(mdev);
+	return err;
+}
+
+/*
+ * Create an MTT-mode mkey in PD @pdn describing the DMA-mapped pages of
+ * @migf, covering migf->total_length bytes with local and remote
+ * read/write access. The new key is returned through @mkey.
+ *
+ * Returns 0 on success, -ENOMEM when the command buffer cannot be
+ * allocated, otherwise the result of mlx5_core_create_mkey().
+ */
+static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+			      struct mlx5_vf_migration_file *migf, u32 *mkey)
+{
+	size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
+	/* each octword holds two 64-bit translation entries */
+	size_t octwords = DIV_ROUND_UP(npages, 2);
+	struct sg_dma_page_iter dma_iter;
+	int err = 0, inlen;
+	__be64 *mtt;
+	void *mkc;
+	u32 *in;
+
+	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+		sizeof(*mtt) * round_up(npages, 2);
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+		 octwords);
+
+	/* Fill the translation table with the DMA address of every page. */
+	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+	for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
+		*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+	MLX5_SET(mkc, mkc, lr, 1);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, rr, 1);
+	MLX5_SET(mkc, mkc, rw, 1);
+	MLX5_SET(mkc, mkc, pd, pdn);
+	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+	MLX5_SET(mkc, mkc, translations_octword_size, octwords);
+	MLX5_SET64(mkc, mkc, len, migf->total_length);
+
+	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+	kvfree(in);
+	return err;
+}
+
+/*
+ * Have the device write the state of @vhca_id into the pages of @migf.
+ *
+ * Allocates a PD, DMA-maps the migration pages for device-to-memory
+ * transfer, creates an mkey over them and executes SAVE_VHCA_STATE. On
+ * success migf->total_length is replaced by the actual_image_size
+ * reported by the device. All temporary resources are released on every
+ * path.
+ *
+ * Returns 0 on success, -ENOTCONN when no mlx5 core device can be
+ * obtained, otherwise the error of the failing step.
+ */
+int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf)
+{
+ struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+ u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
+ u32 pdn, mkey;
+ int err;
+
+ if (!mdev)
+ return -ENOTCONN;
+
+ err = mlx5_core_alloc_pd(mdev, &pdn);
+ if (err)
+ goto end;
+
+ err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
+ 0);
+ if (err)
+ goto err_dma_map;
+
+ err = _create_state_mkey(mdev, pdn, migf, &mkey);
+ if (err)
+ goto err_create_mkey;
+
+ MLX5_SET(save_vhca_state_in, in, opcode,
+ MLX5_CMD_OP_SAVE_VHCA_STATE);
+ MLX5_SET(save_vhca_state_in, in, op_mod, 0);
+ MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id);
+ MLX5_SET(save_vhca_state_in, in, mkey, mkey);
+ MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
+
+ err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out);
+ if (err)
+ goto err_exec;
+
+ /* The device reports how many bytes of the buffer it actually used. */
+ migf->total_length =
+ MLX5_GET(save_vhca_state_out, out, actual_image_size);
+
+ mlx5_core_destroy_mkey(mdev, mkey);
+ mlx5_core_dealloc_pd(mdev, pdn);
+ dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
+ mlx5_vf_put_core_dev(mdev);
+
+ return 0;
+
+err_exec:
+ mlx5_core_destroy_mkey(mdev, mkey);
+err_create_mkey:
+ dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
+err_dma_map:
+ mlx5_core_dealloc_pd(mdev, pdn);
+end:
+ mlx5_vf_put_core_dev(mdev);
+ return err;
+}
+
+/*
+ * Load the state stored in the pages of @migf into @vhca_id.
+ *
+ * Runs under migf->lock. Allocates a PD, DMA-maps the migration pages for
+ * memory-to-device transfer, creates an mkey over them and executes
+ * LOAD_VHCA_STATE; all temporary resources are released on every path.
+ *
+ * Returns 0 on success, -ENOTCONN when no mlx5 core device can be
+ * obtained, -EINVAL when the file holds no data, otherwise the error of
+ * the failing step.
+ */
+int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+			       struct mlx5_vf_migration_file *migf)
+{
+	struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
+	/*
+	 * The mailboxes must be sized by the load_vhca_state layouts: those
+	 * are the layouts used by MLX5_SET() and mlx5_cmd_exec_inout() below.
+	 */
+	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
+	u32 pdn, mkey;
+	int err;
+
+	if (!mdev)
+		return -ENOTCONN;
+
+	mutex_lock(&migf->lock);
+	if (!migf->total_length) {
+		err = -EINVAL;
+		goto end;
+	}
+
+	err = mlx5_core_alloc_pd(mdev, &pdn);
+	if (err)
+		goto end;
+
+	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
+	if (err)
+		goto err_reg;
+
+	err = _create_state_mkey(mdev, pdn, migf, &mkey);
+	if (err)
+		goto err_mkey;
+
+	MLX5_SET(load_vhca_state_in, in, opcode,
+		 MLX5_CMD_OP_LOAD_VHCA_STATE);
+	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
+	MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id);
+	MLX5_SET(load_vhca_state_in, in, mkey, mkey);
+	MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
+
+	err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
+
+	mlx5_core_destroy_mkey(mdev, mkey);
+err_mkey:
+	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
+err_reg:
+	mlx5_core_dealloc_pd(mdev, pdn);
+end:
+	mlx5_vf_put_core_dev(mdev);
+	mutex_unlock(&migf->lock);
+	return err;
+}
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
new file mode 100644
index 000000000000..1392a11a9cc0
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ */
+
+#ifndef MLX5_VFIO_CMD_H
+#define MLX5_VFIO_CMD_H
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+/*
+ * State behind one migration data file descriptor. The data lives in
+ * individually allocated pages tracked by the scatterlist table.
+ */
+struct mlx5_vf_migration_file {
+ struct file *filp;
+ /* serialises access to the fields below */
+ struct mutex lock;
+ /* once set, read/write on the file fail with -ENODEV */
+ bool disabled;
+
+ struct sg_append_table table;
+ /* bytes of valid migration data */
+ size_t total_length;
+ /* bytes backed by pages in the table */
+ size_t allocated_length;
+
+ /* Optimize mlx5vf_get_migration_page() for sequential access */
+ struct scatterlist *last_offset_sg;
+ unsigned int sg_last_entry;
+ unsigned long last_offset;
+};
+
+int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
+int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
+int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
+ size_t *state_size);
+int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id);
+int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf);
+int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
+ struct mlx5_vf_migration_file *migf);
+#endif /* MLX5_VFIO_CMD_H */
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
new file mode 100644
index 000000000000..bbec5d288fee
--- /dev/null
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/sched/mm.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/anon_inodes.h>
+
+#include "cmd.h"
+
+/* Arbitrary to prevent userspace from consuming endless memory */
+#define MAX_MIGRATION_SIZE (512*1024*1024)
+
+/*
+ * Per-device driver state wrapping the vfio-pci core device.
+ */
+struct mlx5vf_pci_core_device {
+ struct vfio_pci_core_device core_device;
+ u16 vhca_id;
+ u8 migrate_cap:1;
+ /* consumed and cleared in mlx5vf_state_mutex_unlock() */
+ u8 deferred_reset:1;
+ /* protect migration state */
+ struct mutex state_mutex;
+ enum vfio_device_mig_state mig_state;
+ /* protect the reset_done flow */
+ spinlock_t reset_lock;
+ /* file handed out on STOP -> RESUMING, NULL when none */
+ struct mlx5_vf_migration_file *resuming_migf;
+ /* file handed out on STOP -> STOP_COPY, NULL when none */
+ struct mlx5_vf_migration_file *saving_migf;
+};
+
+/*
+ * Return the page that backs byte @offset of the migration data, or NULL
+ * when @offset lies beyond the remaining scatterlist entries.
+ *
+ * The scatterlist position of the previous lookup is cached in @migf so
+ * that increasing offsets continue from there; a lower offset (or an
+ * empty cache) restarts the walk from the head of the table.
+ */
+static struct page *
+mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
+ unsigned long offset)
+{
+ unsigned long cur_offset = 0;
+ struct scatterlist *sg;
+ unsigned int i;
+
+ /* All accesses are sequential */
+ if (offset < migf->last_offset || !migf->last_offset_sg) {
+ migf->last_offset = 0;
+ migf->last_offset_sg = migf->table.sgt.sgl;
+ migf->sg_last_entry = 0;
+ }
+
+ cur_offset = migf->last_offset;
+
+ for_each_sg(migf->last_offset_sg, sg,
+ migf->table.sgt.orig_nents - migf->sg_last_entry, i) {
+ if (offset < sg->length + cur_offset) {
+ /* Found the entry: remember it for the next lookup. */
+ migf->last_offset_sg = sg;
+ migf->sg_last_entry += i;
+ migf->last_offset = cur_offset;
+ return nth_page(sg_page(sg),
+ (offset - cur_offset) / PAGE_SIZE);
+ }
+ cur_offset += sg->length;
+ }
+ return NULL;
+}
+
+/*
+ * Grow the backing store of @migf by @npages pages.
+ *
+ * Pages are bulk-allocated in batches of at most one page worth of
+ * pointers and appended to migf->table; allocated_length is advanced for
+ * every batch that was appended.
+ *
+ * Returns 0 on success or a negative errno. Batches appended before a
+ * failure stay in the table and are released together with the file;
+ * the batch that failed to be appended is freed here.
+ */
+static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
+				      unsigned int npages)
+{
+	unsigned int to_alloc = npages;
+	struct page **page_list;
+	unsigned long filled;
+	unsigned int to_fill;
+	int ret;
+
+	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL);
+	if (!page_list)
+		return -ENOMEM;
+
+	do {
+		filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list);
+		if (!filled) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		to_alloc -= filled;
+		ret = sg_alloc_append_table_from_pages(
+			&migf->table, page_list, filled, 0,
+			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+			GFP_KERNEL);
+
+		if (ret)
+			goto err_append;
+		migf->allocated_length += filled * PAGE_SIZE;
+		/* clean input for another bulk allocation */
+		memset(page_list, 0, filled * sizeof(*page_list));
+		to_fill = min_t(unsigned int, to_alloc,
+				PAGE_SIZE / sizeof(*page_list));
+	} while (to_alloc > 0);
+
+	kvfree(page_list);
+	return 0;
+
+err_append:
+	/*
+	 * This batch never made it into the SG table, so nothing else will
+	 * free its pages.
+	 */
+	while (filled--)
+		__free_page(page_list[filled]);
+err:
+	kvfree(page_list);
+	return ret;
+}
+
+/*
+ * Shut a migration file down: free every page in its table and the table
+ * itself, mark the file disabled and reset its lengths and file position.
+ * All of it happens under migf->lock.
+ */
+static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
+{
+ struct sg_page_iter sg_iter;
+
+ mutex_lock(&migf->lock);
+ /* Undo alloc_pages_bulk_array() */
+ for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0)
+ __free_page(sg_page_iter_page(&sg_iter));
+ sg_free_append_table(&migf->table);
+ migf->disabled = true;
+ migf->total_length = 0;
+ migf->allocated_length = 0;
+ migf->filp->f_pos = 0;
+ mutex_unlock(&migf->lock);
+}
+
+/*
+ * ->release of both migration files: drop the data pages, then destroy
+ * the lock and free the migration file structure. Always returns 0.
+ */
+static int mlx5vf_release_file(struct inode *inode, struct file *filp)
+{
+	struct mlx5_vf_migration_file *mig_file;
+
+	mig_file = filp->private_data;
+
+	mlx5vf_disable_fd(mig_file);
+	mutex_destroy(&mig_file->lock);
+	kfree(mig_file);
+
+	return 0;
+}
+
+/*
+ * ->read of the saving file: copy migration data to user space starting
+ * at the file's own position.
+ *
+ * Positional reads (non-NULL @pos) are rejected with -ESPIPE. Returns the
+ * number of bytes copied (short when the data ends), -EINVAL when the
+ * position is past the data or no page backs it, -ENODEV when the file
+ * was disabled, or -EFAULT when a copy to user space fails.
+ */
+static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *pos)
+{
+ struct mlx5_vf_migration_file *migf = filp->private_data;
+ ssize_t done = 0;
+
+ if (pos)
+ return -ESPIPE;
+ pos = &filp->f_pos;
+
+ mutex_lock(&migf->lock);
+ if (*pos > migf->total_length) {
+ done = -EINVAL;
+ goto out_unlock;
+ }
+ if (migf->disabled) {
+ done = -ENODEV;
+ goto out_unlock;
+ }
+
+ /* Never read past the end of the saved data. */
+ len = min_t(size_t, migf->total_length - *pos, len);
+ while (len) {
+ size_t page_offset;
+ struct page *page;
+ size_t page_len;
+ u8 *from_buff;
+ int ret;
+
+ page_offset = (*pos) % PAGE_SIZE;
+ page = mlx5vf_get_migration_page(migf, *pos - page_offset);
+ if (!page) {
+ if (done == 0)
+ done = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Copy at most up to the end of the current page. */
+ page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
+ from_buff = kmap_local_page(page);
+ ret = copy_to_user(buf, from_buff + page_offset, page_len);
+ kunmap_local(from_buff);
+ if (ret) {
+ done = -EFAULT;
+ goto out_unlock;
+ }
+ *pos += page_len;
+ len -= page_len;
+ done += page_len;
+ buf += page_len;
+ }
+
+out_unlock:
+ mutex_unlock(&migf->lock);
+ return done;
+}
+
+/* File operations of the read-only, non-seekable saving file. */
+static const struct file_operations mlx5vf_save_fops = {
+ .owner = THIS_MODULE,
+ .read = mlx5vf_save_read,
+ .release = mlx5vf_release_file,
+ .llseek = no_llseek,
+};
+
+/*
+ * Create the saving migration file and fill it with the device state.
+ *
+ * Queries the required state size, allocates that many pages and asks
+ * the device to save its state into them. Returns the new migration file
+ * or an ERR_PTR(). Once the anon file exists, failures are unwound with
+ * fput(): the file's release handler frees the pages and @migf itself.
+ */
+static struct mlx5_vf_migration_file *
+mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
+{
+ struct mlx5_vf_migration_file *migf;
+ int ret;
+
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ if (!migf)
+ return ERR_PTR(-ENOMEM);
+
+ migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
+ O_RDONLY);
+ if (IS_ERR(migf->filp)) {
+ int err = PTR_ERR(migf->filp);
+
+ kfree(migf);
+ return ERR_PTR(err);
+ }
+
+ stream_open(migf->filp->f_inode, migf->filp);
+ mutex_init(&migf->lock);
+
+ ret = mlx5vf_cmd_query_vhca_migration_state(
+ mvdev->core_device.pdev, mvdev->vhca_id, &migf->total_length);
+ if (ret)
+ goto out_free;
+
+ ret = mlx5vf_add_migration_pages(
+ migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
+ if (ret)
+ goto out_free;
+
+ ret = mlx5vf_cmd_save_vhca_state(mvdev->core_device.pdev,
+ mvdev->vhca_id, migf);
+ if (ret)
+ goto out_free;
+ return migf;
+out_free:
+ fput(migf->filp);
+ return ERR_PTR(ret);
+}
+
+/*
+ * ->write of the resuming file: append user data to the migration
+ * buffer at the file's own position, growing the page store on demand.
+ *
+ * Positional writes (non-NULL @pos) are rejected with -ESPIPE. Returns
+ * the number of bytes stored, -EINVAL for a negative or overflowing
+ * position or a missing backing page, -ENOMEM when the write would pass
+ * MAX_MIGRATION_SIZE or pages cannot be allocated, -ENODEV when the file
+ * was disabled, or -EFAULT when a copy from user space fails.
+ */
+static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct mlx5_vf_migration_file *migf = filp->private_data;
+ loff_t requested_length;
+ ssize_t done = 0;
+
+ if (pos)
+ return -ESPIPE;
+ pos = &filp->f_pos;
+
+ if (*pos < 0 ||
+ check_add_overflow((loff_t)len, *pos, &requested_length))
+ return -EINVAL;
+
+ if (requested_length > MAX_MIGRATION_SIZE)
+ return -ENOMEM;
+
+ mutex_lock(&migf->lock);
+ if (migf->disabled) {
+ done = -ENODEV;
+ goto out_unlock;
+ }
+
+ /* Add only the pages missing to reach the end of this write. */
+ if (migf->allocated_length < requested_length) {
+ done = mlx5vf_add_migration_pages(
+ migf,
+ DIV_ROUND_UP(requested_length - migf->allocated_length,
+ PAGE_SIZE));
+ if (done)
+ goto out_unlock;
+ }
+
+ while (len) {
+ size_t page_offset;
+ struct page *page;
+ size_t page_len;
+ u8 *to_buff;
+ int ret;
+
+ page_offset = (*pos) % PAGE_SIZE;
+ page = mlx5vf_get_migration_page(migf, *pos - page_offset);
+ if (!page) {
+ if (done == 0)
+ done = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Copy at most up to the end of the current page. */
+ page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
+ to_buff = kmap_local_page(page);
+ ret = copy_from_user(to_buff + page_offset, buf, page_len);
+ kunmap_local(to_buff);
+ if (ret) {
+ done = -EFAULT;
+ goto out_unlock;
+ }
+ *pos += page_len;
+ len -= page_len;
+ done += page_len;
+ buf += page_len;
+ migf->total_length += page_len;
+ }
+out_unlock:
+ mutex_unlock(&migf->lock);
+ return done;
+}
+
+/* File operations of the write-only, non-seekable resuming file. */
+static const struct file_operations mlx5vf_resume_fops = {
+ .owner = THIS_MODULE,
+ .write = mlx5vf_resume_write,
+ .release = mlx5vf_release_file,
+ .llseek = no_llseek,
+};
+
+/*
+ * Create the (initially empty) resuming migration file.
+ *
+ * Returns the new migration file, or an ERR_PTR() when the structure or
+ * its anonymous file cannot be created.
+ */
+static struct mlx5_vf_migration_file *
+mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
+{
+	struct mlx5_vf_migration_file *migf;
+	struct file *filp;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
+				  O_WRONLY);
+	if (IS_ERR(filp)) {
+		kfree(migf);
+		return ERR_CAST(filp);
+	}
+
+	migf->filp = filp;
+	stream_open(filp->f_inode, filp);
+	mutex_init(&migf->lock);
+
+	return migf;
+}
+
+/*
+ * Disable and drop the device's references to its migration files, the
+ * resuming one first, then the saving one. Missing files are skipped.
+ */
+static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
+{
+	struct mlx5_vf_migration_file **slots[] = {
+		&mvdev->resuming_migf,
+		&mvdev->saving_migf,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(slots); i++) {
+		struct mlx5_vf_migration_file *migf = *slots[i];
+
+		if (!migf)
+			continue;
+
+		mlx5vf_disable_fd(migf);
+		fput(migf->filp);
+		*slots[i] = NULL;
+	}
+}
+
+/*
+ * Perform the single migration state arc from mvdev->mig_state to @new.
+ *
+ * Returns NULL for arcs that produce no file, the migration data file
+ * for the STOP -> STOP_COPY and STOP -> RESUMING arcs (with an extra
+ * reference taken for the caller), or an ERR_PTR() on failure. An arc
+ * not listed below triggers a WARN and returns ERR_PTR(-EINVAL).
+ * mvdev->mig_state itself is not updated here.
+ */
+static struct file *
+mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
+ u32 new)
+{
+ u32 cur = mvdev->mig_state;
+ int ret;
+
+ if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
+ ret = mlx5vf_cmd_suspend_vhca(
+ mvdev->core_device.pdev, mvdev->vhca_id,
+ MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
+ if (ret)
+ return ERR_PTR(ret);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+ ret = mlx5vf_cmd_resume_vhca(
+ mvdev->core_device.pdev, mvdev->vhca_id,
+ MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
+ if (ret)
+ return ERR_PTR(ret);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+ ret = mlx5vf_cmd_suspend_vhca(
+ mvdev->core_device.pdev, mvdev->vhca_id,
+ MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
+ if (ret)
+ return ERR_PTR(ret);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
+ ret = mlx5vf_cmd_resume_vhca(
+ mvdev->core_device.pdev, mvdev->vhca_id,
+ MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
+ if (ret)
+ return ERR_PTR(ret);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
+ struct mlx5_vf_migration_file *migf;
+
+ migf = mlx5vf_pci_save_device_data(mvdev);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ /* One reference stays with mvdev, the other goes to the caller. */
+ get_file(migf->filp);
+ mvdev->saving_migf = migf;
+ return migf->filp;
+ }
+
+ if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
+ mlx5vf_disable_fds(mvdev);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
+ struct mlx5_vf_migration_file *migf;
+
+ migf = mlx5vf_pci_resume_device_data(mvdev);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ /* One reference stays with mvdev, the other goes to the caller. */
+ get_file(migf->filp);
+ mvdev->resuming_migf = migf;
+ return migf->filp;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
+ /* Push the data written so far into the device, then drop the files. */
+ ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev,
+ mvdev->vhca_id,
+ mvdev->resuming_migf);
+ if (ret)
+ return ERR_PTR(ret);
+ mlx5vf_disable_fds(mvdev);
+ return NULL;
+ }
+
+ /*
+ * vfio_mig_get_next_state() does not use arcs other than the above
+ */
+ WARN_ON(true);
+ return ERR_PTR(-EINVAL);
+}
+
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if exists.
+ *
+ * 'deferred_reset' is set by mlx5vf_pci_aer_reset_done() under reset_lock.
+ * While a deferred reset is pending, the migration state is put back to
+ * RUNNING and the migration files are disabled, still under state_mutex,
+ * and the flag is checked again. The mutex is only released while
+ * reset_lock is held, so the reset_done handler, which sets the flag and
+ * tries the mutex under the same spinlock, cannot run between the final
+ * check and the unlock.
+ */
+static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
+{
+again:
+ spin_lock(&mvdev->reset_lock);
+ if (mvdev->deferred_reset) {
+ mvdev->deferred_reset = false;
+ /* Drop the spinlock before doing the actual cleanup work */
+ spin_unlock(&mvdev->reset_lock);
+ mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+ mlx5vf_disable_fds(mvdev);
+ goto again;
+ }
+ /* No reset pending: release state_mutex before reset_lock */
+ mutex_unlock(&mvdev->state_mutex);
+ spin_unlock(&mvdev->reset_lock);
+}
+
+/*
+ * vfio_device_ops.migration_set_state callback.
+ *
+ * Walks the device from its current migration state to @new_state one FSM
+ * arc at a time, asking vfio_mig_get_next_state() for each intermediate
+ * step and executing it with mlx5vf_pci_step_device_state_locked(). The
+ * whole walk runs under state_mutex, which is released through
+ * mlx5vf_state_mutex_unlock() so that a deferred reset is handled.
+ *
+ * Returns NULL or the struct file produced by the last arc on success, or
+ * an ERR_PTR() if computing or executing a step failed; in that case
+ * mig_state stays at the last state that was reached successfully.
+ */
+static struct file *
+mlx5vf_pci_set_device_state(struct vfio_device *vdev,
+ enum vfio_device_mig_state new_state)
+{
+ struct mlx5vf_pci_core_device *mvdev = container_of(
+ vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+ enum vfio_device_mig_state next_state;
+ struct file *res = NULL;
+ int ret;
+
+ mutex_lock(&mvdev->state_mutex);
+ while (new_state != mvdev->mig_state) {
+ ret = vfio_mig_get_next_state(vdev, mvdev->mig_state,
+ new_state, &next_state);
+ if (ret) {
+ res = ERR_PTR(ret);
+ break;
+ }
+ res = mlx5vf_pci_step_device_state_locked(mvdev, next_state);
+ if (IS_ERR(res))
+ break;
+ mvdev->mig_state = next_state;
+ /*
+ * A file may only be produced by the final arc; getting one
+ * for an intermediate step is unexpected, so drop it and fail.
+ */
+ if (WARN_ON(res && new_state != mvdev->mig_state)) {
+ fput(res);
+ res = ERR_PTR(-EINVAL);
+ break;
+ }
+ }
+ mlx5vf_state_mutex_unlock(mvdev);
+ return res;
+}
+
+/*
+ * vfio_device_ops.migration_get_state callback: report the current migration
+ * state through @curr_state. The state is sampled under state_mutex and the
+ * function always returns 0.
+ */
+static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
+				       enum vfio_device_mig_state *curr_state)
+{
+	struct mlx5vf_pci_core_device *mvdev;
+
+	mvdev = container_of(vdev, struct mlx5vf_pci_core_device,
+			     core_device.vdev);
+
+	mutex_lock(&mvdev->state_mutex);
+	*curr_state = mvdev->mig_state;
+	/* Unlock through the helper so that a deferred reset is handled. */
+	mlx5vf_state_mutex_unlock(mvdev);
+
+	return 0;
+}
+
+/*
+ * pci_error_handlers.reset_done callback.
+ *
+ * Does nothing for devices without migration support. Otherwise it marks a
+ * reset as pending and, if state_mutex is free, handles it right away via
+ * mlx5vf_state_mutex_unlock(); if the mutex is held elsewhere, the pending
+ * flag is left for that holder to process when it unlocks.
+ */
+static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+ struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev);
+
+ if (!mvdev->migrate_cap)
+ return;
+
+ /*
+ * As the higher VFIO layers are holding locks across reset and using
+ * those same locks with the mm_lock we need to prevent ABBA deadlock
+ * with the state_mutex and mm_lock.
+ * In case the state_mutex was taken already we defer the cleanup work
+ * to the unlock flow of the other running context.
+ */
+ spin_lock(&mvdev->reset_lock);
+ mvdev->deferred_reset = true;
+ /* Never block on state_mutex here; only take it if it is free */
+ if (!mutex_trylock(&mvdev->state_mutex)) {
+ spin_unlock(&mvdev->reset_lock);
+ return;
+ }
+ spin_unlock(&mvdev->reset_lock);
+ /* We own the mutex: the unlock helper performs the reset cleanup */
+ mlx5vf_state_mutex_unlock(mvdev);
+}
+
+/*
+ * vfio_device_ops.open_device callback.
+ *
+ * Enables the PCI core device. For migration capable devices it also looks
+ * up the vhca id of the VF and starts the migration state machine in
+ * RUNNING. On any failure after the enable, the core device is disabled
+ * again and the error is returned.
+ */
+static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
+{
+	struct mlx5vf_pci_core_device *mvdev = container_of(
+		core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+	struct vfio_pci_core_device *vdev = &mvdev->core_device;
+	int ret;
+
+	ret = vfio_pci_core_enable(vdev);
+	if (ret)
+		return ret;
+
+	if (mvdev->migrate_cap) {
+		int vf_id = pci_iov_vf_id(vdev->pdev);
+
+		if (vf_id < 0) {
+			ret = vf_id;
+			goto out_disable;
+		}
+
+		/* The vhca id is queried using the VF index plus one. */
+		ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1,
+					     &mvdev->vhca_id);
+		if (ret)
+			goto out_disable;
+
+		mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+	}
+
+	vfio_pci_core_finish_enable(vdev);
+	return 0;
+
+out_disable:
+	vfio_pci_core_disable(vdev);
+	return ret;
+}
+
+/*
+ * vfio_device_ops.close_device callback: release any migration files still
+ * attached to the device, then run the common vfio-pci close path.
+ */
+static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
+{
+	struct mlx5vf_pci_core_device *mvdev;
+
+	mvdev = container_of(core_vdev, struct mlx5vf_pci_core_device,
+			     core_device.vdev);
+
+	mlx5vf_disable_fds(mvdev);
+	vfio_pci_core_close_device(core_vdev);
+}
+
+/*
+ * VFIO device operations of the mlx5 variant driver: open/close and the
+ * migration state callbacks are driver specific, everything else is
+ * forwarded to the vfio-pci core implementation.
+ */
+static const struct vfio_device_ops mlx5vf_pci_ops = {
+	.name = "mlx5-vfio-pci",
+	/* Driver specific callbacks */
+	.open_device = mlx5vf_pci_open_device,
+	.close_device = mlx5vf_pci_close_device,
+	.migration_set_state = mlx5vf_pci_set_device_state,
+	.migration_get_state = mlx5vf_pci_get_device_state,
+	/* Generic vfio-pci core callbacks */
+	.ioctl = vfio_pci_core_ioctl,
+	.device_feature = vfio_pci_core_ioctl_feature,
+	.read = vfio_pci_core_read,
+	.write = vfio_pci_core_write,
+	.mmap = vfio_pci_core_mmap,
+	.request = vfio_pci_core_request,
+	.match = vfio_pci_core_match,
+};
+
+/*
+ * PCI probe callback.
+ *
+ * Allocates and initializes the per-device structure, enables the migration
+ * support (state_mutex, reset_lock, migration flags) when the device is a
+ * VF whose mlx5 core device reports the 'migration' capability, and
+ * registers the device with the vfio-pci core.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that was
+ * set up here is undone.
+ */
+static int mlx5vf_pci_probe(struct pci_dev *pdev,
+			    const struct pci_device_id *id)
+{
+	struct mlx5vf_pci_core_device *mvdev;
+	int ret;
+
+	mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
+	if (!mvdev)
+		return -ENOMEM;
+	vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops);
+
+	if (pdev->is_virtfn) {
+		struct mlx5_core_dev *mdev =
+			mlx5_vf_get_core_dev(pdev);
+
+		if (mdev) {
+			if (MLX5_CAP_GEN(mdev, migration)) {
+				mvdev->migrate_cap = 1;
+				mvdev->core_device.vdev.migration_flags =
+					VFIO_MIGRATION_STOP_COPY |
+					VFIO_MIGRATION_P2P;
+				mutex_init(&mvdev->state_mutex);
+				spin_lock_init(&mvdev->reset_lock);
+			}
+			mlx5_vf_put_core_dev(mdev);
+		}
+	}
+
+	/*
+	 * Publish the driver data before the device is registered:
+	 * mlx5vf_pci_aer_reset_done() dereferences drvdata unconditionally,
+	 * so it must already be valid once the device is visible.
+	 */
+	dev_set_drvdata(&pdev->dev, mvdev);
+
+	ret = vfio_pci_core_register_device(&mvdev->core_device);
+	if (ret)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	/* Do not leave a pointer to the about-to-be-freed structure behind */
+	dev_set_drvdata(&pdev->dev, NULL);
+	vfio_pci_core_uninit_device(&mvdev->core_device);
+	kfree(mvdev);
+	return ret;
+}
+
+/*
+ * PCI remove callback: unregister and uninitialize the vfio-pci core device
+ * and free the per-device structure that was stored as driver data.
+ */
+static void mlx5vf_pci_remove(struct pci_dev *pdev)
+{
+	struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev);
+	struct vfio_pci_core_device *core = &mvdev->core_device;
+
+	vfio_pci_core_unregister_device(core);
+	vfio_pci_core_uninit_device(core);
+	kfree(mvdev);
+}
+
+/*
+ * PCI IDs handled by this driver. The entry is declared with
+ * PCI_DRIVER_OVERRIDE_DEVICE_VFIO(); the table is terminated by an empty
+ * entry and exported through MODULE_DEVICE_TABLE().
+ */
+static const struct pci_device_id mlx5vf_pci_table[] = {
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_MELLANOX, 0x101e) }, /* ConnectX Family mlx5Gen Virtual Function */
+ {}
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);
+
+/*
+ * PCI error handlers: error detection is left to the vfio-pci core helper,
+ * while reset completion is handled locally so that the migration state can
+ * be reset as well.
+ */
+static const struct pci_error_handlers mlx5vf_err_handlers = {
+	.error_detected = vfio_pci_core_aer_err_detected,
+	.reset_done = mlx5vf_pci_aer_reset_done,
+};
+
+static struct pci_driver mlx5vf_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mlx5vf_pci_table,
+ .probe = mlx5vf_pci_probe,
+ .remove = mlx5vf_pci_remove,
+ .err_handler = &mlx5vf_err_handlers,
+};
+
+/*
+ * Module init/exit only register and unregister the PCI driver, which is
+ * exactly what module_pci_driver() generates.
+ */
+module_pci_driver(mlx5vf_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Max Gurtovoy <mgurtovoy@nvidia.com>");
+MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
+MODULE_DESCRIPTION(
+	"MLX5 VFIO PCI - User Level meta-driver for MLX5 device family");
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a5ce92beb655..2b047469e02f 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -130,6 +130,7 @@ static const struct vfio_device_ops vfio_pci_ops = {
.open_device = vfio_pci_open_device,
.close_device = vfio_pci_core_close_device,
.ioctl = vfio_pci_core_ioctl,
+ .device_feature = vfio_pci_core_ioctl_feature,
.read = vfio_pci_core_read,
.write = vfio_pci_core_write,
.mmap = vfio_pci_core_mmap,
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index f948e6cd2993..b7bb16f92ac6 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -228,6 +228,19 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat
if (!ret) {
/* D3 might be unsupported via quirk, skip unless in D3 */
if (needs_save && pdev->current_state >= PCI_D3hot) {
+ /*
+ * The current PCI state will be saved locally in
+ * 'pm_save' during the D3hot transition. When the
+ * device state is changed to D0 again with the current
+ * function, then pci_load_and_free_saved_state() will
+ * restore the state and will free the memory pointed to
+ * by 'pm_save'. There are a few cases where the PCI power
+ * state can be changed to D0 without the involvement
+ * of the driver. For these cases, free the earlier
+ * allocated memory first before overwriting 'pm_save'
+ * to prevent the memory leak.
+ */
+ kfree(vdev->pm_save);
vdev->pm_save = pci_store_saved_state(pdev);
} else if (needs_restore) {
pci_load_and_free_saved_state(pdev, &vdev->pm_save);
@@ -322,6 +335,17 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
/* For needs_reset */
lockdep_assert_held(&vdev->vdev.dev_set->lock);
+ /*
+ * This function can be invoked while the power state is non-D0.
+ * This function calls __pci_reset_function_locked() which internally
+ * can use pci_pm_reset() for the function reset. pci_pm_reset() will
+ * fail if the power state is non-D0. Also, for the devices which
+ * have NoSoftRst-, the reset function can cause the PCI config space
+ * reset without restoring the original state (saved locally in
+ * 'vdev->pm_save').
+ */
+ vfio_pci_set_power_state(vdev, PCI_D0);
+
/* Stop the device from further DMA */
pci_clear_master(pdev);
@@ -921,6 +945,19 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
return -EINVAL;
vfio_pci_zap_and_down_write_memory_lock(vdev);
+
+ /*
+ * This function can be invoked while the power state is non-D0.
+ * If pci_try_reset_function() has been called while the power
+ * state is non-D0, then pci_try_reset_function() will
+ * internally set the power state to D0 without vfio driver
+ * involvement. For the devices which have NoSoftRst-, the
+ * reset function can cause the PCI config space reset without
+ * restoring the original state (saved locally in
+ * 'vdev->pm_save').
+ */
+ vfio_pci_set_power_state(vdev, PCI_D0);
+
ret = pci_try_reset_function(vdev->pdev);
up_write(&vdev->memory_lock);
@@ -1114,70 +1151,50 @@ hot_reset_release:
return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
ioeventfd.data, count, ioeventfd.fd);
- } else if (cmd == VFIO_DEVICE_FEATURE) {
- struct vfio_device_feature feature;
- uuid_t uuid;
-
- minsz = offsetofend(struct vfio_device_feature, flags);
-
- if (copy_from_user(&feature, (void __user *)arg, minsz))
- return -EFAULT;
-
- if (feature.argsz < minsz)
- return -EINVAL;
-
- /* Check unknown flags */
- if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
- VFIO_DEVICE_FEATURE_SET |
- VFIO_DEVICE_FEATURE_GET |
- VFIO_DEVICE_FEATURE_PROBE))
- return -EINVAL;
-
- /* GET & SET are mutually exclusive except with PROBE */
- if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
- (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
- (feature.flags & VFIO_DEVICE_FEATURE_GET))
- return -EINVAL;
-
- switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
- case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
- if (!vdev->vf_token)
- return -ENOTTY;
-
- /*
- * We do not support GET of the VF Token UUID as this
- * could expose the token of the previous device user.
- */
- if (feature.flags & VFIO_DEVICE_FEATURE_GET)
- return -EINVAL;
-
- if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
- return 0;
+ }
+ return -ENOTTY;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
- /* Don't SET unless told to do so */
- if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
- return -EINVAL;
+static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+ uuid_t uuid;
+ int ret;
- if (feature.argsz < minsz + sizeof(uuid))
- return -EINVAL;
+ if (!vdev->vf_token)
+ return -ENOTTY;
+ /*
+ * We do not support GET of the VF Token UUID as this could
+ * expose the token of the previous device user.
+ */
+ ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET,
+ sizeof(uuid));
+ if (ret != 1)
+ return ret;
- if (copy_from_user(&uuid, (void __user *)(arg + minsz),
- sizeof(uuid)))
- return -EFAULT;
+ if (copy_from_user(&uuid, arg, sizeof(uuid)))
+ return -EFAULT;
- mutex_lock(&vdev->vf_token->lock);
- uuid_copy(&vdev->vf_token->uuid, &uuid);
- mutex_unlock(&vdev->vf_token->lock);
+ mutex_lock(&vdev->vf_token->lock);
+ uuid_copy(&vdev->vf_token->uuid, &uuid);
+ mutex_unlock(&vdev->vf_token->lock);
+ return 0;
+}
- return 0;
- default:
- return -ENOTTY;
- }
+int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz)
+{
+ switch (flags & VFIO_DEVICE_FEATURE_MASK) {
+ case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
+ return vfio_pci_core_feature_token(device, flags, arg, argsz);
+ default:
+ return -ENOTTY;
}
-
- return -ENOTTY;
}
-EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
+EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl_feature);
static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
@@ -1891,8 +1908,8 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
}
EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device);
-static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
+pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
{
struct vfio_pci_core_device *vdev;
struct vfio_device *device;
@@ -1914,6 +1931,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_CAN_RECOVER;
}
+EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected);
int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
@@ -1936,7 +1954,7 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
const struct pci_error_handlers vfio_pci_core_err_handlers = {
- .error_detected = vfio_pci_aer_err_detected,
+ .error_detected = vfio_pci_core_aer_err_detected,
};
EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
@@ -2055,6 +2073,18 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
}
cur_mem = NULL;
+ /*
+ * The pci_reset_bus() will reset all the devices in the bus.
+ * The power state can be non-D0 for some of the devices in the bus.
+ * For these devices, the pci_reset_bus() will internally set
+ * the power state to D0 without vfio driver involvement.
+ * For the devices which have NoSoftRst-, the reset function can
+ * cause the PCI config space reset without restoring the original
+ * state (saved locally in 'vdev->pm_save').
+ */
+ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
+ vfio_pci_set_power_state(cur, PCI_D0);
+
ret = pci_reset_bus(pdev);
err_undo:
@@ -2108,6 +2138,18 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
if (!pdev)
return false;
+ /*
+ * The pci_reset_bus() will reset all the devices in the bus.
+ * The power state can be non-D0 for some of the devices in the bus.
+ * For these devices, the pci_reset_bus() will internally set
+ * the power state to D0 without vfio driver involvement.
+ * For the devices which have NoSoftRst-, the reset function can
+ * cause the PCI config space reset without restoring the original
+ * state (saved locally in 'vdev->pm_save').
+ */
+ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
+ vfio_pci_set_power_state(cur, PCI_D0);
+
ret = pci_reset_bus(pdev);
if (ret)
return false;
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 57d3b2cbbd8e..82ac1569deb0 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -288,6 +288,7 @@ out:
return done;
}
+#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
@@ -355,6 +356,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
return done;
}
+#endif
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
bool test_mem)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 735d1d344af9..a4555014bd1e 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1557,15 +1557,303 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
return 0;
}
+/*
+ * vfio_mig_get_next_state - Compute the next step in the FSM
+ * @device - The device; its migration_flags select the supported states
+ * @cur_fsm - The current state the device is in
+ * @new_fsm - The target state to reach
+ * @next_fsm - Pointer to the next step to get to new_fsm
+ *
+ * Return 0 upon success, otherwise -errno
+ * Upon success the next step in the state progression between cur_fsm and
+ * new_fsm will be set in next_fsm.
+ *
+ * This breaks down requests for combination transitions into smaller steps and
+ * returns the next step to get to new_fsm. The function may need to be called
+ * multiple times before reaching new_fsm.
+ *
+ * -EINVAL is returned when cur_fsm or new_fsm is out of range or not
+ * supported according to the device's migration_flags (a bad cur_fsm also
+ * triggers a WARN), or when the computed next step is the ERROR state.
+ *
+ */
+int vfio_mig_get_next_state(struct vfio_device *device,
+ enum vfio_device_mig_state cur_fsm,
+ enum vfio_device_mig_state new_fsm,
+ enum vfio_device_mig_state *next_fsm)
+{
+ enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
+ /*
+ * The coding in this table requires the driver to implement the
+ * following FSM arcs:
+ * RESUMING -> STOP
+ * STOP -> RESUMING
+ * STOP -> STOP_COPY
+ * STOP_COPY -> STOP
+ *
+ * If P2P is supported then the driver must also implement these FSM
+ * arcs:
+ * RUNNING -> RUNNING_P2P
+ * RUNNING_P2P -> RUNNING
+ * RUNNING_P2P -> STOP
+ * STOP -> RUNNING_P2P
+ * Without P2P the driver must implement:
+ * RUNNING -> STOP
+ * STOP -> RUNNING
+ *
+ * The coding will step through multiple states for some combination
+ * transitions; if all optional features are supported, this means the
+ * following ones:
+ * RESUMING -> STOP -> RUNNING_P2P
+ * RESUMING -> STOP -> RUNNING_P2P -> RUNNING
+ * RESUMING -> STOP -> STOP_COPY
+ * RUNNING -> RUNNING_P2P -> STOP
+ * RUNNING -> RUNNING_P2P -> STOP -> RESUMING
+ * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
+ * RUNNING_P2P -> STOP -> RESUMING
+ * RUNNING_P2P -> STOP -> STOP_COPY
+ * STOP -> RUNNING_P2P -> RUNNING
+ * STOP_COPY -> STOP -> RESUMING
+ * STOP_COPY -> STOP -> RUNNING_P2P
+ * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
+ */
+ /* Indexed as [current state][target state]; the value is the next step */
+ static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
+ [VFIO_DEVICE_STATE_STOP] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+ },
+ [VFIO_DEVICE_STATE_RUNNING] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+ },
+ [VFIO_DEVICE_STATE_STOP_COPY] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+ },
+ [VFIO_DEVICE_STATE_RESUMING] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+ },
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
+ [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+ },
+ [VFIO_DEVICE_STATE_ERROR] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
+ [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+ },
+ };
+
+ /*
+ * Migration flags that must all be present in device->migration_flags
+ * for a state to be usable. ERROR requires every flag bit to be set.
+ */
+ static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
+ [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
+ [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
+ [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
+ [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
+ [VFIO_DEVICE_STATE_RUNNING_P2P] =
+ VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
+ [VFIO_DEVICE_STATE_ERROR] = ~0U,
+ };
+
+ /* The current state comes from the driver, so a bad value is a bug */
+ if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
+ (state_flags_table[cur_fsm] & device->migration_flags) !=
+ state_flags_table[cur_fsm]))
+ return -EINVAL;
+
+ /* A bad target state is rejected without a warning */
+ if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
+ (state_flags_table[new_fsm] & device->migration_flags) !=
+ state_flags_table[new_fsm])
+ return -EINVAL;
+
+ /*
+ * Arcs touching optional and unsupported states are skipped over. The
+ * driver will instead see an arc from the original state to the next
+ * logical state, as per the above comment.
+ */
+ *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
+ while ((state_flags_table[*next_fsm] & device->migration_flags) !=
+ state_flags_table[*next_fsm])
+ *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];
+
+ return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
+
+/*
+ * Convert the driver's struct file into an FD number and return it to
+ * userspace through @mig->data_fd.
+ *
+ * The FD is only installed after @mig was copied out successfully. On any
+ * failure the reserved FD number (if any) is released and the reference on
+ * @filp is dropped.
+ */
+static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
+				   struct vfio_device_feature_mig_state *mig)
+{
+	int fd = get_unused_fd_flags(O_CLOEXEC);
+
+	if (fd < 0) {
+		fput(filp);
+		return fd;
+	}
+
+	mig->data_fd = fd;
+	if (copy_to_user(arg, mig, sizeof(*mig))) {
+		put_unused_fd(fd);
+		fput(filp);
+		return -EFAULT;
+	}
+
+	fd_install(fd, filp);
+	return 0;
+}
+
+/*
+ * Handle VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: GET reports the current
+ * migration state, SET moves the device to the requested state.
+ *
+ * Returns -ENOTTY if the driver lacks the migration callbacks, whatever
+ * vfio_check_feature() returned if that is not 1, -EFAULT on a failed user
+ * copy, the error of the driver callback, or 0 on success. If SET produced
+ * a migration file, its FD is returned in data_fd; otherwise data_fd is -1.
+ */
+static int
+vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_mig_state, data_fd);
+ struct vfio_device_feature_mig_state mig;
+ struct file *filp = NULL;
+ int ret;
+
+ if (!device->ops->migration_set_state ||
+ !device->ops->migration_get_state)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET |
+ VFIO_DEVICE_FEATURE_GET,
+ sizeof(mig));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&mig, arg, minsz))
+ return -EFAULT;
+
+ if (flags & VFIO_DEVICE_FEATURE_GET) {
+ enum vfio_device_mig_state curr_state;
+
+ ret = device->ops->migration_get_state(device, &curr_state);
+ if (ret)
+ return ret;
+ mig.device_state = curr_state;
+ goto out_copy;
+ }
+
+ /* Handle the VFIO_DEVICE_FEATURE_SET */
+ filp = device->ops->migration_set_state(device, mig.device_state);
+ if (IS_ERR(filp) || !filp)
+ goto out_copy;
+
+ return vfio_ioct_mig_return_fd(filp, arg, &mig);
+out_copy:
+ /*
+ * Reached for GET (filp is NULL), for SET without a file and for a
+ * failed SET: data_fd is reported as -1 in all three cases, and a SET
+ * error is returned only after the copy to userspace succeeded.
+ */
+ mig.data_fd = -1;
+ if (copy_to_user(arg, &mig, sizeof(mig)))
+ return -EFAULT;
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+ return 0;
+}
+
+/*
+ * Handle VFIO_DEVICE_FEATURE_MIGRATION: report the device's migration_flags
+ * to userspace. Only GET is accepted.
+ *
+ * Returns -ENOTTY if the driver lacks either migration callback, whatever
+ * vfio_check_feature() returned if that is not 1, -EFAULT if the copy to
+ * userspace fails, and 0 otherwise.
+ */
+static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
+					       u32 flags, void __user *arg,
+					       size_t argsz)
+{
+	struct vfio_device_feature_migration mig = {
+		.flags = device->migration_flags,
+	};
+	int ret;
+
+	if (!(device->ops->migration_set_state &&
+	      device->ops->migration_get_state))
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
+				 sizeof(mig));
+	if (ret != 1)
+		return ret;
+
+	return copy_to_user(arg, &mig, sizeof(mig)) ? -EFAULT : 0;
+}
+
+/*
+ * Common entry point for the VFIO_DEVICE_FEATURE ioctl.
+ *
+ * Reads and validates the feature header, then dispatches on the feature
+ * number: the migration features are handled here, everything else goes to
+ * the driver's device_feature callback (-EINVAL if the driver has none).
+ * Handlers receive the user pointer to the payload and the payload size,
+ * i.e. argsz without the header.
+ */
+static int vfio_ioctl_device_feature(struct vfio_device *device,
+				     struct vfio_device_feature __user *arg)
+{
+	const size_t minsz = offsetofend(struct vfio_device_feature, flags);
+	const u32 get_and_set = VFIO_DEVICE_FEATURE_SET |
+				VFIO_DEVICE_FEATURE_GET;
+	const u32 known_flags = VFIO_DEVICE_FEATURE_MASK | get_and_set |
+				VFIO_DEVICE_FEATURE_PROBE;
+	struct vfio_device_feature feature;
+	size_t payload_sz;
+
+	if (copy_from_user(&feature, arg, minsz))
+		return -EFAULT;
+
+	if (feature.argsz < minsz)
+		return -EINVAL;
+
+	/* Check unknown flags */
+	if (feature.flags & ~known_flags)
+		return -EINVAL;
+
+	/* GET & SET are mutually exclusive except with PROBE */
+	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
+	    (feature.flags & get_and_set) == get_and_set)
+		return -EINVAL;
+
+	payload_sz = feature.argsz - minsz;
+
+	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
+	case VFIO_DEVICE_FEATURE_MIGRATION:
+		return vfio_ioctl_device_feature_migration(
+			device, feature.flags, arg->data, payload_sz);
+	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
+		return vfio_ioctl_device_feature_mig_device_state(
+			device, feature.flags, arg->data, payload_sz);
+	default:
+		break;
+	}
+
+	if (unlikely(!device->ops->device_feature))
+		return -EINVAL;
+	return device->ops->device_feature(device, feature.flags, arg->data,
+					   payload_sz);
+}
+
static long vfio_device_fops_unl_ioctl(struct file *filep,
unsigned int cmd, unsigned long arg)
{
struct vfio_device *device = filep->private_data;
- if (unlikely(!device->ops->ioctl))
- return -EINVAL;
-
- return device->ops->ioctl(device, cmd, arg);
+ switch (cmd) {
+ case VFIO_DEVICE_FEATURE:
+ return vfio_ioctl_device_feature(device, (void __user *)arg);
+ default:
+ if (unlikely(!device->ops->ioctl))
+ return -EINVAL;
+ return device->ops->ioctl(device, cmd, arg);
+ }
}
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,