summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-12 13:11:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-12 13:11:26 -0700
commit467590e055f5c714fb457803250415879d0da9e5 (patch)
tree7533a26dcd19023d5eeac54f62432097c85f4ef3
parent763f96944c954ce0e00a10a7bdfe29adbe4f92eb (diff)
parentc1abca96b252a9627f99f39215b84e5de92bf1e3 (diff)
downloadlinux-rpi-467590e055f5c714fb457803250415879d0da9e5.tar.gz
linux-rpi-467590e055f5c714fb457803250415879d0da9e5.tar.bz2
linux-rpi-467590e055f5c714fb457803250415879d0da9e5.zip
Merge tag 'vfio-v4.18-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson: - Bind type1 task tracking to group_leader to facilitate vCPU hotplug in QEMU (Alex Williamson) - Sample mdev display drivers, including region-based host and guest Linux drivers and bochs compatible dmabuf device (Gerd Hoffmann) - Fix vfio-platform reset module leak (Geert Uytterhoeven) - vfio-platform error message consistency (Geert Uytterhoeven) - Global checking for mdev uuid collisions rather than per parent device (Alex Williamson) - Use match_string() helper (Yisheng Xie) - vfio-platform PM domain fixes (Geert Uytterhoeven) - Fix sample mbochs driver build dependency (Arnd Bergmann) * tag 'vfio-v4.18-rc1' of git://github.com/awilliam/linux-vfio: samples: mbochs: add DMA_SHARED_BUFFER dependency vfio: platform: Fix using devices in PM Domains vfio: use match_string() helper vfio/mdev: Re-order sysfs attribute creation vfio/mdev: Check globally for duplicate devices vfio: platform: Make printed error messages more consistent vfio: platform: Fix reset module leak in error path sample: vfio bochs vbe display (host device for bochs-drm) sample: vfio mdev display - guest driver sample: vfio mdev display - host device vfio/type1: Fix task tracking for QEMU vCPU hotplug
-rw-r--r--Documentation/vfio-mediated-device.txt5
-rw-r--r--drivers/vfio/mdev/mdev_core.c102
-rw-r--r--drivers/vfio/mdev/mdev_private.h2
-rw-r--r--drivers/vfio/mdev/mdev_sysfs.c14
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c30
-rw-r--r--drivers/vfio/vfio.c11
-rw-r--r--drivers/vfio/vfio_iommu_type1.c73
-rw-r--r--samples/Kconfig31
-rw-r--r--samples/vfio-mdev/Makefile3
-rw-r--r--samples/vfio-mdev/mbochs.c1406
-rw-r--r--samples/vfio-mdev/mdpy-defs.h22
-rw-r--r--samples/vfio-mdev/mdpy-fb.c232
-rw-r--r--samples/vfio-mdev/mdpy.c807
13 files changed, 2622 insertions, 116 deletions
diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt
index 1b3950346532..c3f69bcaf96e 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/vfio-mediated-device.txt
@@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows:
* create: allocate basic resources in a driver for a mediated device
* remove: free resources in a driver when a mediated device is destroyed
+(Note that mdev-core provides no implicit serialization of create/remove
+callbacks per mdev parent device, per mdev type, or any other categorization.
+Vendor drivers are expected to be fully asynchronous in this respect or
+provide their own internal resource protection.)
+
The callbacks in the mdev_parent_ops structure are as follows:
* open: open callback of mediated device
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 126991046eb7..0212f0ee8aea 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev)
}
EXPORT_SYMBOL(mdev_uuid);
-static int _find_mdev_device(struct device *dev, void *data)
-{
- struct mdev_device *mdev;
-
- if (!dev_is_mdev(dev))
- return 0;
-
- mdev = to_mdev_device(dev);
-
- if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
- return 1;
-
- return 0;
-}
-
-static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
-{
- struct device *dev;
-
- dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
- if (dev) {
- put_device(dev);
- return true;
- }
-
- return false;
-}
-
/* Should be called holding parent_list_lock */
static struct mdev_parent *__find_parent_device(struct device *dev)
{
@@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
}
kref_init(&parent->ref);
- mutex_init(&parent->lock);
parent->dev = dev;
parent->ops = ops;
@@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev)
{
struct mdev_device *mdev = to_mdev_device(dev);
+ mutex_lock(&mdev_list_lock);
+ list_del(&mdev->next);
+ mutex_unlock(&mdev_list_lock);
+
dev_dbg(&mdev->dev, "MDEV: destroying\n");
kfree(mdev);
}
@@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev)
int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
{
int ret;
- struct mdev_device *mdev;
+ struct mdev_device *mdev, *tmp;
struct mdev_parent *parent;
struct mdev_type *type = to_mdev_type(kobj);
@@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
if (!parent)
return -EINVAL;
- mutex_lock(&parent->lock);
+ mutex_lock(&mdev_list_lock);
/* Check for duplicate */
- if (mdev_device_exist(parent, uuid)) {
- ret = -EEXIST;
- goto create_err;
+ list_for_each_entry(tmp, &mdev_list, next) {
+ if (!uuid_le_cmp(tmp->uuid, uuid)) {
+ mutex_unlock(&mdev_list_lock);
+ ret = -EEXIST;
+ goto mdev_fail;
+ }
}
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev) {
+ mutex_unlock(&mdev_list_lock);
ret = -ENOMEM;
- goto create_err;
+ goto mdev_fail;
}
memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
+ list_add(&mdev->next, &mdev_list);
+ mutex_unlock(&mdev_list_lock);
+
mdev->parent = parent;
kref_init(&mdev->ref);
@@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
ret = device_register(&mdev->dev);
if (ret) {
put_device(&mdev->dev);
- goto create_err;
+ goto mdev_fail;
}
ret = mdev_device_create_ops(kobj, mdev);
if (ret)
- goto create_failed;
+ goto create_fail;
ret = mdev_create_sysfs_files(&mdev->dev, type);
if (ret) {
mdev_device_remove_ops(mdev, true);
- goto create_failed;
+ goto create_fail;
}
mdev->type_kobj = kobj;
+ mdev->active = true;
dev_dbg(&mdev->dev, "MDEV: created\n");
- mutex_unlock(&parent->lock);
-
- mutex_lock(&mdev_list_lock);
- list_add(&mdev->next, &mdev_list);
- mutex_unlock(&mdev_list_lock);
-
- return ret;
+ return 0;
-create_failed:
+create_fail:
device_unregister(&mdev->dev);
-
-create_err:
- mutex_unlock(&parent->lock);
+mdev_fail:
mdev_put_parent(parent);
return ret;
}
@@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove)
struct mdev_parent *parent;
struct mdev_type *type;
int ret;
- bool found = false;
mdev = to_mdev_device(dev);
mutex_lock(&mdev_list_lock);
list_for_each_entry(tmp, &mdev_list, next) {
- if (tmp == mdev) {
- found = true;
+ if (tmp == mdev)
break;
- }
}
- if (found)
- list_del(&mdev->next);
+ if (tmp != mdev) {
+ mutex_unlock(&mdev_list_lock);
+ return -ENODEV;
+ }
- mutex_unlock(&mdev_list_lock);
+ if (!mdev->active) {
+ mutex_unlock(&mdev_list_lock);
+ return -EAGAIN;
+ }
- if (!found)
- return -ENODEV;
+ mdev->active = false;
+ mutex_unlock(&mdev_list_lock);
type = to_mdev_type(mdev->type_kobj);
parent = mdev->parent;
- mutex_lock(&parent->lock);
ret = mdev_device_remove_ops(mdev, force_remove);
if (ret) {
- mutex_unlock(&parent->lock);
-
- mutex_lock(&mdev_list_lock);
- list_add(&mdev->next, &mdev_list);
- mutex_unlock(&mdev_list_lock);
-
+ mdev->active = true;
return ret;
}
mdev_remove_sysfs_files(dev, type);
device_unregister(dev);
- mutex_unlock(&parent->lock);
mdev_put_parent(parent);
return 0;
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index a9cefd70a705..b5819b7d7ef7 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -20,7 +20,6 @@ struct mdev_parent {
struct device *dev;
const struct mdev_parent_ops *ops;
struct kref ref;
- struct mutex lock;
struct list_head next;
struct kset *mdev_types_kset;
struct list_head type_list;
@@ -34,6 +33,7 @@ struct mdev_device {
struct kref ref;
struct list_head next;
struct kobject *type_kobj;
+ bool active;
};
#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index 802df210929b..249472f05509 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -257,24 +257,24 @@ int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
{
int ret;
- ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
- if (ret)
- return ret;
-
ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
if (ret)
- goto device_link_failed;
+ return ret;
ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
if (ret)
goto type_link_failed;
+ ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
+ if (ret)
+ goto create_files_failed;
+
return ret;
+create_files_failed:
+ sysfs_remove_link(&dev->kobj, "mdev_type");
type_link_failed:
sysfs_remove_link(type->devices_kobj, dev_name(dev));
-device_link_failed:
- sysfs_remove_files(&dev->kobj, mdev_device_attrs);
return ret;
}
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 4c27f4be3c3d..c0cd824be2b7 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -17,6 +17,7 @@
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
@@ -239,6 +240,7 @@ static void vfio_platform_release(void *device_data)
ret, extra_dbg ? extra_dbg : "");
WARN_ON(1);
}
+ pm_runtime_put(vdev->device);
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
@@ -269,6 +271,10 @@ static int vfio_platform_open(void *device_data)
if (ret)
goto err_irq;
+ ret = pm_runtime_get_sync(vdev->device);
+ if (ret < 0)
+ goto err_pm;
+
ret = vfio_platform_call_reset(vdev, &extra_dbg);
if (ret && vdev->reset_required) {
dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n",
@@ -283,6 +289,8 @@ static int vfio_platform_open(void *device_data)
return 0;
err_rst:
+ pm_runtime_put(vdev->device);
+err_pm:
vfio_platform_irq_cleanup(vdev);
err_irq:
vfio_platform_regions_cleanup(vdev);
@@ -630,8 +638,7 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
ret = device_property_read_string(dev, "compatible",
&vdev->compat);
if (ret)
- pr_err("VFIO: cannot retrieve compat for %s\n",
- vdev->name);
+ pr_err("VFIO: Cannot retrieve compat for %s\n", vdev->name);
return ret;
}
@@ -673,7 +680,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
ret = vfio_platform_get_reset(vdev);
if (ret && vdev->reset_required) {
- pr_err("vfio: no reset function found for device %s\n",
+ pr_err("VFIO: No reset function found for device %s\n",
vdev->name);
return ret;
}
@@ -681,18 +688,24 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
group = vfio_iommu_group_get(dev);
if (!group) {
pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_reset;
}
ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
- if (ret) {
- vfio_iommu_group_put(group, dev);
- return ret;
- }
+ if (ret)
+ goto put_iommu;
mutex_init(&vdev->igate);
+ pm_runtime_enable(vdev->device);
return 0;
+
+put_iommu:
+ vfio_iommu_group_put(group, dev);
+put_reset:
+ vfio_platform_put_reset(vdev);
+ return ret;
}
EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
@@ -703,6 +716,7 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
vdev = vfio_del_group_dev(dev);
if (vdev) {
+ pm_runtime_disable(vdev->device);
vfio_platform_put_reset(vdev);
vfio_iommu_group_put(dev->iommu_group, dev);
}
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 721f97f8dac1..64833879f75d 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -630,8 +630,6 @@ static const char * const vfio_driver_whitelist[] = { "pci-stub" };
static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
- int i;
-
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
@@ -639,12 +637,9 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
return true;
}
- for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
- if (!strcmp(drv->name, vfio_driver_whitelist[i]))
- return true;
- }
-
- return false;
+ return match_string(vfio_driver_whitelist,
+ ARRAY_SIZE(vfio_driver_whitelist),
+ drv->name) >= 0;
}
/*
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 3c082451ab1a..2c75b33db4ac 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -83,6 +83,7 @@ struct vfio_dma {
size_t size; /* Map size (bytes) */
int prot; /* IOMMU_READ/WRITE */
bool iommu_mapped;
+ bool lock_cap; /* capable(CAP_IPC_LOCK) */
struct task_struct *task;
struct rb_root pfn_list; /* Ex-user pinned pfn list */
};
@@ -253,29 +254,25 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
return ret;
}
-static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
+static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
{
struct mm_struct *mm;
- bool is_current;
int ret;
if (!npage)
return 0;
- is_current = (task->mm == current->mm);
-
- mm = is_current ? task->mm : get_task_mm(task);
+ mm = async ? get_task_mm(dma->task) : dma->task->mm;
if (!mm)
return -ESRCH; /* process exited */
ret = down_write_killable(&mm->mmap_sem);
if (!ret) {
if (npage > 0) {
- if (lock_cap ? !*lock_cap :
- !has_capability(task, CAP_IPC_LOCK)) {
+ if (!dma->lock_cap) {
unsigned long limit;
- limit = task_rlimit(task,
+ limit = task_rlimit(dma->task,
RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (mm->locked_vm + npage > limit)
@@ -289,7 +286,7 @@ static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
up_write(&mm->mmap_sem);
}
- if (!is_current)
+ if (async)
mmput(mm);
return ret;
@@ -400,7 +397,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
*/
static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
long npage, unsigned long *pfn_base,
- bool lock_cap, unsigned long limit)
+ unsigned long limit)
{
unsigned long pfn = 0;
long ret, pinned = 0, lock_acct = 0;
@@ -423,7 +420,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
* pages are already counted against the user.
*/
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
- if (!lock_cap && current->mm->locked_vm + 1 > limit) {
+ if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
put_pfn(*pfn_base, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
limit << PAGE_SHIFT);
@@ -449,7 +446,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
}
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
- if (!lock_cap &&
+ if (!dma->lock_cap &&
current->mm->locked_vm + lock_acct + 1 > limit) {
put_pfn(pfn, dma->prot);
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
@@ -462,7 +459,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
}
out:
- ret = vfio_lock_acct(current, lock_acct, &lock_cap);
+ ret = vfio_lock_acct(dma, lock_acct, false);
unpin_out:
if (ret) {
@@ -493,7 +490,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
}
if (do_accounting)
- vfio_lock_acct(dma->task, locked - unlocked, NULL);
+ vfio_lock_acct(dma, locked - unlocked, true);
return unlocked;
}
@@ -510,7 +507,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
- ret = vfio_lock_acct(dma->task, 1, NULL);
+ ret = vfio_lock_acct(dma, 1, true);
if (ret) {
put_pfn(*pfn_base, dma->prot);
if (ret == -ENOMEM)
@@ -537,7 +534,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
if (do_accounting)
- vfio_lock_acct(dma->task, -unlocked, NULL);
+ vfio_lock_acct(dma, -unlocked, true);
return unlocked;
}
@@ -829,7 +826,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
if (do_accounting) {
- vfio_lock_acct(dma->task, -unlocked, NULL);
+ vfio_lock_acct(dma, -unlocked, true);
return 0;
}
return unlocked;
@@ -1044,14 +1041,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
size_t size = map_size;
long npage;
unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- bool lock_cap = capable(CAP_IPC_LOCK);
int ret = 0;
while (size) {
/* Pin a contiguous chunk of memory */
npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
- size >> PAGE_SHIFT, &pfn,
- lock_cap, limit);
+ size >> PAGE_SHIFT, &pfn, limit);
if (npage <= 0) {
WARN_ON(!npage);
ret = (int)npage;
@@ -1126,8 +1121,36 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
dma->iova = iova;
dma->vaddr = vaddr;
dma->prot = prot;
- get_task_struct(current);
- dma->task = current;
+
+ /*
+ * We need to be able to both add to a task's locked memory and test
+ * against the locked memory limit and we need to be able to do both
+ * outside of this call path as pinning can be asynchronous via the
+ * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a
+ * task_struct and VM locked pages requires an mm_struct, however
+ * holding an indefinite mm reference is not recommended, therefore we
+ * only hold a reference to a task. We could hold a reference to
+ * current, however QEMU uses this call path through vCPU threads,
+ * which can be killed resulting in a NULL mm and failure in the unmap
+ * path when called via a different thread. Avoid this problem by
+ * using the group_leader as threads within the same group require
+ * both CLONE_THREAD and CLONE_VM and will therefore use the same
+ * mm_struct.
+ *
+ * Previously we also used the task for testing CAP_IPC_LOCK at the
+ * time of pinning and accounting, however has_capability() makes use
+ * of real_cred, a copy-on-write field, so we can't guarantee that it
+ * matches group_leader, or in fact that it might not change by the
+ * time it's evaluated. If a process were to call MAP_DMA with
+ * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
+ * possibly see different results for an iommu_mapped vfio_dma vs
+ * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the
+ * time of calling MAP_DMA.
+ */
+ get_task_struct(current->group_leader);
+ dma->task = current->group_leader;
+ dma->lock_cap = capable(CAP_IPC_LOCK);
+
dma->pfn_list = RB_ROOT;
/* Insert zero-sized and grow as we map chunks of it */
@@ -1162,7 +1185,6 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *d;
struct rb_node *n;
unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- bool lock_cap = capable(CAP_IPC_LOCK);
int ret;
/* Arbitrarily pick the first domain in the list for lookups */
@@ -1209,8 +1231,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
npage = vfio_pin_pages_remote(dma, vaddr,
n >> PAGE_SHIFT,
- &pfn, lock_cap,
- limit);
+ &pfn, limit);
if (npage <= 0) {
WARN_ON(!npage);
ret = (int)npage;
@@ -1487,7 +1508,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
if (!is_invalid_reserved_pfn(vpfn->pfn))
locked++;
}
- vfio_lock_acct(dma->task, locked - unlocked, NULL);
+ vfio_lock_acct(dma, locked - unlocked, true);
}
}
diff --git a/samples/Kconfig b/samples/Kconfig
index 3db002b9e1d3..bd133efc1a56 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -115,6 +115,37 @@ config SAMPLE_VFIO_MDEV_MTTY
Build a virtual tty sample driver for use as a VFIO
mediated device
+config SAMPLE_VFIO_MDEV_MDPY
+ tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
+ depends on VFIO_MDEV_DEVICE && m
+ help
+ Build a virtual display sample driver for use as a VFIO
+ mediated device. It is a simple framebuffer and supports
+ the region display interface (VFIO_GFX_PLANE_TYPE_REGION).
+
+config SAMPLE_VFIO_MDEV_MDPY_FB
+ tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only"
+ depends on FB && m
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Guest fbdev driver for the virtual display sample driver.
+
+config SAMPLE_VFIO_MDEV_MBOCHS
+	tristate "Build VFIO mbochs example mediated device sample code -- loadable modules only"
+	depends on VFIO_MDEV_DEVICE && m
+	select DMA_SHARED_BUFFER
+	help
+	  Build a virtual display sample driver for use as a VFIO
+	  mediated device. It supports the dmabuf display interface
+	  (VFIO_GFX_PLANE_TYPE_DMABUF).
+	  Emulate enough of qemu stdvga to make bochs-drm.ko happy.
+	  That is basically the vram memory bar and the bochs dispi
+	  interface vbe registers in the mmio register bar.
+	  Specifically it does *not* include any legacy vga stuff.
+	  Device looks a lot like "qemu -device secondary-vga".
+
config SAMPLE_STATX
bool "Build example extended-stat using code"
depends on BROKEN
diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile
index cbbd868a50a8..7db889ca135c 100644
--- a/samples/vfio-mdev/Makefile
+++ b/samples/vfio-mdev/Makefile
@@ -1 +1,4 @@
obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB) += mdpy-fb.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MBOCHS) += mbochs.o
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
new file mode 100644
index 000000000000..2960e26c6ea4
--- /dev/null
+++ b/samples/vfio-mdev/mbochs.c
@@ -0,0 +1,1406 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediated virtual PCI display host device driver
+ *
+ * Emulate enough of qemu stdvga to make bochs-drm.ko happy. That is
+ * basically the vram memory bar and the bochs dispi interface vbe
+ * registers in the mmio register bar. Specifically it does *not*
+ * include any legacy vga stuff. Device looks a lot like "qemu -device
+ * secondary-vga".
+ *
+ * (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * based on mtty driver which is:
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Author: Neo Jia <cjia@nvidia.com>
+ * Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/cdev.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_rect.h>
+#include <drm/drm_modeset_lock.h>
+#include <drm/drm_property.h>
+#include <drm/drm_plane.h>
+
+
+#define VBE_DISPI_INDEX_ID 0x0
+#define VBE_DISPI_INDEX_XRES 0x1
+#define VBE_DISPI_INDEX_YRES 0x2
+#define VBE_DISPI_INDEX_BPP 0x3
+#define VBE_DISPI_INDEX_ENABLE 0x4
+#define VBE_DISPI_INDEX_BANK 0x5
+#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
+#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
+#define VBE_DISPI_INDEX_X_OFFSET 0x8
+#define VBE_DISPI_INDEX_Y_OFFSET 0x9
+#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+#define VBE_DISPI_INDEX_COUNT 0xb
+
+#define VBE_DISPI_ID0 0xB0C0
+#define VBE_DISPI_ID1 0xB0C1
+#define VBE_DISPI_ID2 0xB0C2
+#define VBE_DISPI_ID3 0xB0C3
+#define VBE_DISPI_ID4 0xB0C4
+#define VBE_DISPI_ID5 0xB0C5
+
+#define VBE_DISPI_DISABLED 0x00
+#define VBE_DISPI_ENABLED 0x01
+#define VBE_DISPI_GETCAPS 0x02
+#define VBE_DISPI_8BIT_DAC 0x20
+#define VBE_DISPI_LFB_ENABLED 0x40
+#define VBE_DISPI_NOCLEARMEM 0x80
+
+
+#define MBOCHS_NAME "mbochs"
+#define MBOCHS_CLASS_NAME "mbochs"
+
+#define MBOCHS_CONFIG_SPACE_SIZE 0xff
+#define MBOCHS_MMIO_BAR_OFFSET PAGE_SIZE
+#define MBOCHS_MMIO_BAR_SIZE PAGE_SIZE
+#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \
+ MBOCHS_MMIO_BAR_SIZE)
+
+/*
+ * Store a 16/32-bit value at @addr.  Both arguments are parenthesized so
+ * that arbitrary expressions can be passed safely.  No byte swapping is
+ * done: the value is written in host byte order despite the "LE" name.
+ */
+#define STORE_LE16(addr, val)	(*(u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)	(*(u32 *)(addr) = (val))
+
+
+MODULE_LICENSE("GPL v2");
+
+/*
+ * Total video memory budget in megabytes, shared by all mbochs devices.
+ * Exposed read-only as module parameter "count"; the description must use
+ * that same name so that modinfo attaches it to the parameter.
+ */
+static int max_mbytes = 256;
+module_param_named(count, max_mbytes, int, 0444);
+MODULE_PARM_DESC(count, "megabytes available to " MBOCHS_NAME " devices");
+
+
+#define MBOCHS_TYPE_1 "small"
+#define MBOCHS_TYPE_2 "medium"
+#define MBOCHS_TYPE_3 "large"
+
+static const struct mbochs_type { /* one entry per mdev type offered by this driver */
+ const char *name; /* matched against kobj->name in mbochs_find_type() */
+ u32 mbytes; /* video memory size in megabytes */
+} mbochs_types[] = {
+ {
+ .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
+ .mbytes = 4,
+ }, {
+ .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
+ .mbytes = 16,
+ }, {
+ .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
+ .mbytes = 64,
+ },
+};
+
+
+static dev_t mbochs_devt;
+static struct class *mbochs_class;
+static struct cdev mbochs_cdev;
+static struct device mbochs_dev;
+static int mbochs_used_mbytes;
+
+struct mbochs_mode { /* framebuffer layout derived from the vbe registers by mbochs_check_framebuffer() */
+ u32 drm_format; /* DRM fourcc code; DRM_FORMAT_XRGB8888 for 32 bpp */
+ u32 bytepp; /* bytes per pixel */
+ u32 width; /* visible width in pixels (vbe xres) */
+ u32 height; /* visible height in pixels (vbe yres) */
+ u32 stride; /* bytes per scanline: max(virt-width, width) * bytepp */
+ u32 __pad; /* NOTE(review): presumably explicit padding so memcmp() in mbochs_modes_equal() sees no holes - confirm */
+ u64 offset; /* byte offset of the first pixel inside video memory */
+ u64 size; /* stride * height, in bytes */
+};
+
+struct mbochs_dmabuf { /* per-dmabuf bookkeeping; the code using these fields is outside this view */
+ struct mbochs_mode mode; /* framebuffer layout, see struct mbochs_mode */
+ u32 id;
+ struct page **pages; /* NOTE(review): presumably pagecount entries - confirm */
+ pgoff_t pagecount;
+ struct dma_buf *buf;
+ struct mdev_state *mdev_state; /* NOTE(review): presumably the owning device state - confirm */
+ struct list_head next; /* NOTE(review): presumably linked into mdev_state->dmabufs - confirm */
+ bool unlinked;
+};
+
+/* State of each mdev device */
+struct mdev_state {
+ u8 *vconfig; /* emulated PCI config space, MBOCHS_CONFIG_SPACE_SIZE bytes */
+ u64 bar_mask[3]; /* size mask applied when 0xffffffff is written to a BAR; indexed by BAR register number */
+ u32 memory_bar_mask;
+ struct mutex ops_lock; /* held for the duration of mdev_access() */
+ struct mdev_device *mdev;
+ struct vfio_device_info dev_info;
+
+ const struct mbochs_type *type;
+ u16 vbe[VBE_DISPI_INDEX_COUNT]; /* bochs dispi (vbe) register file */
+ u64 memsize; /* video memory size in bytes */
+ struct page **pages;
+ pgoff_t pagecount;
+
+ struct list_head dmabufs;
+ u32 active_id;
+ u32 next_id;
+};
+
+/*
+ * Human-readable names of the vbe registers, indexed by VBE_DISPI_INDEX_*.
+ * Only used for debug output; both the strings and the pointers are
+ * read-only, hence "const char * const".
+ */
+static const char * const vbe_name_list[VBE_DISPI_INDEX_COUNT] = {
+	[VBE_DISPI_INDEX_ID]               = "id",
+	[VBE_DISPI_INDEX_XRES]             = "xres",
+	[VBE_DISPI_INDEX_YRES]             = "yres",
+	[VBE_DISPI_INDEX_BPP]              = "bpp",
+	[VBE_DISPI_INDEX_ENABLE]           = "enable",
+	[VBE_DISPI_INDEX_BANK]             = "bank",
+	[VBE_DISPI_INDEX_VIRT_WIDTH]       = "virt-width",
+	[VBE_DISPI_INDEX_VIRT_HEIGHT]      = "virt-height",
+	[VBE_DISPI_INDEX_X_OFFSET]         = "x-offset",
+	[VBE_DISPI_INDEX_Y_OFFSET]         = "y-offset",
+	[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = "video-mem",
+};
+
+/* Translate a vbe register index into its name; "(invalid)" if out of range. */
+static const char *vbe_name(u32 index)
+{
+	return index < ARRAY_SIZE(vbe_name_list) ?
+		vbe_name_list[index] : "(invalid)";
+}
+
+static struct page *mbochs_get_page(struct mdev_state *mdev_state,
+ pgoff_t pgoff);
+
+/*
+ * Look up the mbochs_types[] entry whose name equals kobj->name.
+ * Returns NULL when no entry matches.
+ */
+static const struct mbochs_type *mbochs_find_type(struct kobject *kobj)
+{
+	const struct mbochs_type *entry = mbochs_types;
+	const struct mbochs_type *end = mbochs_types + ARRAY_SIZE(mbochs_types);
+
+	for (; entry != end; entry++) {
+		if (!strcmp(entry->name, kobj->name))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fill in the emulated PCI config space: vendor/device and subsystem ids,
+ * command and class registers, and the two memory BARs (BAR0 = video
+ * memory, BAR2 = mmio registers) together with their sizing masks.
+ */
+static void mbochs_create_config_space(struct mdev_state *mdev_state)
+{
+	u8 *cfg = mdev_state->vconfig;
+
+	/* identification */
+	STORE_LE16(&cfg[PCI_VENDOR_ID], 0x1234);
+	STORE_LE16(&cfg[PCI_DEVICE_ID], 0x1111);
+	STORE_LE16(&cfg[PCI_SUBSYSTEM_VENDOR_ID],
+		   PCI_SUBVENDOR_ID_REDHAT_QUMRANET);
+	STORE_LE16(&cfg[PCI_SUBSYSTEM_ID], PCI_SUBDEVICE_ID_QEMU);
+
+	/* command, class and revision */
+	STORE_LE16(&cfg[PCI_COMMAND], PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	STORE_LE16(&cfg[PCI_CLASS_DEVICE], PCI_CLASS_DISPLAY_OTHER);
+	cfg[PCI_CLASS_REVISION] = 0x01;
+
+	/* BAR0: prefetchable 32-bit memory BAR covering the video memory */
+	STORE_LE32(&cfg[PCI_BASE_ADDRESS_0],
+		   PCI_BASE_ADDRESS_SPACE_MEMORY |
+		   PCI_BASE_ADDRESS_MEM_TYPE_32 |
+		   PCI_BASE_ADDRESS_MEM_PREFETCH);
+	mdev_state->bar_mask[0] = ~(mdev_state->memsize) + 1;
+
+	/* BAR2: 32-bit memory BAR covering the mmio register page */
+	STORE_LE32(&cfg[PCI_BASE_ADDRESS_2],
+		   PCI_BASE_ADDRESS_SPACE_MEMORY |
+		   PCI_BASE_ADDRESS_MEM_TYPE_32);
+	mdev_state->bar_mask[2] = ~(MBOCHS_MMIO_BAR_SIZE) + 1;
+}
+
+/*
+ * Derive the framebuffer layout from the current vbe register values.
+ *
+ * Must be called with mdev_state->ops_lock held.  On success @mode is
+ * filled in and 0 is returned.  If the display is disabled, the bpp is
+ * not 32, the resolution is below 64x64 or the framebuffer does not fit
+ * into video memory, @mode is zeroed and -EINVAL is returned.
+ */
+static int mbochs_check_framebuffer(struct mdev_state *mdev_state,
+				    struct mbochs_mode *mode)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	const u16 *regs = mdev_state->vbe;
+	u32 pitch_pixels;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	/* Start from all-zero; the early rejections below rely on it. */
+	memset(mode, 0, sizeof(*mode));
+
+	if ((regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) == 0)
+		return -EINVAL;
+
+	if (regs[VBE_DISPI_INDEX_BPP] != 32) {
+		dev_info_ratelimited(dev, "%s: bpp %d not supported\n",
+				     __func__, regs[VBE_DISPI_INDEX_BPP]);
+		return -EINVAL;
+	}
+	mode->drm_format = DRM_FORMAT_XRGB8888;
+	mode->bytepp = 4;
+
+	mode->width = regs[VBE_DISPI_INDEX_XRES];
+	mode->height = regs[VBE_DISPI_INDEX_YRES];
+
+	/* The scanline pitch is never narrower than the visible width. */
+	pitch_pixels = regs[VBE_DISPI_INDEX_VIRT_WIDTH];
+	if (pitch_pixels < mode->width)
+		pitch_pixels = mode->width;
+
+	mode->stride = pitch_pixels * mode->bytepp;
+	mode->size = (u64)mode->stride * mode->height;
+	mode->offset = (u64)regs[VBE_DISPI_INDEX_X_OFFSET] * mode->bytepp +
+		       (u64)regs[VBE_DISPI_INDEX_Y_OFFSET] * mode->stride;
+
+	if (mode->width < 64 || mode->height < 64) {
+		dev_info_ratelimited(dev, "%s: invalid resolution %dx%d\n",
+				     __func__, mode->width, mode->height);
+		goto reject;
+	}
+
+	if (mode->offset + mode->size > mdev_state->memsize) {
+		dev_info_ratelimited(dev, "%s: framebuffer memory overflow\n",
+				     __func__);
+		goto reject;
+	}
+
+	return 0;
+
+reject:
+	memset(mode, 0, sizeof(*mode));
+	return -EINVAL;
+}
+
+/* Byte-wise comparison of two framebuffer layouts. */
+static bool mbochs_modes_equal(struct mbochs_mode *mode1,
+			       struct mbochs_mode *mode2)
+{
+	return !memcmp(mode1, mode2, sizeof(*mode1));
+}
+
+/*
+ * Emulate a write to PCI config space.  Only BAR0 and BAR2 are writable;
+ * writes to any other offset are silently ignored.  Writing 0xffffffff is
+ * the BAR sizing probe and is answered with the BAR's size mask; any other
+ * value is taken as the new BAR address.  The low flag bits of the BAR are
+ * always preserved.
+ */
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+				 char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	int bar;
+	u32 val;
+
+	if (offset != PCI_BASE_ADDRESS_0 && offset != PCI_BASE_ADDRESS_2)
+		return;
+
+	bar = (offset - PCI_BASE_ADDRESS_0) / 0x04;
+	val = *(u32 *)buf;
+
+	if (val != 0xffffffff) {
+		val &= PCI_BASE_ADDRESS_MEM_MASK;
+		if (val)
+			dev_info(dev, "BAR #%d @ 0x%x\n", bar, val);
+	} else {
+		val &= mdev_state->bar_mask[bar];
+	}
+
+	val |= mdev_state->vconfig[offset] & ~PCI_BASE_ADDRESS_MEM_MASK;
+	STORE_LE32(&mdev_state->vconfig[offset], val);
+}
+
+/*
+ * Emulate a write to the mmio register BAR.
+ *
+ * Only 16-bit accesses to the bochs dispi window (0x500-0x515) are
+ * implemented.  The remapped vga ioports (0x400-0x41f), the qemu extended
+ * registers (0x600-0x607) and every other offset are logged and dropped.
+ */
+static void handle_mmio_write(struct mdev_state *mdev_state, u16 offset,
+			      char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	bool is_dispi = offset >= 0x500 && offset <= 0x515;
+	u16 value;
+	int reg;
+
+	if (!is_dispi || count != 2) {
+		dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n",
+			__func__, offset, count);
+		return;
+	}
+
+	reg = (offset - 0x500) / 2;
+	value = *(u16 *)buf;
+	if (reg < ARRAY_SIZE(mdev_state->vbe))
+		mdev_state->vbe[reg] = value;
+	dev_dbg(dev, "%s: vbe write %d = %d (%s)\n",
+		__func__, reg, value, vbe_name(reg));
+}
+
+/*
+ * Emulate a read from the mmio register BAR.
+ *
+ * 16-bit reads inside the bochs dispi window (0x500-0x515) return the
+ * matching vbe register, or 0 if the index has no register behind it.
+ * Any other access is logged and answered with zeros.
+ */
+static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset,
+			     char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	bool is_dispi = offset >= 0x500 && offset <= 0x515;
+	u16 value = 0;
+	int reg;
+
+	if (!is_dispi || count != 2) {
+		dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n",
+			__func__, offset, count);
+		memset(buf, 0, count);
+		return;
+	}
+
+	reg = (offset - 0x500) / 2;
+	if (reg < ARRAY_SIZE(mdev_state->vbe))
+		value = mdev_state->vbe[reg];
+	dev_dbg(dev, "%s: vbe read %d = %d (%s)\n",
+		__func__, reg, value, vbe_name(reg));
+	*(u16 *)buf = value;
+}
+
+/*
+ * Emulate one read or write of @count bytes at device offset @pos.
+ *
+ * The offset selects one of three windows:
+ *   [0, MBOCHS_CONFIG_SPACE_SIZE)                         PCI config space
+ *   [MBOCHS_MMIO_BAR_OFFSET, MBOCHS_MEMORY_BAR_OFFSET)    mmio registers
+ *   [MBOCHS_MEMORY_BAR_OFFSET, MBOCHS_MEMORY_BAR_OFFSET + memsize)
+ *                                                         video memory
+ *
+ * @buf is a kernel buffer holding the data to write, or receiving the data
+ * read.  The whole access runs under mdev_state->ops_lock.
+ *
+ * Returns @count on success, -1 if the access matches none of the windows.
+ */
+static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
+			   loff_t pos, bool is_write)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct device *dev = mdev_dev(mdev);
+	struct page *pg;
+	loff_t poff;
+	char *map;
+	int ret = 0;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	if (pos < MBOCHS_CONFIG_SPACE_SIZE) {
+		if (is_write) {
+			handle_pci_cfg_write(mdev_state, pos, buf, count);
+		} else {
+			/*
+			 * vconfig is only MBOCHS_CONFIG_SPACE_SIZE bytes
+			 * long: never copy past its end, and pad whatever
+			 * the caller asked for beyond it with zeros.
+			 */
+			size_t avail = MBOCHS_CONFIG_SPACE_SIZE - pos;
+			size_t len = min(count, avail);
+
+			memcpy(buf, mdev_state->vconfig + pos, len);
+			memset(buf + len, 0, count - len);
+		}
+
+	} else if (pos >= MBOCHS_MMIO_BAR_OFFSET &&
+		   pos + count <= MBOCHS_MEMORY_BAR_OFFSET) {
+		pos -= MBOCHS_MMIO_BAR_OFFSET;
+		if (is_write)
+			handle_mmio_write(mdev_state, pos, buf, count);
+		else
+			handle_mmio_read(mdev_state, pos, buf, count);
+
+	} else if (pos >= MBOCHS_MEMORY_BAR_OFFSET &&
+		   pos + count <=
+		   MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) {
+		/*
+		 * Rebase onto the start of the memory BAR (not the mmio
+		 * BAR), so that the first byte of the window maps to byte 0
+		 * of video memory and the last one stays inside memsize.
+		 */
+		pos -= MBOCHS_MEMORY_BAR_OFFSET;
+		poff = pos & ~PAGE_MASK;
+		pg = mbochs_get_page(mdev_state, pos >> PAGE_SHIFT);
+		map = kmap(pg);
+		if (is_write)
+			memcpy(map + poff, buf, count);
+		else
+			memcpy(buf, map + poff, count);
+		kunmap(pg);
+		put_page(pg);
+
+	} else {
+		dev_dbg(dev, "%s: %s @0x%llx (unhandled)\n",
+			__func__, is_write ? "WR" : "RD", pos);
+		ret = -1;
+		goto accessfailed;
+	}
+
+	ret = count;
+
+
+accessfailed:
+	mutex_unlock(&mdev_state->ops_lock);
+
+	return ret;
+}
+
+/*
+ * Put the emulated vbe registers back into their power-on state: everything
+ * zero except the interface id and the video memory size, which is reported
+ * in units of 64 KiB.  Always returns 0.
+ */
+static int mbochs_reset(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	memset(state->vbe, 0, sizeof(state->vbe));
+	state->vbe[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
+	state->vbe[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] =
+		(u32)(state->memsize / (64 * 1024));
+	return 0;
+}
+
+/*
+ * mdev ->create callback: allocate and initialise the per-device state.
+ *
+ * The device type is looked up from @kobj (falling back to the first entry
+ * of mbochs_types).  Creation is refused with -ENOMEM when the type's video
+ * memory would push the total past max_mbytes or when an allocation fails.
+ * The page pointer array starts out empty; pages are allocated on first use.
+ */
+static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	const struct mbochs_type *type = mbochs_find_type(kobj);
+	struct device *dev = mdev_dev(mdev);
+	struct mdev_state *state;
+
+	if (!type)
+		type = &mbochs_types[0];
+	if (type->mbytes + mbochs_used_mbytes > max_mbytes)
+		return -ENOMEM;
+
+	state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL);
+	if (!state->vconfig)
+		goto err_mem;
+
+	state->memsize = type->mbytes * 1024 * 1024;
+	state->pagecount = state->memsize >> PAGE_SHIFT;
+	state->pages = kcalloc(state->pagecount, sizeof(struct page *),
+			       GFP_KERNEL);
+	if (!state->pages)
+		goto err_mem;
+
+	dev_info(dev, "%s: %s, %d MB, %ld pages\n", __func__,
+		 kobj->name, type->mbytes, state->pagecount);
+
+	mutex_init(&state->ops_lock);
+	state->mdev = mdev;
+	mdev_set_drvdata(mdev, state);
+	INIT_LIST_HEAD(&state->dmabufs);
+	state->next_id = 1;
+
+	state->type = type;
+	mbochs_create_config_space(state);
+	mbochs_reset(mdev);
+
+	/* account the video memory only once setup can no longer fail */
+	mbochs_used_mbytes += type->mbytes;
+	return 0;
+
+err_mem:
+	/* vconfig may still be NULL here; kfree() accepts that */
+	kfree(state->vconfig);
+	kfree(state);
+	return -ENOMEM;
+}
+
+/*
+ * mdev ->remove callback: give the device's video memory budget back and
+ * free the state allocated in mbochs_create().  Always returns 0.
+ */
+static int mbochs_remove(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	mbochs_used_mbytes -= state->type->mbytes;
+	mdev_set_drvdata(mdev, NULL);
+
+	kfree(state->pages);
+	kfree(state->vconfig);
+	kfree(state);
+
+	return 0;
+}
+
+/*
+ * mdev ->read callback.
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses;
+ * each one goes through mdev_access() and is then copied to user space.
+ * *ppos is advanced as data is transferred.
+ *
+ * Returns the number of bytes read, or -EFAULT as soon as one access or one
+ * copy to user space fails.
+ */
+static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+
+	while (count) {
+		size_t chunk;
+		u32 val;
+
+		/* largest access that is both available and aligned */
+		if (count >= 4 && !(*ppos % 4))
+			chunk = 4;
+		else if (count >= 2 && !(*ppos % 2))
+			chunk = 2;
+		else
+			chunk = 1;
+
+		if (mdev_access(mdev, (char *)&val, chunk, *ppos, false) <= 0)
+			return -EFAULT;
+
+		if (copy_to_user(buf, &val, chunk))
+			return -EFAULT;
+
+		count -= chunk;
+		done += chunk;
+		*ppos += chunk;
+		buf += chunk;
+	}
+
+	return done;
+}
+
+/*
+ * mdev ->write callback.
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses;
+ * each one is fetched from user space and handed to mdev_access().
+ * *ppos is advanced as data is transferred.
+ *
+ * Returns the number of bytes written, or -EFAULT as soon as one copy from
+ * user space or one access fails.
+ */
+static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+
+	while (count) {
+		size_t chunk;
+		u32 val;
+
+		/* largest access that is both available and aligned */
+		if (count >= 4 && !(*ppos % 4))
+			chunk = 4;
+		else if (count >= 2 && !(*ppos % 2))
+			chunk = 2;
+		else
+			chunk = 1;
+
+		if (copy_from_user(&val, buf, chunk))
+			return -EFAULT;
+
+		if (mdev_access(mdev, (char *)&val, chunk, *ppos, true) <= 0)
+			return -EFAULT;
+
+		count -= chunk;
+		done += chunk;
+		*ppos += chunk;
+		buf += chunk;
+	}
+
+	return done;
+}
+
+/*
+ * Return framebuffer page @pgoff with an extra reference taken, allocating
+ * a zeroed page for that slot first if it is still empty.
+ *
+ * The caller must hold ops_lock (a warning is emitted otherwise) and must
+ * pass an index inside the pages[] array; no bounds check is done here.
+ * Returns NULL when the page allocation fails.
+ */
+static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
+				      pgoff_t pgoff)
+{
+	struct page **slot = &mdev_state->pages[pgoff];
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	if (!*slot) {
+		*slot = alloc_pages(GFP_HIGHUSER | __GFP_ZERO, 0);
+		if (!*slot)
+			return NULL;
+	}
+
+	get_page(*slot);
+	return *slot;
+}
+
+/*
+ * Locked, bounds-checked wrapper around __mbochs_get_page().
+ *
+ * Takes ops_lock itself, so it must not be called with that mutex held.
+ * Returns NULL (after a warning) for an out-of-range @pgoff, or when the
+ * page cannot be allocated.
+ */
+static struct page *mbochs_get_page(struct mdev_state *mdev_state,
+				    pgoff_t pgoff)
+{
+	struct page *pg;
+
+	if (WARN_ON(pgoff >= mdev_state->pagecount))
+		return NULL;
+
+	mutex_lock(&mdev_state->ops_lock);
+	pg = __mbochs_get_page(mdev_state, pgoff);
+	mutex_unlock(&mdev_state->ops_lock);
+
+	return pg;
+}
+
+/*
+ * Drop one reference on every allocated framebuffer page and clear its
+ * slot in pages[].  The caller must hold ops_lock.
+ */
+static void mbochs_put_pages(struct mdev_state *mdev_state)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	int released = 0;
+	int i;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	for (i = 0; i < mdev_state->pagecount; i++) {
+		struct page *pg = mdev_state->pages[i];
+
+		if (pg) {
+			put_page(pg);
+			mdev_state->pages[i] = NULL;
+			released++;
+		}
+	}
+	dev_dbg(dev, "%s: %d pages released\n", __func__, released);
+}
+
+/*
+ * Page fault handler for the mmap()ed memory bar: hand out the framebuffer
+ * page that backs the faulting address.  Faults beyond the framebuffer, or
+ * for which no page can be obtained, raise SIGBUS.
+ */
+static int mbochs_region_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct mdev_state *state = vma->vm_private_data;
+	pgoff_t idx = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+
+	if (idx >= state->pagecount)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = mbochs_get_page(state, idx);
+
+	return vmf->page ? 0 : VM_FAULT_SIGBUS;
+}
+
+/* VM operations installed by mbochs_mmap(): pages are provided on fault. */
+static const struct vm_operations_struct mbochs_region_vm_ops = {
+ .fault = mbochs_region_vm_fault,
+};
+
+/*
+ * mdev ->mmap callback.
+ *
+ * Only shared mappings that start exactly at the memory bar offset and are
+ * no larger than the framebuffer are accepted; anything else is -EINVAL.
+ * No pages are inserted here, they arrive through the fault handler.
+ */
+static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	if (vma->vm_pgoff != MBOCHS_MEMORY_BAR_OFFSET >> PAGE_SHIFT ||
+	    vma->vm_end < vma->vm_start ||
+	    vma->vm_end - vma->vm_start > state->memsize ||
+	    !(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_ops = &mbochs_region_vm_ops;
+	vma->vm_private_data = state;
+	return 0;
+}
+
+/*
+ * Page fault handler for mmap()ed dma-bufs: return the dmabuf page at the
+ * faulting page offset with a reference taken.  An offset past the end of
+ * the dmabuf triggers a warning and SIGBUS.
+ */
+static int mbochs_dmabuf_vm_fault(struct vm_fault *vmf)
+{
+	struct mbochs_dmabuf *dmabuf = vmf->vma->vm_private_data;
+	struct page *pg;
+
+	if (WARN_ON(vmf->pgoff >= dmabuf->pagecount))
+		return VM_FAULT_SIGBUS;
+
+	pg = dmabuf->pages[vmf->pgoff];
+	vmf->page = pg;
+	get_page(pg);
+	return 0;
+}
+
+/* VM operations installed by mbochs_mmap_dmabuf(): pages come from the dmabuf on fault. */
+static const struct vm_operations_struct mbochs_dmabuf_vm_ops = {
+ .fault = mbochs_dmabuf_vm_fault,
+};
+
+/*
+ * dma-buf ->mmap callback: accept shared mappings only (-EINVAL otherwise)
+ * and route their page faults to mbochs_dmabuf_vm_fault().
+ */
+static int mbochs_mmap_dmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_private_data = dmabuf;
+	vma->vm_ops = &mbochs_dmabuf_vm_ops;
+	return 0;
+}
+
+/*
+ * Debug helper: log id, fourcc, geometry and size of @dmabuf, prefixed with
+ * @prefix.  A zero fourcc is printed as "----".
+ */
+static void mbochs_print_dmabuf(struct mbochs_dmabuf *dmabuf,
+				const char *prefix)
+{
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+	u32 fourcc = dmabuf->mode.drm_format;
+	u32 ch[4];
+	int i;
+
+	/* split the fourcc into its four characters, lowest byte first */
+	for (i = 0; i < 4; i++)
+		ch[i] = fourcc ? ((fourcc >> (8 * i)) & 0xff) : '-';
+
+	dev_dbg(dev, "%s/%d: %c%c%c%c, %dx%d, stride %d, off 0x%llx, size 0x%llx, pages %ld\n",
+		prefix, dmabuf->id,
+		ch[0], ch[1], ch[2], ch[3],
+		dmabuf->mode.width, dmabuf->mode.height, dmabuf->mode.stride,
+		dmabuf->mode.offset, dmabuf->mode.size, dmabuf->pagecount);
+}
+
+/*
+ * dma-buf ->map_dma_buf callback: build a scatter/gather table covering the
+ * dmabuf's pages and DMA-map it for the attached device.
+ *
+ * Returns the table, or ERR_PTR(-ENOMEM) if any step fails.
+ */
+static struct sg_table *mbochs_map_dmabuf(struct dma_buf_attachment *at,
+					  enum dma_data_direction direction)
+{
+	struct mbochs_dmabuf *dmabuf = at->dmabuf->priv;
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+	struct sg_table *table;
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	if (sg_alloc_table_from_pages(table, dmabuf->pages, dmabuf->pagecount,
+				      0, dmabuf->mode.size, GFP_KERNEL) < 0) {
+		kfree(table);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!dma_map_sg(at->dev, table->sgl, table->nents, direction)) {
+		sg_free_table(table);
+		kfree(table);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return table;
+}
+
+/*
+ * dma-buf ->unmap_dma_buf callback: undo mbochs_map_dmabuf().
+ *
+ * The table was DMA-mapped with dma_map_sg() for at->dev, so that mapping
+ * is torn down (same list, count and direction) before the table is freed.
+ */
+static void mbochs_unmap_dmabuf(struct dma_buf_attachment *at,
+				struct sg_table *sg,
+				enum dma_data_direction direction)
+{
+	struct mbochs_dmabuf *dmabuf = at->dmabuf->priv;
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	dma_unmap_sg(at->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+}
+
+/*
+ * dma-buf ->release callback: drop the page references taken when the
+ * dmabuf was allocated and detach the exported dma_buf from it.
+ *
+ * If mbochs_close() already unlinked the dmabuf from the device list, this
+ * is the last user and the tracking structure is freed here, together with
+ * its page pointer array.
+ */
+static void mbochs_release_dmabuf(struct dma_buf *buf)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+	struct mdev_state *mdev_state = dmabuf->mdev_state;
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	pgoff_t pg;
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	for (pg = 0; pg < dmabuf->pagecount; pg++)
+		put_page(dmabuf->pages[pg]);
+
+	mutex_lock(&mdev_state->ops_lock);
+	dmabuf->buf = NULL;
+	if (dmabuf->unlinked) {
+		/* the pages[] array belongs to the dmabuf; free it as well */
+		kfree(dmabuf->pages);
+		kfree(dmabuf);
+	}
+	mutex_unlock(&mdev_state->ops_lock);
+}
+
+/*
+ * dma-buf ->map_atomic callback: atomically map page @page_num of the
+ * dmabuf.  The index is not range-checked here.
+ */
+static void *mbochs_kmap_atomic_dmabuf(struct dma_buf *buf,
+				       unsigned long page_num)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+
+	return kmap_atomic(dmabuf->pages[page_num]);
+}
+
+/*
+ * dma-buf ->map callback: map page @page_num of the dmabuf with kmap().
+ * The index is not range-checked here.
+ */
+static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+
+	return kmap(dmabuf->pages[page_num]);
+}
+
+/* dma-buf callbacks for buffers exported by mbochs_dmabuf_export(). */
+static struct dma_buf_ops mbochs_dmabuf_ops = {
+ .map_dma_buf = mbochs_map_dmabuf,
+ .unmap_dma_buf = mbochs_unmap_dmabuf,
+ .release = mbochs_release_dmabuf,
+ .map_atomic = mbochs_kmap_atomic_dmabuf,
+ .map = mbochs_kmap_dmabuf,
+ .mmap = mbochs_mmap_dmabuf,
+};
+
+/*
+ * Create a dmabuf tracking structure for framebuffer mode @mode.
+ *
+ * A reference is taken on every framebuffer page the mode covers (pages are
+ * allocated on demand), the dmabuf gets the next free id and is added to the
+ * device's dmabuf list.  The caller must hold ops_lock.
+ *
+ * Returns the new dmabuf, or NULL on allocation failure, in which case all
+ * page references taken so far are dropped again.
+ */
+static struct mbochs_dmabuf *mbochs_dmabuf_alloc(struct mdev_state *mdev_state,
+						 struct mbochs_mode *mode)
+{
+	struct mbochs_dmabuf *dmabuf;
+	pgoff_t first, pg;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	dmabuf = kzalloc(sizeof(struct mbochs_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return NULL;
+
+	dmabuf->mode = *mode;
+	dmabuf->id = mdev_state->next_id++;
+	dmabuf->pagecount = DIV_ROUND_UP(mode->size, PAGE_SIZE);
+	dmabuf->pages = kcalloc(dmabuf->pagecount, sizeof(struct page *),
+				GFP_KERNEL);
+	if (!dmabuf->pages) {
+		kfree(dmabuf);
+		return NULL;
+	}
+
+	first = dmabuf->mode.offset >> PAGE_SHIFT;
+	for (pg = 0; pg < dmabuf->pagecount; pg++) {
+		dmabuf->pages[pg] = __mbochs_get_page(mdev_state, first + pg);
+		if (!dmabuf->pages[pg])
+			goto err_put_pages;
+	}
+
+	dmabuf->mdev_state = mdev_state;
+	list_add(&dmabuf->next, &mdev_state->dmabufs);
+
+	mbochs_print_dmabuf(dmabuf, __func__);
+	return dmabuf;
+
+err_put_pages:
+	/* pages [0, pg) were acquired successfully; release them */
+	while (pg > 0) {
+		pg--;
+		put_page(dmabuf->pages[pg]);
+	}
+	kfree(dmabuf->pages);
+	kfree(dmabuf);
+	return NULL;
+}
+
+/*
+ * Look up the first dmabuf on the device list whose mode equals @mode.
+ * The caller must hold ops_lock.  Returns NULL when there is none.
+ */
+static struct mbochs_dmabuf *
+mbochs_dmabuf_find_by_mode(struct mdev_state *mdev_state,
+			   struct mbochs_mode *mode)
+{
+	struct mbochs_dmabuf *cur;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	list_for_each_entry(cur, &mdev_state->dmabufs, next) {
+		if (mbochs_modes_equal(&cur->mode, mode))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the dmabuf with id @id on the device list.
+ * The caller must hold ops_lock.  Returns NULL when there is none.
+ */
+static struct mbochs_dmabuf *
+mbochs_dmabuf_find_by_id(struct mdev_state *mdev_state, u32 id)
+{
+	struct mbochs_dmabuf *cur;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	list_for_each_entry(cur, &mdev_state->dmabufs, next) {
+		if (cur->id == id)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Export @dmabuf as a dma_buf and remember the result in dmabuf->buf.
+ * The caller must hold ops_lock.
+ *
+ * Returns 0 on success, -EINVAL when the framebuffer offset is not page
+ * aligned, or the error reported by dma_buf_export().
+ */
+static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
+{
+	struct mdev_state *mdev_state = dmabuf->mdev_state;
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct dma_buf *exported;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	if (!IS_ALIGNED(dmabuf->mode.offset, PAGE_SIZE)) {
+		dev_info_ratelimited(dev, "%s: framebuffer not page-aligned\n",
+				     __func__);
+		return -EINVAL;
+	}
+
+	exp_info.priv = dmabuf;
+	exp_info.size = dmabuf->mode.size;
+	exp_info.ops = &mbochs_dmabuf_ops;
+
+	exported = dma_buf_export(&exp_info);
+	if (IS_ERR(exported)) {
+		dev_info_ratelimited(dev, "%s: dma_buf_export failed: %ld\n",
+				     __func__, PTR_ERR(exported));
+		return PTR_ERR(exported);
+	}
+
+	dmabuf->buf = exported;
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+	return 0;
+}
+
+/*
+ * Fill in offset, size and flags for region @region_info->index.
+ *
+ * Config space and BAR2 (MMIO) are read/write; BAR0 (video memory) is
+ * read/write and mmap-able.  Every other valid index is reported as an
+ * empty region.  @cap_type_id and @cap_type are not used.
+ *
+ * Returns 0, or -EINVAL when there is no device state or the index is out
+ * of range.
+ */
+static int mbochs_get_region_info(struct mdev_device *mdev,
+				  struct vfio_region_info *region_info,
+				  u16 *cap_type_id, void **cap_type)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	if (!state)
+		return -EINVAL;
+
+	if (region_info->index >= VFIO_PCI_NUM_REGIONS)
+		return -EINVAL;
+
+	if (region_info->index == VFIO_PCI_CONFIG_REGION_INDEX) {
+		region_info->offset = 0;
+		region_info->size = MBOCHS_CONFIG_SPACE_SIZE;
+		region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+				      VFIO_REGION_INFO_FLAG_WRITE);
+	} else if (region_info->index == VFIO_PCI_BAR0_REGION_INDEX) {
+		region_info->offset = MBOCHS_MEMORY_BAR_OFFSET;
+		region_info->size = state->memsize;
+		region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+				      VFIO_REGION_INFO_FLAG_WRITE |
+				      VFIO_REGION_INFO_FLAG_MMAP);
+	} else if (region_info->index == VFIO_PCI_BAR2_REGION_INDEX) {
+		region_info->offset = MBOCHS_MMIO_BAR_OFFSET;
+		region_info->size = MBOCHS_MMIO_BAR_SIZE;
+		region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+				      VFIO_REGION_INFO_FLAG_WRITE);
+	} else {
+		/* region not implemented by this device */
+		region_info->size = 0;
+		region_info->offset = 0;
+		region_info->flags = 0;
+	}
+
+	return 0;
+}
+
+/* Report zero interrupts for whichever index is queried.  Always returns 0. */
+static int mbochs_get_irq_info(struct mdev_device *mdev,
+			       struct vfio_irq_info *irq_info)
+{
+	irq_info->count = 0;
+
+	return 0;
+}
+
+/*
+ * Describe the device as a vfio PCI device with the standard number of PCI
+ * regions and irqs.  Always returns 0.
+ */
+static int mbochs_get_device_info(struct mdev_device *mdev,
+				  struct vfio_device_info *dev_info)
+{
+	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+
+	return 0;
+}
+
+/*
+ * Handle VFIO_DEVICE_QUERY_GFX_PLANE.
+ *
+ * A probe request succeeds only for the dmabuf plane type.  For a real
+ * query the current framebuffer configuration is checked (primary plane
+ * only); if it is valid, the dmabuf describing that mode is looked up, or
+ * created when none exists yet, and its geometry and id are reported in
+ * @plane.  If there is no valid framebuffer, all-zero plane information is
+ * returned instead.  For the primary plane the reported id also becomes the
+ * device's active_id.
+ *
+ * Returns 0 on success, -EINVAL for unsupported flags and -ENOMEM when a
+ * dmabuf for a new mode cannot be allocated.
+ */
+static int mbochs_query_gfx_plane(struct mdev_device *mdev,
+				  struct vfio_device_gfx_plane_info *plane)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct device *dev = mdev_dev(mdev);
+	struct mbochs_dmabuf *dmabuf;
+	struct mbochs_mode mode;
+	int ret;
+
+	if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
+		if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE |
+				     VFIO_GFX_PLANE_TYPE_DMABUF))
+			return 0;
+		return -EINVAL;
+	}
+
+	if (plane->flags != VFIO_GFX_PLANE_TYPE_DMABUF)
+		return -EINVAL;
+
+	plane->drm_format_mod = 0;
+	plane->x_pos = 0;
+	plane->y_pos = 0;
+	plane->x_hot = 0;
+	plane->y_hot = 0;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	ret = -EINVAL;
+	if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY)
+		ret = mbochs_check_framebuffer(mdev_state, &mode);
+	if (ret < 0) {
+		plane->drm_format = 0;
+		plane->width = 0;
+		plane->height = 0;
+		plane->stride = 0;
+		plane->size = 0;
+		plane->dmabuf_id = 0;
+		goto done;
+	}
+
+	dmabuf = mbochs_dmabuf_find_by_mode(mdev_state, &mode);
+	if (!dmabuf) {
+		/* first query for this mode: keep the dmabuf just created */
+		dmabuf = mbochs_dmabuf_alloc(mdev_state, &mode);
+	}
+	if (!dmabuf) {
+		mutex_unlock(&mdev_state->ops_lock);
+		return -ENOMEM;
+	}
+
+	plane->drm_format = dmabuf->mode.drm_format;
+	plane->width = dmabuf->mode.width;
+	plane->height = dmabuf->mode.height;
+	plane->stride = dmabuf->mode.stride;
+	plane->size = dmabuf->mode.size;
+	plane->dmabuf_id = dmabuf->id;
+
+done:
+	if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY &&
+	    mdev_state->active_id != plane->dmabuf_id) {
+		dev_dbg(dev, "%s: primary: %d => %d\n", __func__,
+			mdev_state->active_id, plane->dmabuf_id);
+		mdev_state->active_id = plane->dmabuf_id;
+	}
+	mutex_unlock(&mdev_state->ops_lock);
+	return 0;
+}
+
+/*
+ * Handle VFIO_DEVICE_GET_GFX_DMABUF: return a file descriptor for the
+ * dmabuf with the given @id, exporting it first if that has not happened
+ * yet.
+ *
+ * Returns the fd from dma_buf_fd(), -ENOENT for an unknown id, or -EINVAL
+ * when the dmabuf has no exported buffer after the export attempt.
+ */
+static int mbochs_get_gfx_dmabuf(struct mdev_device *mdev,
+				 u32 id)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+	struct mbochs_dmabuf *dmabuf;
+
+	mutex_lock(&state->ops_lock);
+	dmabuf = mbochs_dmabuf_find_by_id(state, id);
+	if (dmabuf && !dmabuf->buf)
+		mbochs_dmabuf_export(dmabuf);
+	mutex_unlock(&state->ops_lock);
+
+	if (!dmabuf)
+		return -ENOENT;
+	if (!dmabuf->buf)
+		return -EINVAL;
+
+	return dma_buf_fd(dmabuf->buf, 0);
+}
+
+/*
+ * mdev ->ioctl callback.
+ *
+ * The info-style requests share one pattern: copy the fixed-size head of
+ * the user structure in, reject it when argsz is too small, let the helper
+ * fill it in, and copy the same head back.  VFIO_DEVICE_GET_INFO also
+ * caches the result in the device state, and VFIO_DEVICE_GET_IRQ_INFO
+ * validates the index against the cached num_irqs.
+ *
+ * Returns 0 (or a dmabuf fd for VFIO_DEVICE_GET_GFX_DMABUF) on success,
+ * -EFAULT on user copy failures, -EINVAL for bad arguments and
+ * VFIO_DEVICE_SET_IRQS, and -ENOTTY for unknown commands.
+ */
+static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
+			 unsigned long arg)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	void __user *uarg = (void __user *)arg;
+	unsigned long minsz;
+	int ret = 0;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+		if (copy_from_user(&info, uarg, minsz))
+			return -EFAULT;
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mbochs_get_device_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		/* remembered for the index check in GET_IRQ_INFO */
+		memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+		if (copy_to_user(uarg, &info, minsz))
+			return -EFAULT;
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		void *cap_type = NULL;
+		u16 cap_type_id = 0;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+		if (copy_from_user(&info, uarg, minsz))
+			return -EFAULT;
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mbochs_get_region_info(mdev, &info, &cap_type_id,
+					     &cap_type);
+		if (ret)
+			return ret;
+
+		if (copy_to_user(uarg, &info, minsz))
+			return -EFAULT;
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+		if (copy_from_user(&info, uarg, minsz))
+			return -EFAULT;
+		if (info.argsz < minsz)
+			return -EINVAL;
+		if (info.index >= mdev_state->dev_info.num_irqs)
+			return -EINVAL;
+
+		ret = mbochs_get_irq_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		if (copy_to_user(uarg, &info, minsz))
+			return -EFAULT;
+		return 0;
+	}
+
+	case VFIO_DEVICE_QUERY_GFX_PLANE:
+	{
+		struct vfio_device_gfx_plane_info plane;
+
+		minsz = offsetofend(struct vfio_device_gfx_plane_info,
+				    region_index);
+		if (copy_from_user(&plane, uarg, minsz))
+			return -EFAULT;
+		if (plane.argsz < minsz)
+			return -EINVAL;
+
+		ret = mbochs_query_gfx_plane(mdev, &plane);
+		if (ret)
+			return ret;
+
+		if (copy_to_user(uarg, &plane, minsz))
+			return -EFAULT;
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_GFX_DMABUF:
+	{
+		u32 dmabuf_id;
+
+		if (get_user(dmabuf_id, (__u32 __user *)arg))
+			return -EFAULT;
+
+		return mbochs_get_gfx_dmabuf(mdev, dmabuf_id);
+	}
+
+	case VFIO_DEVICE_SET_IRQS:
+		return -EINVAL;
+
+	case VFIO_DEVICE_RESET:
+		return mbochs_reset(mdev);
+	}
+
+	return -ENOTTY;
+}
+
+/*
+ * mdev ->open callback: pin this module while the device is open.
+ * Returns -ENODEV when the module reference cannot be taken.
+ */
+static int mbochs_open(struct mdev_device *mdev)
+{
+	return try_module_get(THIS_MODULE) ? 0 : -ENODEV;
+}
+
+/*
+ * mdev ->release callback.
+ *
+ * Empties the dmabuf list: entries that still have an exported dma_buf are
+ * only marked unlinked (mbochs_release_dmabuf() frees them later), the
+ * others are freed right away.  Then the framebuffer pages are released and
+ * the module reference taken in mbochs_open() is dropped.
+ */
+static void mbochs_close(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+	struct mbochs_dmabuf *cur, *next;
+
+	mutex_lock(&state->ops_lock);
+
+	list_for_each_entry_safe(cur, next, &state->dmabufs, next) {
+		list_del(&cur->next);
+		if (!cur->buf) {
+			kfree(cur);
+			continue;
+		}
+		/* free in mbochs_release_dmabuf() */
+		cur->unlinked = true;
+	}
+	mbochs_put_pages(state);
+
+	mutex_unlock(&state->ops_lock);
+	module_put(THIS_MODULE);
+}
+
+/* sysfs "memory" attribute: video memory size of the device's type, in MB. */
+static ssize_t
+memory_show(struct device *dev, struct device_attribute *attr,
+	    char *buf)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev_from_dev(dev));
+
+	return sprintf(buf, "%d MB\n", state->type->mbytes);
+}
+static DEVICE_ATTR_RO(memory);
+
+/* Per-device sysfs attributes; currently only "memory". */
+static struct attribute *mdev_dev_attrs[] = {
+ &dev_attr_memory.attr,
+ NULL,
+};
+
+/* The attributes above are grouped under the name "vendor". */
+static const struct attribute_group mdev_dev_group = {
+ .name = "vendor",
+ .attrs = mdev_dev_attrs,
+};
+
+/* NULL-terminated group list referenced by mdev_fops.mdev_attr_groups. */
+const struct attribute_group *mdev_dev_groups[] = {
+ &mdev_dev_group,
+ NULL,
+};
+
+/* Type attribute "name": prints the name of the type's kobject. */
+static ssize_t
+name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const char *type_name = kobj->name;
+
+	return sprintf(buf, "%s\n", type_name);
+}
+MDEV_TYPE_ATTR_RO(name);
+
+/*
+ * Type attribute "description": one line of text including the type's
+ * video memory size (0 when the type cannot be found).
+ */
+static ssize_t
+description_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const struct mbochs_type *type = mbochs_find_type(kobj);
+
+	if (!type)
+		return sprintf(buf, "virtual display, %d MB video memory\n", 0);
+
+	return sprintf(buf, "virtual display, %d MB video memory\n",
+		       type->mbytes);
+}
+MDEV_TYPE_ATTR_RO(description);
+
+/*
+ * Type attribute "available_instances": how many more devices of this type
+ * fit into the remaining video memory budget (max_mbytes minus what is
+ * already in use).
+ *
+ * mbochs_find_type() can return NULL (mbochs_create() and
+ * description_show() both handle that); report 0 instances in that case
+ * instead of dereferencing the NULL pointer.
+ */
+static ssize_t
+available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const struct mbochs_type *type = mbochs_find_type(kobj);
+	int count = 0;
+
+	if (type)
+		count = (max_mbytes - mbochs_used_mbytes) / type->mbytes;
+
+	return sprintf(buf, "%d\n", count);
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+/* Type attribute "device_api": this device speaks the vfio-pci API. */
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+			       char *buf)
+{
+	const char *api = VFIO_DEVICE_API_PCI_STRING;
+
+	return sprintf(buf, "%s\n", api);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+/* sysfs attributes shared by every supported mdev type. */
+static struct attribute *mdev_types_attrs[] = {
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_description.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_available_instances.attr,
+ NULL,
+};
+
+/* One attribute group per type; they differ only in the group name. */
+static struct attribute_group mdev_type_group1 = {
+ .name = MBOCHS_TYPE_1,
+ .attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group2 = {
+ .name = MBOCHS_TYPE_2,
+ .attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group3 = {
+ .name = MBOCHS_TYPE_3,
+ .attrs = mdev_types_attrs,
+};
+
+/* NULL-terminated list referenced by mdev_fops.supported_type_groups. */
+static struct attribute_group *mdev_type_groups[] = {
+ &mdev_type_group1,
+ &mdev_type_group2,
+ &mdev_type_group3,
+ NULL,
+};
+
+/* Parent ops handed to mdev_register_device() in mbochs_dev_init(). */
+static const struct mdev_parent_ops mdev_fops = {
+ .owner = THIS_MODULE,
+ .mdev_attr_groups = mdev_dev_groups,
+ .supported_type_groups = mdev_type_groups,
+ .create = mbochs_create,
+ .remove = mbochs_remove,
+ .open = mbochs_open,
+ .release = mbochs_close,
+ .read = mbochs_read,
+ .write = mbochs_write,
+ .ioctl = mbochs_ioctl,
+ .mmap = mbochs_mmap,
+};
+
+/* File operations for the mbochs cdev; only .owner is set, no callbacks. */
+static const struct file_operations vd_fops = {
+ .owner = THIS_MODULE,
+};
+
+/* Release callback for the statically allocated mbochs_dev; nothing to free. */
+static void mbochs_device_release(struct device *dev)
+{
+}
+
+/*
+ * Module init: reserve a char device region, add the cdev, create the
+ * device class, register the parent device and finally register it with
+ * the mdev core.
+ *
+ * Every step that fails unwinds the steps completed before it and returns
+ * the error; this includes cdev_add(), whose return value must not be
+ * ignored.
+ */
+static int __init mbochs_dev_init(void)
+{
+	int ret = 0;
+
+	ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK, MBOCHS_NAME);
+	if (ret < 0) {
+		pr_err("Error: failed to register mbochs_dev, err: %d\n", ret);
+		return ret;
+	}
+	cdev_init(&mbochs_cdev, &vd_fops);
+	ret = cdev_add(&mbochs_cdev, mbochs_devt, MINORMASK);
+	if (ret < 0) {
+		pr_err("Error: failed to add mbochs_dev cdev, err: %d\n", ret);
+		goto failed0;
+	}
+	pr_info("%s: major %d\n", __func__, MAJOR(mbochs_devt));
+
+	mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME);
+	if (IS_ERR(mbochs_class)) {
+		pr_err("Error: failed to register mbochs_dev class\n");
+		ret = PTR_ERR(mbochs_class);
+		goto failed1;
+	}
+	mbochs_dev.class = mbochs_class;
+	mbochs_dev.release = mbochs_device_release;
+	dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME);
+
+	ret = device_register(&mbochs_dev);
+	if (ret)
+		goto failed2;
+
+	ret = mdev_register_device(&mbochs_dev, &mdev_fops);
+	if (ret)
+		goto failed3;
+
+	return 0;
+
+failed3:
+	device_unregister(&mbochs_dev);
+failed2:
+	class_destroy(mbochs_class);
+failed1:
+	cdev_del(&mbochs_cdev);
+failed0:
+	unregister_chrdev_region(mbochs_devt, MINORMASK);
+	return ret;
+}
+
+/*
+ * Module exit: undo mbochs_dev_init().  The parent device is unregistered
+ * from the mdev core first, then the device itself, the cdev, the char
+ * device region and finally the class are torn down.
+ */
+static void __exit mbochs_dev_exit(void)
+{
+ /* NOTE(review): bus is cleared before unregistering; reason not visible here */
+ mbochs_dev.bus = NULL;
+ mdev_unregister_device(&mbochs_dev);
+
+ device_unregister(&mbochs_dev);
+ cdev_del(&mbochs_cdev);
+ unregister_chrdev_region(mbochs_devt, MINORMASK);
+ class_destroy(mbochs_class);
+ mbochs_class = NULL;
+}
+
+module_init(mbochs_dev_init)
+module_exit(mbochs_dev_exit)
diff --git a/samples/vfio-mdev/mdpy-defs.h b/samples/vfio-mdev/mdpy-defs.h
new file mode 100644
index 000000000000..96b3b1b49d34
--- /dev/null
+++ b/samples/vfio-mdev/mdpy-defs.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Simple pci display device.
+ *
+ * Framebuffer memory is pci bar 0.
+ * Configuration (read-only) is in pci config space.
+ * Format field uses drm fourcc codes.
+ * ATM only DRM_FORMAT_XRGB8888 is supported.
+ */
+
+/* pci ids */
+#define MDPY_PCI_VENDOR_ID 0x1b36 /* redhat */
+#define MDPY_PCI_DEVICE_ID 0x000f
+#define MDPY_PCI_SUBVENDOR_ID PCI_SUBVENDOR_ID_REDHAT_QUMRANET
+#define MDPY_PCI_SUBDEVICE_ID PCI_SUBDEVICE_ID_QEMU
+
+/* pci cfg space offsets for fb config (dword) */
+#define MDPY_VENDORCAP_OFFSET 0x40
+#define MDPY_VENDORCAP_SIZE 0x10
+#define MDPY_FORMAT_OFFSET (MDPY_VENDORCAP_OFFSET + 0x04)
+#define MDPY_WIDTH_OFFSET (MDPY_VENDORCAP_OFFSET + 0x08)
+#define MDPY_HEIGHT_OFFSET (MDPY_VENDORCAP_OFFSET + 0x0c)
diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
new file mode 100644
index 000000000000..2719bb259653
--- /dev/null
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Framebuffer driver for mdpy (mediated virtual pci display device).
+ *
+ * See mdpy-defs.h for device specs
+ *
+ * (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * Using some code snippets from simplefb and cirrusfb.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <drm/drm_fourcc.h>
+#include "mdpy-defs.h"
+
+/*
+ * Template for the fixed screen info; mdpy_fb_probe() copies it and fills
+ * in the memory range and line length of the actual device.
+ */
+static const struct fb_fix_screeninfo mdpy_fb_fix = {
+ .id = "mdpy-fb",
+ .type = FB_TYPE_PACKED_PIXELS,
+ .visual = FB_VISUAL_TRUECOLOR,
+ .accel = FB_ACCEL_NONE,
+};
+
+/*
+ * Template for the variable screen info; mdpy_fb_probe() copies it and sets
+ * the resolution.  The pixel layout is 32 bits per pixel with four 8-bit
+ * fields: transp at bit 24, red at 16, green at 8 and blue at 0.
+ */
+static const struct fb_var_screeninfo mdpy_fb_var = {
+ .height = -1,
+ .width = -1,
+ .activate = FB_ACTIVATE_NOW,
+ .vmode = FB_VMODE_NONINTERLACED,
+
+ .bits_per_pixel = 32,
+ .transp.offset = 24,
+ .red.offset = 16,
+ .green.offset = 8,
+ .blue.offset = 0,
+ .transp.length = 8,
+ .red.length = 8,
+ .green.length = 8,
+ .blue.length = 8,
+};
+
+/* Number of entries in the pseudo palette written by mdpy_fb_setcolreg(). */
+#define PSEUDO_PALETTE_SIZE 16
+
+/* Driver private data allocated together with the fb_info. */
+struct mdpy_fb_par {
+ u32 palette[PSEUDO_PALETTE_SIZE];
+};
+
+/*
+ * fb_ops ->fb_setcolreg: store one pseudo palette entry.
+ *
+ * The 16-bit colour components are reduced to the field widths of the
+ * current mode and packed at the mode's field offsets; if the mode has a
+ * transparency field, all of its bits are set.  @transp itself is not used.
+ *
+ * Returns 0, or -EINVAL when @regno is outside the pseudo palette.
+ */
+static int mdpy_fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+			     u_int transp, struct fb_info *info)
+{
+	const struct fb_var_screeninfo *var = &info->var;
+	u32 *pal = info->pseudo_palette;
+	u32 r, g, b, pixel;
+
+	if (regno >= PSEUDO_PALETTE_SIZE)
+		return -EINVAL;
+
+	r = red >> (16 - var->red.length);
+	g = green >> (16 - var->green.length);
+	b = blue >> (16 - var->blue.length);
+
+	pixel = (r << var->red.offset) |
+		(g << var->green.offset) |
+		(b << var->blue.offset);
+
+	if (var->transp.length > 0) {
+		u32 alpha = (1 << var->transp.length) - 1;
+
+		pixel |= alpha << var->transp.offset;
+	}
+
+	pal[regno] = pixel;
+	return 0;
+}
+
+/* fb_ops ->fb_destroy: unmap the framebuffer if it was mapped. */
+static void mdpy_fb_destroy(struct fb_info *info)
+{
+	if (!info->screen_base)
+		return;
+
+	iounmap(info->screen_base);
+}
+
+/* Framebuffer operations; drawing uses the generic cfb_* helpers. */
+static struct fb_ops mdpy_fb_ops = {
+ .owner = THIS_MODULE,
+ .fb_destroy = mdpy_fb_destroy,
+ .fb_setcolreg = mdpy_fb_setcolreg,
+ .fb_fillrect = cfb_fillrect,
+ .fb_copyarea = cfb_copyarea,
+ .fb_imageblit = cfb_imageblit,
+};
+
+/*
+ * PCI probe: bring up a framebuffer on top of an mdpy device.
+ *
+ * Format, width and height are read from the device's PCI config space and
+ * validated (only DRM_FORMAT_XRGB8888, 100..10000 pixels per dimension).
+ * BAR 0 is then mapped and registered as a framebuffer.
+ *
+ * Returns 0 on success or a negative errno.  On every failure all
+ * resources acquired so far (framebuffer, mapping, PCI regions, enabled
+ * device) are released again.
+ */
+static int mdpy_fb_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *ent)
+{
+	struct fb_info *info;
+	struct mdpy_fb_par *par;
+	u32 format, width, height;
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0)
+		return ret;
+
+	ret = pci_request_regions(pdev, "mdpy-fb");
+	if (ret < 0)
+		goto err_disable_dev;
+
+	pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format);
+	pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width);
+	pci_read_config_dword(pdev, MDPY_HEIGHT_OFFSET, &height);
+	if (format != DRM_FORMAT_XRGB8888) {
+		pci_err(pdev, "format mismatch (0x%x != 0x%x)\n",
+			format, DRM_FORMAT_XRGB8888);
+		ret = -EINVAL;
+		goto err_release_regions;
+	}
+	if (width < 100 || width > 10000) {
+		pci_err(pdev, "width (%d) out of range\n", width);
+		ret = -EINVAL;
+		goto err_release_regions;
+	}
+	if (height < 100 || height > 10000) {
+		pci_err(pdev, "height (%d) out of range\n", height);
+		ret = -EINVAL;
+		goto err_release_regions;
+	}
+	pci_info(pdev, "mdpy found: %dx%d framebuffer\n",
+		 width, height);
+
+	info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev);
+	if (!info) {
+		/* ret still holds 0 from the calls above; report the failure */
+		ret = -ENOMEM;
+		goto err_release_regions;
+	}
+	pci_set_drvdata(pdev, info);
+	par = info->par;
+
+	info->fix = mdpy_fb_fix;
+	info->fix.smem_start = pci_resource_start(pdev, 0);
+	info->fix.smem_len = pci_resource_len(pdev, 0);
+	info->fix.line_length = width * 4;
+
+	info->var = mdpy_fb_var;
+	info->var.xres = width;
+	info->var.yres = height;
+	info->var.xres_virtual = width;
+	info->var.yres_virtual = height;
+
+	info->screen_size = info->fix.smem_len;
+	info->screen_base = ioremap(info->fix.smem_start,
+				    info->screen_size);
+	if (!info->screen_base) {
+		pci_err(pdev, "ioremap(pcibar) failed\n");
+		ret = -EIO;
+		goto err_release_fb;
+	}
+
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto err_unmap;
+	}
+	info->apertures->ranges[0].base = info->fix.smem_start;
+	info->apertures->ranges[0].size = info->fix.smem_len;
+
+	info->fbops = &mdpy_fb_ops;
+	info->flags = FBINFO_DEFAULT;
+	info->pseudo_palette = par->palette;
+
+	ret = register_framebuffer(info);
+	if (ret < 0) {
+		pci_err(pdev, "mdpy-fb device register failed: %d\n", ret);
+		goto err_unmap;
+	}
+
+	pci_info(pdev, "fb%d registered\n", info->node);
+	return 0;
+
+err_unmap:
+	iounmap(info->screen_base);
+
+err_release_fb:
+	framebuffer_release(info);
+
+err_release_regions:
+	pci_release_regions(pdev);
+
+err_disable_dev:
+	pci_disable_device(pdev);
+
+	return ret;
+}
+
+/*
+ * PCI remove: unregister and release the framebuffer, then give back what
+ * mdpy_fb_probe() acquired from the PCI core (the requested regions and
+ * the enabled device).
+ */
+static void mdpy_fb_remove(struct pci_dev *pdev)
+{
+	struct fb_info *info = pci_get_drvdata(pdev);
+
+	unregister_framebuffer(info);
+	framebuffer_release(info);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+/* PCI ids this driver binds to: the mdpy device described in mdpy-defs.h. */
+static struct pci_device_id mdpy_fb_pci_table[] = {
+ {
+ .vendor = MDPY_PCI_VENDOR_ID,
+ .device = MDPY_PCI_DEVICE_ID,
+ .subvendor = MDPY_PCI_SUBVENDOR_ID,
+ .subdevice = MDPY_PCI_SUBDEVICE_ID,
+ }, {
+ /* end of list */
+ }
+};
+
+/* PCI driver glue, registered from mdpy_fb_init(). */
+static struct pci_driver mdpy_fb_pci_driver = {
+ .name = "mdpy-fb",
+ .id_table = mdpy_fb_pci_table,
+ .probe = mdpy_fb_probe,
+ .remove = mdpy_fb_remove,
+};
+
+static int __init mdpy_fb_init(void)
+{
+ int ret;
+
+ ret = pci_register_driver(&mdpy_fb_pci_driver);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+module_init(mdpy_fb_init);
+
+MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table);
+MODULE_LICENSE("GPL v2");
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
new file mode 100644
index 000000000000..96e7969c473a
--- /dev/null
+++ b/samples/vfio-mdev/mdpy.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediated virtual PCI display host device driver
+ *
+ * See mdpy-defs.h for device specs
+ *
+ * (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * based on mtty driver which is:
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Author: Neo Jia <cjia@nvidia.com>
+ * Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/cdev.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <drm/drm_fourcc.h>
+#include "mdpy-defs.h"
+
+#define MDPY_NAME		"mdpy"
+#define MDPY_CLASS_NAME		"mdpy"
+
+/* Size in bytes of the emulated PCI config space buffer (vconfig). */
+#define MDPY_CONFIG_SPACE_SIZE	0xff
+/* File offset at which the framebuffer BAR starts. */
+#define MDPY_MEMORY_BAR_OFFSET	PAGE_SIZE
+/* Extra region index, beyond the standard PCI ones, exposing the display. */
+#define MDPY_DISPLAY_REGION	16
+
+/*
+ * Store a 16/32-bit value at an address inside the virtual config space.
+ * Both arguments are parenthesized so arbitrary expressions expand safely.
+ * NOTE(review): despite the names no cpu_to_le*() conversion is applied,
+ * so values are stored in host byte order -- confirm for big-endian hosts.
+ */
+#define STORE_LE16(addr, val)	(*(u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)	(*(u32 *)(addr) = (val))
+
+
+MODULE_LICENSE("GPL v2");
+
+/*
+ * Upper bound on simultaneously existing mdpy devices ("count=" parameter).
+ * Unsigned on purpose: it is compared against the u32 mdpy_count, and a
+ * negative value would be converted to a huge unsigned number, silently
+ * disabling the limit.
+ */
+static unsigned int max_devices = 4;
+module_param_named(count, max_devices, uint, 0444);
+MODULE_PARM_DESC(count, "number of " MDPY_NAME " devices");
+
+
+/* Short names of the supported types; also used as attribute group names. */
+#define MDPY_TYPE_1 "vga"
+#define MDPY_TYPE_2 "xga"
+#define MDPY_TYPE_3 "hd"
+
+/* Pixel format and geometry of one supported mdev type. */
+struct mdpy_type {
+	const char *name;	/* "<class>-<type>", matched against kobj->name */
+	u32 format;		/* DRM fourcc code */
+	u32 bytepp;		/* bytes per pixel */
+	u32 width;
+	u32 height;
+};
+
+/* Table of all types; mdpy_find_type() looks entries up by name. */
+static const struct mdpy_type mdpy_types[] = {
+	{
+		.name	= MDPY_CLASS_NAME "-" MDPY_TYPE_1,
+		.format	= DRM_FORMAT_XRGB8888,
+		.bytepp	= 4,
+		.width	= 640,
+		.height	= 480,
+	},
+	{
+		.name	= MDPY_CLASS_NAME "-" MDPY_TYPE_2,
+		.format	= DRM_FORMAT_XRGB8888,
+		.bytepp	= 4,
+		.width	= 1024,
+		.height	= 768,
+	},
+	{
+		.name	= MDPY_CLASS_NAME "-" MDPY_TYPE_3,
+		.format	= DRM_FORMAT_XRGB8888,
+		.bytepp	= 4,
+		.width	= 1920,
+		.height	= 1080,
+	},
+};
+
+/* Module-wide state, set up in mdpy_dev_init() and torn down in mdpy_dev_exit(). */
+static dev_t mdpy_devt;
+static struct class *mdpy_class;
+static struct cdev mdpy_cdev;
+static struct device mdpy_dev;
+/* Number of currently existing mdev instances (see mdpy_create/mdpy_remove). */
+static u32 mdpy_count;
+
+/*
+ * State of each mdev device.
+ * Allocated in mdpy_create(), stored as the mdev's drvdata and freed in
+ * mdpy_remove().
+ */
+struct mdev_state {
+ /* emulated PCI config space, MDPY_CONFIG_SPACE_SIZE bytes */
+ u8 *vconfig;
+ /* value returned for BAR0 after an all-ones sizing write */
+ u32 bar_mask;
+ /* serializes mdev_access() */
+ struct mutex ops_lock;
+ struct mdev_device *mdev;
+ /* copy of the last VFIO_DEVICE_GET_INFO result */
+ struct vfio_device_info dev_info;
+
+ const struct mdpy_type *type;
+ /* size of memblk in bytes (framebuffer size rounded up to a power of two) */
+ u32 memsize;
+ /* framebuffer backing store from vmalloc_user() */
+ void *memblk;
+};
+
+/*
+ * Look up the mdpy_types[] entry whose name equals kobj->name.
+ * Returns a pointer into the table, or NULL when nothing matches.
+ */
+static const struct mdpy_type *mdpy_find_type(struct kobject *kobj)
+{
+	const struct mdpy_type *cur = mdpy_types;
+	const struct mdpy_type *end = mdpy_types + ARRAY_SIZE(mdpy_types);
+
+	for (; cur < end; cur++) {
+		if (!strcmp(cur->name, kobj->name))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fill the virtual PCI config space of a freshly created device:
+ * IDs, command/status/class, a 32-bit prefetchable memory BAR0 and the
+ * vendor capability carrying format, width and height.
+ * Expects mdev_state->type and mdev_state->memsize to be set already.
+ */
+static void mdpy_create_config_space(struct mdev_state *mdev_state)
+{
+	const struct mdpy_type *type = mdev_state->type;
+	u8 *cfg = mdev_state->vconfig;
+	u8 *cap = cfg + MDPY_VENDORCAP_OFFSET;
+
+	/* device identification */
+	STORE_LE16((u16 *) &cfg[PCI_VENDOR_ID], MDPY_PCI_VENDOR_ID);
+	STORE_LE16((u16 *) &cfg[PCI_DEVICE_ID], MDPY_PCI_DEVICE_ID);
+	STORE_LE16((u16 *) &cfg[PCI_SUBSYSTEM_VENDOR_ID],
+		   MDPY_PCI_SUBVENDOR_ID);
+	STORE_LE16((u16 *) &cfg[PCI_SUBSYSTEM_ID], MDPY_PCI_SUBDEVICE_ID);
+
+	/* command, status, class and revision */
+	STORE_LE16((u16 *) &cfg[PCI_COMMAND],
+		   PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	STORE_LE16((u16 *) &cfg[PCI_STATUS], PCI_STATUS_CAP_LIST);
+	STORE_LE16((u16 *) &cfg[PCI_CLASS_DEVICE], PCI_CLASS_DISPLAY_OTHER);
+	cfg[PCI_CLASS_REVISION] = 0x01;
+
+	/* BAR0: framebuffer memory; bar_mask answers sizing writes */
+	STORE_LE32((u32 *) &cfg[PCI_BASE_ADDRESS_0],
+		   PCI_BASE_ADDRESS_SPACE_MEMORY |
+		   PCI_BASE_ADDRESS_MEM_TYPE_32 |
+		   PCI_BASE_ADDRESS_MEM_PREFETCH);
+	mdev_state->bar_mask = ~(mdev_state->memsize) + 1;
+
+	/* vendor specific capability for the config registers */
+	cfg[PCI_CAPABILITY_LIST] = MDPY_VENDORCAP_OFFSET;
+	cap[0] = 0x09; /* vendor cap */
+	cap[1] = 0x00; /* next ptr */
+	cap[2] = MDPY_VENDORCAP_SIZE;
+	STORE_LE32((u32 *) &cfg[MDPY_FORMAT_OFFSET], type->format);
+	STORE_LE32((u32 *) &cfg[MDPY_WIDTH_OFFSET], type->width);
+	STORE_LE32((u32 *) &cfg[MDPY_HEIGHT_OFFSET], type->height);
+}
+
+/*
+ * Handle a guest write to the virtual PCI config space.
+ *
+ * @offset: config space offset being written
+ * @buf:    source data, @count bytes long
+ * @count:  number of valid bytes in @buf
+ *
+ * Only BAR0 is writable; writes to any other register are ignored.
+ * A BAR0 write must be a full dword: the callers may pass a 1- or 2-byte
+ * buffer, and reading four bytes from it would run past its end.
+ */
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+				 char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	u32 cfg_addr;
+
+	switch (offset) {
+	case PCI_BASE_ADDRESS_0:
+		if (count != sizeof(cfg_addr))
+			break;
+		/* memcpy avoids assuming anything about buf's alignment */
+		memcpy(&cfg_addr, buf, sizeof(cfg_addr));
+
+		if (cfg_addr == 0xffffffff) {
+			/* BAR sizing: report the size mask */
+			cfg_addr = (cfg_addr & mdev_state->bar_mask);
+		} else {
+			cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK;
+			if (cfg_addr)
+				dev_info(dev, "BAR0 @ 0x%x\n", cfg_addr);
+		}
+
+		/* keep the read-only type/prefetch bits of the BAR */
+		cfg_addr |= (mdev_state->vconfig[offset] &
+			     ~PCI_BASE_ADDRESS_MEM_MASK);
+		STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+		break;
+	default:
+		/* every other register is treated as read-only */
+		break;
+	}
+}
+
+/*
+ * Perform one read or write of @count bytes at file offset @pos.
+ *
+ * Offsets inside [0, MDPY_CONFIG_SPACE_SIZE) address the virtual config
+ * space; offsets starting at MDPY_MEMORY_BAR_OFFSET address the framebuffer.
+ * The whole access must fit into the addressed area.
+ *
+ * Returns @count on success and -1 for an unhandled or out-of-range access.
+ * Runs under mdev_state->ops_lock.
+ */
+static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
+			   loff_t pos, bool is_write)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct device *dev = mdev_dev(mdev);
+	int ret = 0;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	if (pos >= 0 && pos < MDPY_CONFIG_SPACE_SIZE &&
+	    count <= MDPY_CONFIG_SPACE_SIZE - pos) {
+		/* the end of the access is checked too: vconfig is only
+		 * MDPY_CONFIG_SPACE_SIZE bytes long
+		 */
+		if (is_write)
+			handle_pci_cfg_write(mdev_state, pos, buf, count);
+		else
+			memcpy(buf, (mdev_state->vconfig + pos), count);
+
+	} else if ((pos >= MDPY_MEMORY_BAR_OFFSET) &&
+		   (pos + count <=
+		    MDPY_MEMORY_BAR_OFFSET + mdev_state->memsize)) {
+		/* translate the file offset into a framebuffer offset and
+		 * actually apply it to the copy
+		 */
+		pos -= MDPY_MEMORY_BAR_OFFSET;
+		if (is_write)
+			memcpy(mdev_state->memblk + pos, buf, count);
+		else
+			memcpy(buf, mdev_state->memblk + pos, count);
+
+	} else {
+		dev_info(dev, "%s: %s @0x%llx (unhandled)\n",
+			 __func__, is_write ? "WR" : "RD", pos);
+		ret = -1;
+		goto accessfailed;
+	}
+
+	ret = count;
+
+
+accessfailed:
+	mutex_unlock(&mdev_state->ops_lock);
+
+	return ret;
+}
+
+/*
+ * Reset the device by repainting the framebuffer with a vertical gray
+ * gradient: every byte of scanline N is set to N * 255 / height.
+ * Always returns 0.
+ */
+static int mdpy_reset(struct mdev_device *mdev)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	const struct mdpy_type *type = mdev_state->type;
+	u32 line_bytes = type->width * type->bytepp;
+	u32 row;
+
+	for (row = 0; row < type->height; row++) {
+		void *line = mdev_state->memblk + row * line_bytes;
+
+		memset(line, row * 255 / type->height, line_bytes);
+	}
+
+	return 0;
+}
+
+/*
+ * mdev create callback: allocate the per-device state, the virtual config
+ * space and the framebuffer, then initialize and reset the device.
+ *
+ * Falls back to the first mdpy_types[] entry when kobj->name matches no
+ * known type. Returns 0 on success, -ENOMEM when the device limit is
+ * reached or an allocation fails.
+ */
+static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	const struct mdpy_type *type = mdpy_find_type(kobj);
+	struct device *dev = mdev_dev(mdev);
+	struct mdev_state *mdev_state;
+	u32 fbsize;
+
+	if (mdpy_count >= max_devices)
+		return -ENOMEM;
+
+	mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
+	if (!mdev_state)
+		return -ENOMEM;
+
+	mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+	if (!mdev_state->vconfig)
+		goto err_free_state;
+
+	if (!type)
+		type = &mdpy_types[0];
+	/* memsize must be a power of two so bar_mask can be derived from it */
+	fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp);
+
+	mdev_state->memblk = vmalloc_user(fbsize);
+	if (!mdev_state->memblk)
+		goto err_free_vconfig;
+
+	dev_info(dev, "%s: %s (%dx%d)\n",
+		 __func__, kobj->name, type->width, type->height);
+
+	mutex_init(&mdev_state->ops_lock);
+	mdev_state->mdev = mdev;
+	mdev_set_drvdata(mdev, mdev_state);
+
+	mdev_state->type = type;
+	mdev_state->memsize = fbsize;
+	mdpy_create_config_space(mdev_state);
+	mdpy_reset(mdev);
+
+	mdpy_count++;
+	return 0;
+
+err_free_vconfig:
+	kfree(mdev_state->vconfig);
+err_free_state:
+	kfree(mdev_state);
+	return -ENOMEM;
+}
+
+/*
+ * mdev remove callback: detach the state from the mdev, free everything
+ * mdpy_create() allocated and give the instance slot back.
+ * Always returns 0.
+ */
+static int mdpy_remove(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	dev_info(mdev_dev(mdev), "%s\n", __func__);
+
+	mdev_set_drvdata(mdev, NULL);
+
+	vfree(state->memblk);
+	kfree(state->vconfig);
+	kfree(state);
+
+	mdpy_count--;
+
+	return 0;
+}
+
+/*
+ * mdev read callback: copy @count bytes starting at *ppos to user space.
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses,
+ * each one served by mdev_access(). Returns the number of bytes read, or
+ * -EFAULT as soon as any access or user copy fails.
+ */
+static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+
+	while (count) {
+		union {
+			u32 dword;
+			u16 word;
+			u8 byte;
+		} val;
+		size_t width;
+
+		/* widest access allowed by remaining length and alignment */
+		if (count >= 4 && !(*ppos % 4))
+			width = sizeof(val.dword);
+		else if (count >= 2 && !(*ppos % 2))
+			width = sizeof(val.word);
+		else
+			width = sizeof(val.byte);
+
+		if (mdev_access(mdev, (char *)&val, width, *ppos, false) <= 0)
+			return -EFAULT;
+
+		if (copy_to_user(buf, &val, width))
+			return -EFAULT;
+
+		count -= width;
+		done += width;
+		*ppos += width;
+		buf += width;
+	}
+
+	return done;
+}
+
+/*
+ * mdev write callback: copy @count bytes from user space to *ppos.
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses,
+ * each one handed to mdev_access(). Returns the number of bytes written,
+ * or -EFAULT as soon as any user copy or access fails.
+ */
+static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+
+	while (count) {
+		union {
+			u32 dword;
+			u16 word;
+			u8 byte;
+		} val;
+		size_t width;
+
+		/* widest access allowed by remaining length and alignment */
+		if (count >= 4 && !(*ppos % 4))
+			width = sizeof(val.dword);
+		else if (count >= 2 && !(*ppos % 2))
+			width = sizeof(val.word);
+		else
+			width = sizeof(val.byte);
+
+		if (copy_from_user(&val, buf, width))
+			return -EFAULT;
+
+		if (mdev_access(mdev, (char *)&val, width, *ppos, true) <= 0)
+			return -EFAULT;
+
+		count -= width;
+		done += width;
+		*ppos += width;
+		buf += width;
+	}
+
+	return done;
+}
+
+/*
+ * mdev mmap callback: map the framebuffer into user space.
+ *
+ * The mapping must start at the BAR offset, be shared and not exceed the
+ * framebuffer size; anything else yields -EINVAL.
+ */
+static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	unsigned long len = vma->vm_end - vma->vm_start;
+
+	if (vma->vm_pgoff != MDPY_MEMORY_BAR_OFFSET >> PAGE_SHIFT ||
+	    vma->vm_end < vma->vm_start ||
+	    len > mdev_state->memsize ||
+	    !(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	return remap_vmalloc_range_partial(vma, vma->vm_start,
+					   mdev_state->memblk, len);
+}
+
+/*
+ * Fill @region_info for the region selected by region_info->index.
+ *
+ * The config region is read/write; BAR0 and the display region both
+ * describe the framebuffer and are additionally mmap-able. Every other
+ * standard PCI region is reported as empty. Returns -EINVAL for an index
+ * that is neither a standard PCI region nor MDPY_DISPLAY_REGION, or when
+ * the mdev has no state attached. @cap_type_id and @cap_type are not used.
+ */
+static int mdpy_get_region_info(struct mdev_device *mdev,
+				struct vfio_region_info *region_info,
+				u16 *cap_type_id, void **cap_type)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	u32 idx;
+
+	if (!mdev_state)
+		return -EINVAL;
+
+	idx = region_info->index;
+	if (idx >= VFIO_PCI_NUM_REGIONS && idx != MDPY_DISPLAY_REGION)
+		return -EINVAL;
+
+	if (idx == VFIO_PCI_CONFIG_REGION_INDEX) {
+		region_info->offset = 0;
+		region_info->size   = MDPY_CONFIG_SPACE_SIZE;
+		region_info->flags  = (VFIO_REGION_INFO_FLAG_READ |
+				       VFIO_REGION_INFO_FLAG_WRITE);
+	} else if (idx == VFIO_PCI_BAR0_REGION_INDEX ||
+		   idx == MDPY_DISPLAY_REGION) {
+		region_info->offset = MDPY_MEMORY_BAR_OFFSET;
+		region_info->size   = mdev_state->memsize;
+		region_info->flags  = (VFIO_REGION_INFO_FLAG_READ  |
+				       VFIO_REGION_INFO_FLAG_WRITE |
+				       VFIO_REGION_INFO_FLAG_MMAP);
+	} else {
+		region_info->size = 0;
+		region_info->offset = 0;
+		region_info->flags = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Report IRQ information: the count is set to 0 for whatever index was
+ * requested. Always returns 0.
+ */
+static int mdpy_get_irq_info(struct mdev_device *mdev,
+ struct vfio_irq_info *irq_info)
+{
+ irq_info->count = 0;
+ return 0;
+}
+
+/*
+ * Fill @dev_info: the device is flagged as a PCI device and advertises
+ * the standard VFIO PCI region and IRQ counts. Always returns 0.
+ */
+static int mdpy_get_device_info(struct mdev_device *mdev,
+ struct vfio_device_info *dev_info)
+{
+ dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+ dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+ dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+ return 0;
+}
+
+/*
+ * Answer VFIO_DEVICE_QUERY_GFX_PLANE.
+ *
+ * A probe succeeds only for exactly PROBE | REGION. A real query must
+ * have flags equal to VFIO_GFX_PLANE_TYPE_REGION and is answered from the
+ * device type and framebuffer size. Everything else returns -EINVAL.
+ */
+static int mdpy_query_gfx_plane(struct mdev_device *mdev,
+				struct vfio_device_gfx_plane_info *plane)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	const struct mdpy_type *type;
+
+	if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
+		if (plane->flags != (VFIO_GFX_PLANE_TYPE_PROBE |
+				     VFIO_GFX_PLANE_TYPE_REGION))
+			return -EINVAL;
+		return 0;
+	}
+
+	if (plane->flags != VFIO_GFX_PLANE_TYPE_REGION)
+		return -EINVAL;
+
+	type = mdev_state->type;
+
+	plane->drm_format   = type->format;
+	plane->width        = type->width;
+	plane->height       = type->height;
+	plane->stride       = (type->width * type->bytepp);
+	plane->size         = mdev_state->memsize;
+	plane->region_index = MDPY_DISPLAY_REGION;
+
+	/* unused */
+	plane->drm_format_mod = 0;
+	plane->x_pos = 0;
+	plane->y_pos = 0;
+	plane->x_hot = 0;
+	plane->y_hot = 0;
+
+	return 0;
+}
+
+/*
+ * mdev ioctl callback.
+ *
+ * Each GET/QUERY command copies the fixed-size head of the user structure
+ * in, validates argsz, lets the matching helper fill it and copies the
+ * same number of bytes back. Returns 0 on success, -EFAULT on a failed
+ * user copy, -EINVAL on a bad argument and -ENOTTY for unknown commands.
+ *
+ * NOTE(review): the GET_IRQ_INFO index check uses dev_info.num_irqs, which
+ * is only filled in by a previous VFIO_DEVICE_GET_INFO call.
+ */
+static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
+		       unsigned long arg)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	void __user *uarg = (void __user *)arg;
+	unsigned long minsz;
+	int ret = 0;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, uarg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mdpy_get_device_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		/* remember the answer for later IRQ index validation */
+		memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+		if (copy_to_user(uarg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, uarg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mdpy_get_region_info(mdev, &info, &cap_type_id,
+					   &cap_type);
+		if (ret)
+			return ret;
+
+		if (copy_to_user(uarg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, uarg, minsz))
+			return -EFAULT;
+
+		if ((info.argsz < minsz) ||
+		    (info.index >= mdev_state->dev_info.num_irqs))
+			return -EINVAL;
+
+		ret = mdpy_get_irq_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		if (copy_to_user(uarg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_QUERY_GFX_PLANE:
+	{
+		struct vfio_device_gfx_plane_info plane;
+
+		minsz = offsetofend(struct vfio_device_gfx_plane_info,
+				    region_index);
+
+		if (copy_from_user(&plane, uarg, minsz))
+			return -EFAULT;
+
+		if (plane.argsz < minsz)
+			return -EINVAL;
+
+		ret = mdpy_query_gfx_plane(mdev, &plane);
+		if (ret)
+			return ret;
+
+		if (copy_to_user(uarg, &plane, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_SET_IRQS:
+		return -EINVAL;
+
+	case VFIO_DEVICE_RESET:
+		return mdpy_reset(mdev);
+	}
+
+	return -ENOTTY;
+}
+
+/*
+ * mdev open callback: take a reference on this module for as long as the
+ * device is open. Returns -ENODEV when the reference cannot be taken.
+ */
+static int mdpy_open(struct mdev_device *mdev)
+{
+	return try_module_get(THIS_MODULE) ? 0 : -ENODEV;
+}
+
+/* mdev release callback: drop the module reference taken in mdpy_open(). */
+static void mdpy_close(struct mdev_device *mdev)
+{
+ module_put(THIS_MODULE);
+}
+
+/* "resolution" device attribute: prints "<width>x<height>" of the device type. */
+static ssize_t
+resolution_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev_from_dev(dev));
+	const struct mdpy_type *type = mdev_state->type;
+
+	return sprintf(buf, "%dx%d\n", type->width, type->height);
+}
+static DEVICE_ATTR_RO(resolution);
+
+/* Per-device attributes, collected in the "vendor" attribute group. */
+static struct attribute *mdev_dev_attrs[] = {
+	&dev_attr_resolution.attr,
+	NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+	.name  = "vendor",
+	.attrs = mdev_dev_attrs,
+};
+
+/*
+ * Only referenced through mdev_fops in this file, so keep it static and
+ * out of the kernel's global namespace.
+ */
+static const struct attribute_group *mdev_dev_groups[] = {
+	&mdev_dev_group,
+	NULL,
+};
+
+/* "name" type attribute: prints the name of the type's kobject. */
+static ssize_t
+name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const char *type_name = kobj->name;
+
+	return sprintf(buf, "%s\n", type_name);
+}
+MDEV_TYPE_ATTR_RO(name);
+
+/*
+ * "description" type attribute: prints the framebuffer size of the type,
+ * or 0x0 when the kobject name matches no known type.
+ */
+static ssize_t
+description_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const struct mdpy_type *type = mdpy_find_type(kobj);
+	u32 width = 0;
+	u32 height = 0;
+
+	if (type) {
+		width = type->width;
+		height = type->height;
+	}
+
+	return sprintf(buf, "virtual display, %dx%d framebuffer\n",
+		       width, height);
+}
+MDEV_TYPE_ATTR_RO(description);
+
+/*
+ * "available_instances" type attribute: prints how many more devices can
+ * be created before the max_devices limit is reached.
+ */
+static ssize_t
+available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	unsigned int avail = max_devices - mdpy_count;
+
+	return sprintf(buf, "%d\n", avail);
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+/* "device_api" type attribute: prints the VFIO PCI device API string. */
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+/* Attributes shared by every supported type. */
+static struct attribute *mdev_types_attrs[] = {
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_description.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_available_instances.attr,
+ NULL,
+};
+
+/* One attribute group per type; all of them expose the same attribute set. */
+static struct attribute_group mdev_type_group1 = {
+ .name = MDPY_TYPE_1,
+ .attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group2 = {
+ .name = MDPY_TYPE_2,
+ .attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group3 = {
+ .name = MDPY_TYPE_3,
+ .attrs = mdev_types_attrs,
+};
+
+/* NULL-terminated list handed to the mdev core as supported_type_groups. */
+static struct attribute_group *mdev_type_groups[] = {
+ &mdev_type_group1,
+ &mdev_type_group2,
+ &mdev_type_group3,
+ NULL,
+};
+
+/* Parent ops registered with the mdev core in mdpy_dev_init(). */
+static const struct mdev_parent_ops mdev_fops = {
+ .owner = THIS_MODULE,
+ .mdev_attr_groups = mdev_dev_groups,
+ .supported_type_groups = mdev_type_groups,
+ .create = mdpy_create,
+ .remove = mdpy_remove,
+ .open = mdpy_open,
+ .release = mdpy_close,
+ .read = mdpy_read,
+ .write = mdpy_write,
+ .ioctl = mdpy_ioctl,
+ .mmap = mdpy_mmap,
+};
+
+/* File operations of the cdev added in mdpy_dev_init(); only the owner is set. */
+static const struct file_operations vd_fops = {
+ .owner = THIS_MODULE,
+};
+
+/* Release callback for mdpy_dev, which is a static object in this file. */
+static void mdpy_device_release(struct device *dev)
+{
+ /* nothing */
+}
+
+/*
+ * Module init: reserve a char device region, add the cdev, create the
+ * class, register the parent device and finally register it with the
+ * mdev core. Every step is unwound in reverse order on failure.
+ *
+ * Returns 0 on success or the negative error of the failing step.
+ */
+static int __init mdpy_dev_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&mdpy_devt, 0, MINORMASK, MDPY_NAME);
+	if (ret < 0) {
+		pr_err("Error: failed to register mdpy_dev, err: %d\n", ret);
+		return ret;
+	}
+
+	cdev_init(&mdpy_cdev, &vd_fops);
+	/* cdev_add() can fail; do not carry on with an unregistered cdev */
+	ret = cdev_add(&mdpy_cdev, mdpy_devt, MINORMASK);
+	if (ret) {
+		pr_err("Error: failed to add mdpy_dev cdev, err: %d\n", ret);
+		goto err_chrdev;
+	}
+	pr_info("%s: major %d\n", __func__, MAJOR(mdpy_devt));
+
+	mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME);
+	if (IS_ERR(mdpy_class)) {
+		pr_err("Error: failed to register mdpy_dev class\n");
+		ret = PTR_ERR(mdpy_class);
+		goto err_cdev;
+	}
+	mdpy_dev.class = mdpy_class;
+	mdpy_dev.release = mdpy_device_release;
+	dev_set_name(&mdpy_dev, "%s", MDPY_NAME);
+
+	/*
+	 * Even a failed device_register() leaves a reference behind that
+	 * has to be dropped with put_device().
+	 */
+	ret = device_register(&mdpy_dev);
+	if (ret)
+		goto err_put;
+
+	ret = mdev_register_device(&mdpy_dev, &mdev_fops);
+	if (ret)
+		goto err_device;
+
+	return 0;
+
+err_device:
+	/* device_del() + put_device() is what device_unregister() does */
+	device_del(&mdpy_dev);
+err_put:
+	put_device(&mdpy_dev);
+	class_destroy(mdpy_class);
+err_cdev:
+	cdev_del(&mdpy_cdev);
+err_chrdev:
+	unregister_chrdev_region(mdpy_devt, MINORMASK);
+	return ret;
+}
+
+/*
+ * Module exit: unregister from the mdev core first, then remove the
+ * parent device, the cdev, the char device region and the class that
+ * mdpy_dev_init() set up.
+ */
+static void __exit mdpy_dev_exit(void)
+{
+ /* NOTE(review): reason for clearing .bus before unregistering is not visible here */
+ mdpy_dev.bus = NULL;
+ mdev_unregister_device(&mdpy_dev);
+
+ device_unregister(&mdpy_dev);
+ cdev_del(&mdpy_cdev);
+ unregister_chrdev_region(mdpy_devt, MINORMASK);
+ class_destroy(mdpy_class);
+ mdpy_class = NULL;
+}
+
+module_init(mdpy_dev_init)
+module_exit(mdpy_dev_exit)