diff options
-rw-r--r-- | drivers/vfio/pci/Kconfig | 10 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 75 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 52 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 19 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 281 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 35 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 9 |
7 files changed, 254 insertions, 227 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5980758563e..c41b01e2b69 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -6,3 +6,13 @@ config VFIO_PCI use of PCI drivers using the VFIO framework. If you don't know what to do here, say N. + +config VFIO_PCI_VGA + bool "VFIO PCI support for VGA devices" + depends on VFIO_PCI && X86 && VGA_ARB + help + Support for VGA extension to VFIO PCI. This exposes an additional + region on VGA devices for accessing legacy VGA addresses used by + BIOS and generic video drivers. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b28e66c4376..8189cb6a86a 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -84,6 +84,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; +#ifdef CONFIG_VFIO_PCI_VGA + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vdev->has_vga = true; +#endif + return 0; } @@ -285,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ; break; } + case VFIO_PCI_VGA_REGION_INDEX: + if (!vdev->has_vga) + return -EINVAL; + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0xc0000; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + + break; default: return -EINVAL; } @@ -366,52 +381,50 @@ static long vfio_pci_ioctl(void *device_data, return -ENOTTY; } -static ssize_t vfio_pci_read(void *device_data, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_pci_rw(void *device_data, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct vfio_pci_device *vdev = device_data; - struct pci_dev *pdev = vdev->pdev; if (index >= VFIO_PCI_NUM_REGIONS) return -EINVAL; - if (index == VFIO_PCI_CONFIG_REGION_INDEX) - return vfio_pci_config_readwrite(vdev, buf, count, ppos, false); - else if (index == VFIO_PCI_ROM_REGION_INDEX) - return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); - else if (pci_resource_flags(pdev, index) & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, buf, count, ppos, false); - else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) - return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); + switch (index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_ROM_REGION_INDEX: + if (iswrite) + return -EINVAL; + return vfio_pci_bar_rw(vdev, buf, count, ppos, false); + + case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: + return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_VGA_REGION_INDEX: + return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); + } return -EINVAL; } -static ssize_t vfio_pci_write(void *device_data, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_pci_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) { - unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - struct vfio_pci_device *vdev = device_data; - struct pci_dev *pdev = vdev->pdev; + if (!count) + return 0; - if (index >= VFIO_PCI_NUM_REGIONS) - return -EINVAL; + return vfio_pci_rw(device_data, buf, count, ppos, false); +} - if (index == VFIO_PCI_CONFIG_REGION_INDEX) - return vfio_pci_config_readwrite(vdev, (char __user *)buf, - count, ppos, true); - else if (index == VFIO_PCI_ROM_REGION_INDEX) - return -EINVAL; - else if (pci_resource_flags(pdev, index) & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, (char __user *)buf, - count, ppos, true); - else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) { - return vfio_pci_mem_readwrite(vdev, (char __user *)buf, - count, ppos, true); - } +static ssize_t vfio_pci_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (!count) + return 0; - return -EINVAL; + return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); } static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 8b8f7d11e10..964ff22bf28 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -587,12 +587,46 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm) return 0; } +static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + if (offset == PCI_PM_CTRL) { + pci_power_t state; + + switch (le32_to_cpu(val) & PCI_PM_CTRL_STATE_MASK) { + case 0: + state = PCI_D0; + break; + case 1: + state = PCI_D1; + break; + case 2: + state = PCI_D2; + break; + case 3: + state = PCI_D3hot; + break; + } + + pci_set_power_state(vdev->pdev, state); + } + + return count; +} + /* Permissions for the Power Management capability */ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) { if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM])) return -ENOMEM; + perm->writefn = vfio_pm_config_write; + /* * We always virtualize the next field so we can remove * capabilities from the chain if we want to. @@ -600,10 +634,11 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); /* - * Power management is defined *per function*, - * so we let the user write this + * Power management is defined *per function*, so we can let + * the user change power state, but we trap and initiate the + * change ourselves, so the state bits are read-only. */ - p_setd(perm, PCI_PM_CTRL, NO_VIRT, ALL_WRITE); + p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK); return 0; } @@ -985,12 +1020,12 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) if (ret) return pcibios_err_to_errno(ret); + vdev->extended_caps = true; + if ((word & PCI_EXP_FLAGS_VERS) == 1) return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; - else { - vdev->extended_caps = true; + else return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2; - } case PCI_CAP_ID_HT: ret = pci_read_config_byte(pdev, pos + 3, &byte); if (ret) @@ -1501,9 +1536,8 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, return ret; } -ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite) +ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { size_t done = 0; int ret = 0; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 611827cba8c..d7e55d03f49 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -53,6 +53,7 @@ struct vfio_pci_device { bool reset_works; bool extended_caps; bool bardirty; + bool has_vga; struct pci_saved_state *pci_saved_state; atomic_t refcnt; }; @@ -70,15 +71,15 @@ extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, unsigned index, unsigned start, unsigned count, void *data); -extern ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); +extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, + loff_t *ppos, bool iswrite); + +extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index f72323ef618..210db24d220 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,253 +17,222 @@ #include <linux/pci.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/vgaarb.h> #include "vfio_pci_private.h" -/* I/O Port BAR access */ -ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +/* + * Read or write from an __iomem region (MMIO or I/O port) with an excluded + * range which is inaccessible. The excluded range drops writes and fills + * reads with -1. This is intended for handling MSI-X vector tables and + * leftover space for ROM BARs. + */ +static ssize_t do_io_rw(void __iomem *io, char __user *buf, + loff_t off, size_t count, size_t x_start, + size_t x_end, bool iswrite) { - struct pci_dev *pdev = vdev->pdev; - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - void __iomem *io; - size_t done = 0; - - if (!pci_resource_start(pdev, bar)) - return -EINVAL; - - if (pos + count > pci_resource_len(pdev, bar)) - return -EINVAL; - - if (!vdev->barmap[bar]) { - int ret; - - ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); - if (ret) - return ret; - - vdev->barmap[bar] = pci_iomap(pdev, bar, 0); - - if (!vdev->barmap[bar]) { - pci_release_selected_regions(pdev, 1 << bar); - return -EINVAL; - } - } - - io = vdev->barmap[bar]; + ssize_t done = 0; while (count) { - int filled; + size_t fillable, filled; + + if (off < x_start) + fillable = min(count, (size_t)(x_start - off)); + else if (off >= x_end) + fillable = count; + else + fillable = 0; - if (count >= 3 && !(pos % 4)) { + if (fillable >= 4 && !(off % 4)) { __le32 val; if (iswrite) { if (copy_from_user(&val, buf, 4)) return -EFAULT; - iowrite32(le32_to_cpu(val), io + pos); + iowrite32(le32_to_cpu(val), io + off); } else { - val = cpu_to_le32(ioread32(io + pos)); + val = cpu_to_le32(ioread32(io + off)); if (copy_to_user(buf, &val, 4)) return -EFAULT; } filled = 4; - - } else if ((pos % 2) == 0 && count >= 2) { + } else if (fillable >= 2 && !(off % 2)) { __le16 val; if (iswrite) { if (copy_from_user(&val, buf, 2)) return -EFAULT; - iowrite16(le16_to_cpu(val), io + pos); + iowrite16(le16_to_cpu(val), io + off); } else { - val = cpu_to_le16(ioread16(io + pos)); + val = cpu_to_le16(ioread16(io + off)); if (copy_to_user(buf, &val, 2)) return -EFAULT; } filled = 2; - } else { + } else if (fillable) { u8 val; if (iswrite) { if (copy_from_user(&val, buf, 1)) return -EFAULT; - iowrite8(val, io + pos); + iowrite8(val, io + off); } else { - val = ioread8(io + pos); + val = ioread8(io + off); if (copy_to_user(buf, &val, 1)) return -EFAULT; } filled = 1; + } else { + /* Fill reads with -1, drop writes */ + filled = min(count, (size_t)(x_end - off)); + if (!iswrite) { + u8 val = 0xFF; + size_t i; + + for (i = 0; i < filled; i++) + if (copy_to_user(buf + i, &val, 1)) + return -EFAULT; + } } count -= filled; done += filled; + off += filled; buf += filled; - pos += filled; } - *ppos += done; - return done; } -/* - * MMIO BAR access - * We handle two excluded ranges here as well, if the user tries to read - * the ROM beyond what PCI tells us is available or the MSI-X table region, - * we return 0xFF and writes are dropped. - */ -ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { struct pci_dev *pdev = vdev->pdev; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - void __iomem *io; + size_t x_start = 0, x_end = 0; resource_size_t end; - size_t done = 0; - size_t x_start = 0, x_end = 0; /* excluded range */ + void __iomem *io; + ssize_t done; if (!pci_resource_start(pdev, bar)) return -EINVAL; end = pci_resource_len(pdev, bar); - if (pos > end) + if (pos >= end) return -EINVAL; - if (pos == end) - return 0; - - if (pos + count > end) - count = end - pos; + count = min(count, (size_t)(end - pos)); if (bar == PCI_ROM_RESOURCE) { + /* + * The ROM can fill less space than the BAR, so we start the + * excluded range at the end of the actual ROM. This makes + * filling large ROM BARs much faster. + */ io = pci_map_rom(pdev, &x_start); + if (!io) + return -ENOMEM; x_end = end; - } else { - if (!vdev->barmap[bar]) { - int ret; - - ret = pci_request_selected_regions(pdev, 1 << bar, - "vfio"); - if (ret) - return ret; + } else if (!vdev->barmap[bar]) { + int ret; - vdev->barmap[bar] = pci_iomap(pdev, bar, 0); + ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); + if (ret) + return ret; - if (!vdev->barmap[bar]) { - pci_release_selected_regions(pdev, 1 << bar); - return -EINVAL; - } + io = pci_iomap(pdev, bar, 0); + if (!io) { + pci_release_selected_regions(pdev, 1 << bar); + return -ENOMEM; } + vdev->barmap[bar] = io; + } else io = vdev->barmap[bar]; - if (bar == vdev->msix_bar) { - x_start = vdev->msix_offset; - x_end = vdev->msix_offset + vdev->msix_size; - } + if (bar == vdev->msix_bar) { + x_start = vdev->msix_offset; + x_end = vdev->msix_offset + vdev->msix_size; } - if (!io) - return -EINVAL; - - while (count) { - size_t fillable, filled; - - if (pos < x_start) - fillable = x_start - pos; - else if (pos >= x_end) - fillable = end - pos; - else - fillable = 0; - - if (fillable >= 4 && !(pos % 4) && (count >= 4)) { - __le32 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 4)) - goto out; - - iowrite32(le32_to_cpu(val), io + pos); - } else { - val = cpu_to_le32(ioread32(io + pos)); + done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite); - if (copy_to_user(buf, &val, 4)) - goto out; - } + if (done >= 0) + *ppos += done; - filled = 4; - } else if (fillable >= 2 && !(pos % 2) && (count >= 2)) { - __le16 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 2)) - goto out; - - iowrite16(le16_to_cpu(val), io + pos); - } else { - val = cpu_to_le16(ioread16(io + pos)); - - if (copy_to_user(buf, &val, 2)) - goto out; - } - - filled = 2; - } else if (fillable) { - u8 val; + if (bar == PCI_ROM_RESOURCE) + pci_unmap_rom(pdev, io); - if (iswrite) { - if (copy_from_user(&val, buf, 1)) - goto out; + return done; +} - iowrite8(val, io + pos); - } else { - val = ioread8(io + pos); +ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; + void __iomem *iomem = NULL; + unsigned int rsrc; + bool is_ioport; + ssize_t done; + + if (!vdev->has_vga) + return -EINVAL; - if (copy_to_user(buf, &val, 1)) - goto out; - } + switch (pos) { + case 0xa0000 ... 0xbffff: + count = min(count, (size_t)(0xc0000 - pos)); + iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); + off = pos - 0xa0000; + rsrc = VGA_RSRC_LEGACY_MEM; + is_ioport = false; + break; + case 0x3b0 ... 0x3bb: + count = min(count, (size_t)(0x3bc - pos)); + iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1); + off = pos - 0x3b0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + case 0x3c0 ... 0x3df: + count = min(count, (size_t)(0x3e0 - pos)); + iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1); + off = pos - 0x3c0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + default: + return -EINVAL; + } - filled = 1; - } else { - /* Drop writes, fill reads with FF */ - filled = min((size_t)(x_end - pos), count); - if (!iswrite) { - char val = 0xFF; - size_t i; + if (!iomem) + return -ENOMEM; - for (i = 0; i < filled; i++) { - if (put_user(val, buf + i)) - goto out; - } - } + ret = vga_get_interruptible(vdev->pdev, rsrc); + if (ret) { + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + return ret; + } - } + done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); - count -= filled; - done += filled; - buf += filled; - pos += filled; - } + vga_put(vdev->pdev, rsrc); - *ppos += done; + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); -out: - if (bar == PCI_ROM_RESOURCE) - pci_unmap_rom(pdev, io); + if (done >= 0) + *ppos += done; - return count ? -EFAULT : done; + return done; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 12c264d3b05..28e2d5b2c0c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -442,7 +442,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, * a device. It's not always practical to leave a device within a group * driverless as it could get re-bound to something unsafe. */ -static const char * const vfio_driver_whitelist[] = { "pci-stub" }; +static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" }; static bool vfio_whitelisted_driver(struct device_driver *drv) { @@ -642,33 +642,16 @@ int vfio_add_group_dev(struct device *dev, } EXPORT_SYMBOL_GPL(vfio_add_group_dev); -/* Test whether a struct device is present in our tracking */ -static bool vfio_dev_present(struct device *dev) +/* Given a referenced group, check if it contains the device */ +static bool vfio_dev_present(struct vfio_group *group, struct device *dev) { - struct iommu_group *iommu_group; - struct vfio_group *group; struct vfio_device *device; - iommu_group = iommu_group_get(dev); - if (!iommu_group) - return false; - - group = vfio_group_get_from_iommu(iommu_group); - if (!group) { - iommu_group_put(iommu_group); - return false; - } - device = vfio_group_get_device(group, dev); - if (!device) { - vfio_group_put(group); - iommu_group_put(iommu_group); + if (!device) return false; - } vfio_device_put(device); - vfio_group_put(group); - iommu_group_put(iommu_group); return true; } @@ -682,10 +665,18 @@ void *vfio_del_group_dev(struct device *dev) struct iommu_group *iommu_group = group->iommu_group; void *device_data = device->device_data; + /* + * The group exists so long as we have a device reference. Get + * a group reference and use it to scan for the device going away. + */ + vfio_group_get(group); + vfio_device_put(device); /* TODO send a signal to encourage this to be released */ - wait_event(vfio.release_q, !vfio_dev_present(dev)); + wait_event(vfio.release_q, !vfio_dev_present(group, dev)); + + vfio_group_put(group); iommu_group_put(iommu_group); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 4758d1bfcf4..4f41f309911 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -303,6 +303,15 @@ enum { VFIO_PCI_BAR5_REGION_INDEX, VFIO_PCI_ROM_REGION_INDEX, VFIO_PCI_CONFIG_REGION_INDEX, + /* + * Expose VGA regions defined for PCI base class 03, subclass 00. + * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df + * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented + * range is found at it's identity mapped offset from the region + * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas + * between described ranges are unimplemented. + */ + VFIO_PCI_VGA_REGION_INDEX, VFIO_PCI_NUM_REGIONS }; |