summary | refs | log | tree | commit | diff
path: root/hw/vfio/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r-- hw/vfio/pci.c 273
1 files changed, 65 insertions, 208 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7bfa17ce3..d091d8cf0 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -21,6 +21,7 @@
#include "qemu/osdep.h"
#include <linux/vfio.h>
#include <sys/ioctl.h>
+#include <sys/mman.h>
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
@@ -31,7 +32,6 @@
#include "sysemu/sysemu.h"
#include "pci.h"
#include "trace.h"
-#include "qapi/error.h"
#define MSIX_CAP_LENGTH 12
@@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
}
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
- int vector_n, bool msix)
+ MSIMessage *msg, bool msix)
{
int virq;
- if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
+ if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) {
return;
}
@@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
return;
}
- virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev);
+ virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev);
if (virq < 0) {
event_notifier_cleanup(&vector->kvm_interrupt);
return;
@@ -458,7 +458,6 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
PCIDevice *pdev)
{
kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev);
- kvm_irqchip_commit_routes(kvm_state);
}
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
@@ -496,7 +495,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vfio_update_kvm_msi_virq(vector, *msg, pdev);
}
} else {
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ vfio_add_kvm_msi_virq(vdev, vector, msg, true);
}
/*
@@ -640,6 +639,7 @@ retry:
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
+ MSIMessage msg = msi_get_message(&vdev->pdev, i);
vector->vdev = vdev;
vector->virq = -1;
@@ -656,7 +656,7 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vfio_add_kvm_msi_virq(vdev, vector, i, false);
+ vfio_add_kvm_msi_virq(vdev, vector, &msg, false);
}
/* Set interrupt type prior to possible interrupts */
@@ -1171,7 +1171,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
uint16_t ctrl;
bool msi_64bit, msi_maskbit;
int ret, entries;
- Error *err = NULL;
if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
@@ -1185,13 +1184,12 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
trace_vfio_msi_setup(vdev->vbasedev.name, pos);
- ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err);
+ ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
if (ret < 0) {
if (ret == -ENOTSUP) {
return 0;
}
- error_prepend(&err, "vfio: msi_init failed: ");
- error_report_err(err);
+ error_report("vfio: msi_init failed");
return ret;
}
vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
@@ -1442,6 +1440,8 @@ static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr)
vdev->vbasedev.name, nr);
}
+ vfio_bar_quirk_setup(vdev, nr);
+
pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
}
@@ -1452,6 +1452,29 @@ static void vfio_bars_setup(VFIOPCIDevice *vdev)
for (i = 0; i < PCI_ROM_SLOT; i++) {
vfio_bar_setup(vdev, i);
}
+
+ if (vdev->vga) {
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ OBJECT(vdev), &vfio_vga_ops,
+ &vdev->vga->region[QEMU_PCI_VGA_MEM],
+ "vfio-vga-mmio@0xa0000",
+ QEMU_PCI_VGA_MEM_SIZE);
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ OBJECT(vdev), &vfio_vga_ops,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
+ "vfio-vga-io@0x3b0",
+ QEMU_PCI_VGA_IO_LO_SIZE);
+ memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
+ OBJECT(vdev), &vfio_vga_ops,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
+ "vfio-vga-io@0x3c0",
+ QEMU_PCI_VGA_IO_HI_SIZE);
+
+ pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
+ vfio_vga_quirk_setup(vdev);
+ }
}
static void vfio_bars_exit(VFIOPCIDevice *vdev)
@@ -1505,21 +1528,6 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
return next - pos;
}
-
-static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
-{
- uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
-
- for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
- tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
- if (tmp > pos && tmp < next) {
- next = tmp;
- }
- }
-
- return next - pos;
-}
-
static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
{
pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -1767,101 +1775,16 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
-static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
-{
- PCIDevice *pdev = &vdev->pdev;
- uint32_t header;
- uint16_t cap_id, next, size;
- uint8_t cap_ver;
- uint8_t *config;
-
- /* Only add extended caps if we have them and the guest can see them */
- if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
- !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
- return 0;
- }
-
- /*
- * pcie_add_capability always inserts the new capability at the tail
- * of the chain. Therefore to end up with a chain that matches the
- * physical device, we cache the config space to avoid overwriting
- * the original config space when we parse the extended capabilities.
- */
- config = g_memdup(pdev->config, vdev->config_size);
-
- /*
- * Extended capabilities are chained with each pointing to the next, so we
- * can drop anything other than the head of the chain simply by modifying
- * the previous next pointer. For the head of the chain, we can modify the
- * capability ID to something that cannot match a valid capability. ID
- * 0 is reserved for this since absence of capabilities is indicated by
- * 0 for the ID, version, AND next pointer. However, pcie_add_capability()
- * uses ID 0 as reserved for list management and will incorrectly match and
- * assert if we attempt to pre-load the head of the chain with with this
- * ID. Use ID 0xFFFF temporarily since it is also seems to be reserved in
- * part for identifying absence of capabilities in a root complex register
- * block. If the ID still exists after adding capabilities, switch back to
- * zero. We'll mark this entire first dword as emulated for this purpose.
- */
- pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE,
- PCI_EXT_CAP(0xFFFF, 0, 0));
- pci_set_long(pdev->wmask + PCI_CONFIG_SPACE_SIZE, 0);
- pci_set_long(vdev->emulated_config_bits + PCI_CONFIG_SPACE_SIZE, ~0);
-
- for (next = PCI_CONFIG_SPACE_SIZE; next;
- next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
- header = pci_get_long(config + next);
- cap_id = PCI_EXT_CAP_ID(header);
- cap_ver = PCI_EXT_CAP_VER(header);
-
- /*
- * If it becomes important to configure extended capabilities to their
- * actual size, use this as the default when it's something we don't
- * recognize. Since QEMU doesn't actually handle many of the config
- * accesses, exact size doesn't seem worthwhile.
- */
- size = vfio_ext_cap_max_size(config, next);
-
- /* Use emulated next pointer to allow dropping extended caps */
- pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
- PCI_EXT_CAP_NEXT_MASK);
-
- switch (cap_id) {
- case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
- case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
- trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
- break;
- default:
- pcie_add_capability(pdev, cap_id, cap_ver, next, size);
- }
-
- }
-
- /* Cleanup chain head ID if necessary */
- if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
- pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
- }
-
- g_free(config);
- return 0;
-}
-
static int vfio_add_capabilities(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
- int ret;
if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
!pdev->config[PCI_CAPABILITY_LIST]) {
return 0; /* Nothing to add */
}
- ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
- if (ret) {
- return ret;
- }
-
- return vfio_add_ext_cap(vdev);
+ return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
}
static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
@@ -2138,61 +2061,42 @@ int vfio_populate_vga(VFIOPCIDevice *vdev)
struct vfio_region_info *reg_info;
int ret;
- ret = vfio_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, &reg_info);
- if (ret) {
- return ret;
- }
-
- if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
- !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
- reg_info->size < 0xbffff + 1) {
- error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
- (unsigned long)reg_info->flags,
- (unsigned long)reg_info->size);
- g_free(reg_info);
- return -EINVAL;
- }
-
- vdev->vga = g_new0(VFIOVGA, 1);
-
- vdev->vga->fd_offset = reg_info->offset;
- vdev->vga->fd = vdev->vbasedev.fd;
-
- g_free(reg_info);
+ if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
+ ret = vfio_get_region_info(vbasedev,
+ VFIO_PCI_VGA_REGION_INDEX, &reg_info);
+ if (ret) {
+ return ret;
+ }
- vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
- vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
- QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
+ if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
+ !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
+ reg_info->size < 0xbffff + 1) {
+ error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
+ (unsigned long)reg_info->flags,
+ (unsigned long)reg_info->size);
+ g_free(reg_info);
+ return -EINVAL;
+ }
- memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
- OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga->region[QEMU_PCI_VGA_MEM],
- "vfio-vga-mmio@0xa0000",
- QEMU_PCI_VGA_MEM_SIZE);
+ vdev->vga = g_new0(VFIOVGA, 1);
- vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
- vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
- QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
+ vdev->vga->fd_offset = reg_info->offset;
+ vdev->vga->fd = vdev->vbasedev.fd;
- memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
- OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
- "vfio-vga-io@0x3b0",
- QEMU_PCI_VGA_IO_LO_SIZE);
+ g_free(reg_info);
- vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
- vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
- QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
+ vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
- memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
- OBJECT(vdev), &vfio_vga_ops,
- &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
- "vfio-vga-io@0x3c0",
- QEMU_PCI_VGA_IO_HI_SIZE);
+ vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
- pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
+ vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
+ vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
+ QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
+ }
return 0;
}
@@ -2494,7 +2398,7 @@ static int vfio_initfn(PCIDevice *pdev)
ssize_t len;
struct stat st;
int groupid;
- int i, ret;
+ int ret;
if (!vdev->vbasedev.sysfsdev) {
vdev->vbasedev.sysfsdev =
@@ -2656,43 +2560,6 @@ static int vfio_initfn(PCIDevice *pdev)
goto out_teardown;
}
- if (vdev->vga) {
- vfio_vga_quirk_setup(vdev);
- }
-
- for (i = 0; i < PCI_ROM_SLOT; i++) {
- vfio_bar_quirk_setup(vdev, i);
- }
-
- if (!vdev->igd_opregion &&
- vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
- struct vfio_region_info *opregion;
-
- if (vdev->pdev.qdev.hotplugged) {
- error_report("Cannot support IGD OpRegion feature on hotplugged "
- "device %s", vdev->vbasedev.name);
- ret = -EINVAL;
- goto out_teardown;
- }
-
- ret = vfio_get_dev_region_info(&vdev->vbasedev,
- VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
- VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
- if (ret) {
- error_report("Device %s does not support requested IGD OpRegion "
- "feature", vdev->vbasedev.name);
- goto out_teardown;
- }
-
- ret = vfio_pci_igd_opregion_init(vdev, opregion);
- g_free(opregion);
- if (ret) {
- error_report("Device %s IGD OpRegion initialization failed",
- vdev->vbasedev.name);
- goto out_teardown;
- }
- }
-
/* QEMU emulates all of MSI & MSIX */
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
@@ -2736,13 +2603,6 @@ static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_put_device(vdev);
vfio_put_group(group);
}
@@ -2817,8 +2677,6 @@ static Property vfio_pci_dev_properties[] = {
VFIO_FEATURE_ENABLE_VGA_BIT, false),
DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
VFIO_FEATURE_ENABLE_REQ_BIT, true),
- DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
- VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
@@ -2829,7 +2687,6 @@ static Property vfio_pci_dev_properties[] = {
sub_vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
- DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
/*
* TODO - support passed fds... is this necessary?
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),