diff options
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r-- | hw/vfio/pci.c | 273 |
1 files changed, 65 insertions, 208 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7bfa17ce3..d091d8cf0 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include <linux/vfio.h> #include <sys/ioctl.h> +#include <sys/mman.h> #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -31,7 +32,6 @@ #include "sysemu/sysemu.h" #include "pci.h" #include "trace.h" -#include "qapi/error.h" #define MSIX_CAP_LENGTH 12 @@ -417,11 +417,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) } static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, - int vector_n, bool msix) + MSIMessage *msg, bool msix) { int virq; - if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { + if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) { return; } @@ -429,7 +429,7 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, return; } - virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev); + virq = kvm_irqchip_add_msi_route(kvm_state, *msg, &vdev->pdev); if (virq < 0) { event_notifier_cleanup(&vector->kvm_interrupt); return; @@ -458,7 +458,6 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, PCIDevice *pdev) { kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev); - kvm_irqchip_commit_routes(kvm_state); } static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, @@ -496,7 +495,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_add_kvm_msi_virq(vdev, vector, msg, true); } /* @@ -640,6 +639,7 @@ retry: for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; + MSIMessage msg = msi_get_message(&vdev->pdev, i); vector->vdev = vdev; vector->virq = -1; @@ -656,7 +656,7 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. */ - vfio_add_kvm_msi_virq(vdev, vector, i, false); + vfio_add_kvm_msi_virq(vdev, vector, &msg, false); } /* Set interrupt type prior to possible interrupts */ @@ -1171,7 +1171,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) uint16_t ctrl; bool msi_64bit, msi_maskbit; int ret, entries; - Error *err = NULL; if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { @@ -1185,13 +1184,12 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) trace_vfio_msi_setup(vdev->vbasedev.name, pos); - ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err); + ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit); if (ret < 0) { if (ret == -ENOTSUP) { return 0; } - error_prepend(&err, "vfio: msi_init failed: "); - error_report_err(err); + error_report("vfio: msi_init failed"); return ret; } vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); @@ -1442,6 +1440,8 @@ static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) vdev->vbasedev.name, nr); } + vfio_bar_quirk_setup(vdev, nr); + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); } @@ -1452,6 +1452,29 @@ static void vfio_bars_setup(VFIOPCIDevice *vdev) for (i = 0; i < PCI_ROM_SLOT; i++) { vfio_bar_setup(vdev, i); } + + if (vdev->vga) { + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + OBJECT(vdev), &vfio_vga_ops, + &vdev->vga->region[QEMU_PCI_VGA_MEM], + "vfio-vga-mmio@0xa0000", + QEMU_PCI_VGA_MEM_SIZE); + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + OBJECT(vdev), &vfio_vga_ops, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO], + "vfio-vga-io@0x3b0", + QEMU_PCI_VGA_IO_LO_SIZE); + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, + OBJECT(vdev), &vfio_vga_ops, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI], + "vfio-vga-io@0x3c0", + QEMU_PCI_VGA_IO_HI_SIZE); + + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); + vfio_vga_quirk_setup(vdev); + } } static void vfio_bars_exit(VFIOPCIDevice *vdev) @@ -1505,21 +1528,6 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos) return next - pos; } - -static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos) -{ - uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE; - - for (tmp = PCI_CONFIG_SPACE_SIZE; tmp; - tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) { - if (tmp > pos && tmp < next) { - next = tmp; - } - } - - return next - pos; -} - static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask) { pci_set_word(buf, (pci_get_word(buf) & ~mask) | val); @@ -1767,101 +1775,16 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) return 0; } -static int vfio_add_ext_cap(VFIOPCIDevice *vdev) -{ - PCIDevice *pdev = &vdev->pdev; - uint32_t header; - uint16_t cap_id, next, size; - uint8_t cap_ver; - uint8_t *config; - - /* Only add extended caps if we have them and the guest can see them */ - if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) || - !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) { - return 0; - } - - /* - * pcie_add_capability always inserts the new capability at the tail - * of the chain. Therefore to end up with a chain that matches the - * physical device, we cache the config space to avoid overwriting - * the original config space when we parse the extended capabilities. - */ - config = g_memdup(pdev->config, vdev->config_size); - - /* - * Extended capabilities are chained with each pointing to the next, so we - * can drop anything other than the head of the chain simply by modifying - * the previous next pointer. For the head of the chain, we can modify the - * capability ID to something that cannot match a valid capability. ID - * 0 is reserved for this since absence of capabilities is indicated by - * 0 for the ID, version, AND next pointer. However, pcie_add_capability() - * uses ID 0 as reserved for list management and will incorrectly match and - * assert if we attempt to pre-load the head of the chain with with this - * ID. Use ID 0xFFFF temporarily since it is also seems to be reserved in - * part for identifying absence of capabilities in a root complex register - * block. If the ID still exists after adding capabilities, switch back to - * zero. We'll mark this entire first dword as emulated for this purpose. - */ - pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE, - PCI_EXT_CAP(0xFFFF, 0, 0)); - pci_set_long(pdev->wmask + PCI_CONFIG_SPACE_SIZE, 0); - pci_set_long(vdev->emulated_config_bits + PCI_CONFIG_SPACE_SIZE, ~0); - - for (next = PCI_CONFIG_SPACE_SIZE; next; - next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) { - header = pci_get_long(config + next); - cap_id = PCI_EXT_CAP_ID(header); - cap_ver = PCI_EXT_CAP_VER(header); - - /* - * If it becomes important to configure extended capabilities to their - * actual size, use this as the default when it's something we don't - * recognize. Since QEMU doesn't actually handle many of the config - * accesses, exact size doesn't seem worthwhile. - */ - size = vfio_ext_cap_max_size(config, next); - - /* Use emulated next pointer to allow dropping extended caps */ - pci_long_test_and_set_mask(vdev->emulated_config_bits + next, - PCI_EXT_CAP_NEXT_MASK); - - switch (cap_id) { - case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ - case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ - trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); - break; - default: - pcie_add_capability(pdev, cap_id, cap_ver, next, size); - } - - } - - /* Cleanup chain head ID if necessary */ - if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) { - pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0); - } - - g_free(config); - return 0; -} - static int vfio_add_capabilities(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; - int ret; if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) || !pdev->config[PCI_CAPABILITY_LIST]) { return 0; /* Nothing to add */ } - ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]); - if (ret) { - return ret; - } - - return vfio_add_ext_cap(vdev); + return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]); } static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) @@ -2138,61 +2061,42 @@ int vfio_populate_vga(VFIOPCIDevice *vdev) struct vfio_region_info *reg_info; int ret; - ret = vfio_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info); - if (ret) { - return ret; - } - - if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || - !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || - reg_info->size < 0xbffff + 1) { - error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)reg_info->flags, - (unsigned long)reg_info->size); - g_free(reg_info); - return -EINVAL; - } - - vdev->vga = g_new0(VFIOVGA, 1); - - vdev->vga->fd_offset = reg_info->offset; - vdev->vga->fd = vdev->vbasedev.fd; - - g_free(reg_info); + if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_VGA_REGION_INDEX, ®_info); + if (ret) { + return ret; + } - vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; - vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; - QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); + if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || + !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || + reg_info->size < 0xbffff + 1) { + error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); + g_free(reg_info); + return -EINVAL; + } - memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, - OBJECT(vdev), &vfio_vga_ops, - &vdev->vga->region[QEMU_PCI_VGA_MEM], - "vfio-vga-mmio@0xa0000", - QEMU_PCI_VGA_MEM_SIZE); + vdev->vga = g_new0(VFIOVGA, 1); - vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; - vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; - QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); + vdev->vga->fd_offset = reg_info->offset; + vdev->vga->fd = vdev->vbasedev.fd; - memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, - OBJECT(vdev), &vfio_vga_ops, - &vdev->vga->region[QEMU_PCI_VGA_IO_LO], - "vfio-vga-io@0x3b0", - QEMU_PCI_VGA_IO_LO_SIZE); + g_free(reg_info); - vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; - vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; - QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); + vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; + vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); - memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, - OBJECT(vdev), &vfio_vga_ops, - &vdev->vga->region[QEMU_PCI_VGA_IO_HI], - "vfio-vga-io@0x3c0", - QEMU_PCI_VGA_IO_HI_SIZE); + vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); - pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); + vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); + } return 0; } @@ -2494,7 +2398,7 @@ static int vfio_initfn(PCIDevice *pdev) ssize_t len; struct stat st; int groupid; - int i, ret; + int ret; if (!vdev->vbasedev.sysfsdev) { vdev->vbasedev.sysfsdev = @@ -2656,43 +2560,6 @@ static int vfio_initfn(PCIDevice *pdev) goto out_teardown; } - if (vdev->vga) { - vfio_vga_quirk_setup(vdev); - } - - for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_bar_quirk_setup(vdev, i); - } - - if (!vdev->igd_opregion && - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { - struct vfio_region_info *opregion; - - if (vdev->pdev.qdev.hotplugged) { - error_report("Cannot support IGD OpRegion feature on hotplugged " - "device %s", vdev->vbasedev.name); - ret = -EINVAL; - goto out_teardown; - } - - ret = vfio_get_dev_region_info(&vdev->vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); - if (ret) { - error_report("Device %s does not support requested IGD OpRegion " - "feature", vdev->vbasedev.name); - goto out_teardown; - } - - ret = vfio_pci_igd_opregion_init(vdev, opregion); - g_free(opregion); - if (ret) { - error_report("Device %s IGD OpRegion initialization failed", - vdev->vbasedev.name); - goto out_teardown; - } - } - /* QEMU emulates all of MSI & MSIX */ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, @@ -2736,13 +2603,6 @@ static void vfio_instance_finalize(Object *obj) vfio_bars_finalize(vdev); g_free(vdev->emulated_config_bits); g_free(vdev->rom); - /* - * XXX Leaking igd_opregion is not an oversight, we can't remove the - * fw_cfg entry therefore leaking this allocation seems like the safest - * option. - * - * g_free(vdev->igd_opregion); - */ vfio_put_device(vdev); vfio_put_group(group); } @@ -2817,8 +2677,6 @@ static Property vfio_pci_dev_properties[] = { VFIO_FEATURE_ENABLE_VGA_BIT, false), DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_REQ_BIT, true), - DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), @@ -2829,7 +2687,6 @@ static Property vfio_pci_dev_properties[] = { sub_vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, sub_device_id, PCI_ANY_ID), - DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0), /* * TODO - support passed fds... is this necessary? * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), |