summaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2013-01-14 10:23:50 -0600
committerAnthony Liguori <aliguori@us.ibm.com>2013-01-14 10:23:50 -0600
commit8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2 (patch)
treec14ddf30842e64285294a8deda7ac41218bb1ab0 /hw
parent7adef3bc5a195d483987469fc80fbbe4a25a5b9d (diff)
parentfeb9a2ab4b0260d8d680a7ffd25063dafc7ec628 (diff)
downloadqemu-8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2.tar.gz
qemu-8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2.tar.bz2
qemu-8e9a8681dd6066e4f79ba85b59deedb4d3d11aa2.zip
Merge remote-tracking branch 'mst/tags/for_anthony' into staging
pci,virtio This further optimizes MSIX handling in virtio-pci. Also included is pci cleanup by Paolo, and pci device assignment fix by Alex. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> * mst/tags/for_anthony: pci-assign: Enable MSIX on device to match guest pci: use constants for devices under the 1B36 device ID, document them ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt virtio-9p: use symbolic constant, add to pci-ids.txt reorganize pci-ids.txt docs: move pci-ids.txt to docs/specs/ vhost: backend masking support vhost: set started flag while start is in progress virtio-net: set/clear vhost_started in reverse order virtio: backend virtqueue notifier masking virtio-pci: cache msix messages kvm: add stub for update msi route msix: add api to access msix message virtio: don't waste irqfds on control vqs
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/virtio-9p-device.c2
-rw-r--r--hw/ivshmem.c7
-rw-r--r--hw/kvm/pci-assign.c17
-rw-r--r--hw/pci/msix.c2
-rw-r--r--hw/pci/msix.h1
-rw-r--r--hw/pci/pci.h8
-rw-r--r--hw/pci_bridge_dev.c8
-rw-r--r--hw/serial-pci.c12
-rw-r--r--hw/vhost.c112
-rw-r--r--hw/vhost.h10
-rw-r--r--hw/vhost_net.c27
-rw-r--r--hw/vhost_net.h3
-rw-r--r--hw/virtio-net.c22
-rw-r--r--hw/virtio-pci.c203
-rw-r--r--hw/virtio-pci.h2
-rw-r--r--hw/virtio.h15
16 files changed, 382 insertions, 69 deletions
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 2a7c2a3d62..6f427dfc5d 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
k->init = virtio_9p_init_pci;
k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
- k->device_id = 0x1009;
+ k->device_id = PCI_DEVICE_ID_VIRTIO_9P;
k->revision = VIRTIO_PCI_ABI_VERSION;
k->class_id = 0x2;
dc->props = virtio_9p_properties;
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 3adcc98a34..afaf9b3bbf 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -29,6 +29,9 @@
#include <sys/mman.h>
#include <sys/types.h>
+#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
+#define PCI_DEVICE_ID_IVSHMEM 0x1110
+
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI 1
@@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
k->init = pci_ivshmem_init;
k->exit = pci_ivshmem_uninit;
- k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
- k->device_id = 0x1110;
+ k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
+ k->device_id = PCI_DEVICE_ID_IVSHMEM;
k->class_id = PCI_CLASS_MEMORY_RAM;
dc->reset = ivshmem_reset;
dc->props = ivshmem_properties;
diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index 8ee94287ff..896cfe8a59 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
return (entry->ctrl & cpu_to_le32(0x1)) != 0;
}
+/*
+ * When MSI-X is first enabled the vector table typically has all the
+ * vectors masked, so we can't use that as the obvious test to figure out
+ * how many vectors to initially enable. Instead we look at the data field
+ * because this is what worked for pci-assign for a long time. This makes
+ * sure the physical MSI-X state tracks the guest's view, which is important
+ * for some VF/PF and PF/fw communication channels.
+ */
+static bool assigned_dev_msix_skipped(MSIXTableEntry *entry)
+{
+ return !entry->data;
+}
+
static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
{
AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
/* Get the usable entry number for allocating */
for (i = 0; i < adev->msix_max; i++, entry++) {
- if (assigned_dev_msix_masked(entry)) {
+ if (assigned_dev_msix_skipped(entry)) {
continue;
}
entries_nr++;
@@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
for (i = 0; i < adev->msix_max; i++, entry++) {
adev->msi_virq[i] = -1;
- if (assigned_dev_msix_masked(entry)) {
+ if (assigned_dev_msix_skipped(entry)) {
continue;
}
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 9eee6570c2..e231a0dc4b 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -27,7 +27,7 @@
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
-static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;
diff --git a/hw/pci/msix.h b/hw/pci/msix.h
index d0c4429843..e648410535 100644
--- a/hw/pci/msix.h
+++ b/hw/pci/msix.h
@@ -5,6 +5,7 @@
#include "hw/pci/pci.h"
void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
+MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
int msix_init(PCIDevice *dev, unsigned short nentries,
MemoryRegion *table_bar, uint8_t table_bar_nr,
unsigned table_offset, MemoryRegion *pba_bar,
diff --git a/hw/pci/pci.h b/hw/pci/pci.h
index 72927e3149..f340fe57c9 100644
--- a/hw/pci/pci.h
+++ b/hw/pci/pci.h
@@ -77,6 +77,14 @@
#define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003
#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004
#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005
+#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
+
+#define PCI_VENDOR_ID_REDHAT 0x1b36
+#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001
+#define PCI_DEVICE_ID_REDHAT_SERIAL 0x0002
+#define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003
+#define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004
+#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
#define FMT_PCIBUS PRIx64
diff --git a/hw/pci_bridge_dev.c b/hw/pci_bridge_dev.c
index 1a7b2cd897..1124c53b8c 100644
--- a/hw/pci_bridge_dev.c
+++ b/hw/pci_bridge_dev.c
@@ -27,10 +27,6 @@
#include "exec/memory.h"
#include "pci/pci_bus.h"
-#define REDHAT_PCI_VENDOR_ID 0x1b36
-#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID
-#define PCI_BRIDGE_DEV_DEVICE_ID 0x1
-
struct PCIBridgeDev {
PCIBridge bridge;
MemoryRegion bar;
@@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
k->init = pci_bridge_dev_initfn;
k->exit = pci_bridge_dev_exitfn;
k->config_write = pci_bridge_dev_write_config;
- k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID;
- k->device_id = PCI_BRIDGE_DEV_DEVICE_ID;
+ k->vendor_id = PCI_VENDOR_ID_REDHAT;
+ k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
k->class_id = PCI_CLASS_BRIDGE_PCI;
k->is_bridge = 1,
dc->desc = "Standard PCI Bridge";
diff --git a/hw/serial-pci.c b/hw/serial-pci.c
index c62cc9e375..1c31353f6d 100644
--- a/hw/serial-pci.c
+++ b/hw/serial-pci.c
@@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = serial_pci_init;
pc->exit = serial_pci_exit;
- pc->vendor_id = 0x1b36; /* Red Hat */
- pc->device_id = 0x0002;
+ pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+ pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_serial;
@@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = multi_serial_pci_init;
pc->exit = multi_serial_pci_exit;
- pc->vendor_id = 0x1b36; /* Red Hat */
- pc->device_id = 0x0003;
+ pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+ pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_multi_serial;
@@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = multi_serial_pci_init;
pc->exit = multi_serial_pci_exit;
- pc->vendor_id = 0x1b36; /* Red Hat */
- pc->device_id = 0x0004;
+ pc->vendor_id = PCI_VENDOR_ID_REDHAT;
+ pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_multi_serial;
diff --git a/hw/vhost.c b/hw/vhost.c
index 4e1cb47418..cee8aad4a1 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener,
/* FIXME: implement */
}
-static int vhost_virtqueue_init(struct vhost_dev *dev,
+static int vhost_virtqueue_start(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
unsigned idx)
@@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
goto fail_kick;
}
- file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
- r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
- if (r) {
- r = -errno;
- goto fail_call;
- }
+ /* Clear and discard previous events if any. */
+ event_notifier_test_and_clear(&vq->masked_notifier);
return 0;
-fail_call:
fail_kick:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
@@ -708,7 +703,7 @@ fail_alloc_desc:
return r;
}
-static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
+static void vhost_virtqueue_stop(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
unsigned idx)
@@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener,
{
}
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq, int n)
+{
+ struct vhost_vring_file file = {
+ .index = n,
+ };
+ int r = event_notifier_init(&vq->masked_notifier, 0);
+ if (r < 0) {
+ return r;
+ }
+
+ file.fd = event_notifier_get_fd(&vq->masked_notifier);
+ r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ if (r) {
+ r = -errno;
+ goto fail_call;
+ }
+ return 0;
+fail_call:
+ event_notifier_cleanup(&vq->masked_notifier);
+ return r;
+}
+
+static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
+{
+ event_notifier_cleanup(&vq->masked_notifier);
+}
+
int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
bool force)
{
uint64_t features;
- int r;
+ int i, r;
if (devfd >= 0) {
hdev->control = devfd;
} else {
@@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
if (r < 0) {
goto fail;
}
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
+ if (r < 0) {
+ goto fail_vq;
+ }
+ }
hdev->features = features;
hdev->memory_listener = (MemoryListener) {
@@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
memory_listener_register(&hdev->memory_listener, &address_space_memory);
hdev->force = force;
return 0;
+fail_vq:
+ while (--i >= 0) {
+ vhost_virtqueue_cleanup(hdev->vqs + i);
+ }
fail:
r = -errno;
close(hdev->control);
@@ -803,6 +837,10 @@ fail:
void vhost_dev_cleanup(struct vhost_dev *hdev)
{
+ int i;
+ for (i = 0; i < hdev->nvqs; ++i) {
+ vhost_virtqueue_cleanup(hdev->vqs + i);
+ }
memory_listener_unregister(&hdev->memory_listener);
g_free(hdev->mem);
g_free(hdev->mem_sections);
@@ -869,17 +907,53 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
+/* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
+{
+ struct vhost_virtqueue *vq = hdev->vqs + n;
+ assert(hdev->started);
+ return event_notifier_test_and_clear(&vq->masked_notifier);
+}
+
+/* Mask/unmask events from this vq. */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+ bool mask)
+{
+ struct VirtQueue *vvq = virtio_get_queue(vdev, n);
+ int r;
+
+ assert(hdev->started);
+
+ struct vhost_vring_file file = {
+ .index = n,
+ };
+ if (mask) {
+ file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier);
+ } else {
+ file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
+ }
+ r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+ assert(r >= 0);
+}
+
/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
+
+ hdev->started = true;
+
if (!vdev->binding->set_guest_notifiers) {
fprintf(stderr, "binding does not support guest notifiers\n");
r = -ENOSYS;
goto fail;
}
- r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
+ r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+ hdev->nvqs,
+ true);
if (r < 0) {
fprintf(stderr, "Error binding guest notifier: %d\n", -r);
goto fail_notifiers;
@@ -895,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
goto fail_mem;
}
for (i = 0; i < hdev->nvqs; ++i) {
- r = vhost_virtqueue_init(hdev,
+ r = vhost_virtqueue_start(hdev,
vdev,
hdev->vqs + i,
i);
@@ -916,22 +990,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
- hdev->started = true;
-
return 0;
fail_log:
fail_vq:
while (--i >= 0) {
- vhost_virtqueue_cleanup(hdev,
+ vhost_virtqueue_stop(hdev,
vdev,
hdev->vqs + i,
i);
}
fail_mem:
fail_features:
- vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+ vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
fail_notifiers:
fail:
+
+ hdev->started = false;
return r;
}
@@ -941,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
int i, r;
for (i = 0; i < hdev->nvqs; ++i) {
- vhost_virtqueue_cleanup(hdev,
+ vhost_virtqueue_stop(hdev,
vdev,
hdev->vqs + i,
i);
@@ -950,7 +1024,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
0, (hwaddr)~0x0ull);
}
- r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+ r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
+ hdev->nvqs,
+ false);
if (r < 0) {
fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
fflush(stderr);
diff --git a/hw/vhost.h b/hw/vhost.h
index 6f6a906f4f..44c61a5877 100644
--- a/hw/vhost.h
+++ b/hw/vhost.h
@@ -18,6 +18,7 @@ struct vhost_virtqueue {
void *ring;
unsigned long long ring_phys;
unsigned ring_size;
+ EventNotifier masked_notifier;
};
typedef unsigned long vhost_log_chunk_t;
@@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+/* Test and clear masked event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
+
+/* Mask/unmask events from this vq.
+ */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+ bool mask);
#endif
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index ae2785d83f..d3a04caef6 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
(1 << VHOST_NET_F_VIRTIO_NET_HDR);
net->backend = r;
+ net->dev.nvqs = 2;
+ net->dev.vqs = net->vqs;
+
r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
if (r < 0) {
goto fail;
@@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net,
struct vhost_vring_file file = { };
int r;
- net->dev.nvqs = 2;
- net->dev.vqs = net->vqs;
-
r = vhost_dev_enable_notifiers(&net->dev, dev);
if (r < 0) {
goto fail_notifiers;
@@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net)
vhost_dev_cleanup(&net->dev);
g_free(net);
}
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+ return vhost_virtqueue_pending(&net->dev, idx);
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+ int idx, bool mask)
+{
+ vhost_virtqueue_mask(&net->dev, dev, idx, mask);
+}
#else
struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
bool force)
@@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
void vhost_net_ack_features(struct vhost_net *net, unsigned features)
{
}
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+ return -ENOSYS;
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+ int idx, bool mask)
+{
+}
#endif
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
index 012aba4148..88912b85fd 100644
--- a/hw/vhost_net.h
+++ b/hw/vhost_net.h
@@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net);
unsigned vhost_net_get_features(VHostNetState *net, unsigned features);
void vhost_net_ack_features(VHostNetState *net, unsigned features);
+bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+ int idx, bool mask);
#endif
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5d03b31c1b..3bb01b1037 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
return;
}
+ n->vhost_started = 1;
r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
if (r < 0) {
error_report("unable to start vhost net: %d: "
"falling back on userspace virtio", -r);
- } else {
- n->vhost_started = 1;
+ n->vhost_started = 0;
}
} else {
vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
@@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = {
.link_status_changed = virtio_net_set_link_status,
};
+static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ assert(n->vhost_started);
+ return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
+}
+
+static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+ bool mask)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ assert(n->vhost_started);
+ vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
+ vdev, idx, mask);
+}
+
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
virtio_net_conf *net)
{
@@ -1026,6 +1042,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
n->vdev.bad_features = virtio_net_bad_features;
n->vdev.reset = virtio_net_reset;
n->vdev.set_status = virtio_net_set_status;
+ n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
+ n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 08d2d1ba82..0b49739946 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
unsigned int vector,
MSIMessage msg)
{
- VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
- EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
@@ -500,21 +498,34 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
irqfd->virq = ret;
}
irqfd->users++;
-
- ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
- if (ret < 0) {
- if (--irqfd->users == 0) {
- kvm_irqchip_release_virq(kvm_state, irqfd->virq);
- }
- return ret;
- }
return 0;
}
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
- unsigned int queue_no,
unsigned int vector)
{
+ VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+ if (--irqfd->users == 0) {
+ kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+ }
+}
+
+static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
+ unsigned int queue_no,
+ unsigned int vector)
+{
+ VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+ VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+ EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+ int ret;
+ ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
+ return ret;
+}
+
+static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
+ unsigned int queue_no,
+ unsigned int vector)
+{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
@@ -522,27 +533,143 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
assert(ret == 0);
+}
- if (--irqfd->users == 0) {
- kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+ PCIDevice *dev = &proxy->pci_dev;
+ VirtIODevice *vdev = proxy->vdev;
+ unsigned int vector;
+ int ret, queue_no;
+ MSIMessage msg;
+
+ for (queue_no = 0; queue_no < nvqs; queue_no++) {
+ if (!virtio_queue_get_num(vdev, queue_no)) {
+ break;
+ }
+ vector = virtio_queue_vector(vdev, queue_no);
+ if (vector >= msix_nr_vectors_allocated(dev)) {
+ continue;
+ }
+ msg = msix_get_message(dev, vector);
+ ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+ if (ret < 0) {
+ goto undo;
+ }
+ /* If guest supports masking, set up irqfd now.
+ * Otherwise, delay until unmasked in the frontend.
+ */
+ if (proxy->vdev->guest_notifier_mask) {
+ ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+ if (ret < 0) {
+ kvm_virtio_pci_vq_vector_release(proxy, vector);
+ goto undo;
+ }
+ }
+ }
+ return 0;
+
+undo:
+ while (--queue_no >= 0) {
+ vector = virtio_queue_vector(vdev, queue_no);
+ if (vector >= msix_nr_vectors_allocated(dev)) {
+ continue;
+ }
+ if (proxy->vdev->guest_notifier_mask) {
+ kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+ }
+ kvm_virtio_pci_vq_vector_release(proxy, vector);
+ }
+ return ret;
+}
+
+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+{
+ PCIDevice *dev = &proxy->pci_dev;
+ VirtIODevice *vdev = proxy->vdev;
+ unsigned int vector;
+ int queue_no;
+
+ for (queue_no = 0; queue_no < nvqs; queue_no++) {
+ if (!virtio_queue_get_num(vdev, queue_no)) {
+ break;
+ }
+ vector = virtio_queue_vector(vdev, queue_no);
+ if (vector >= msix_nr_vectors_allocated(dev)) {
+ continue;
+ }
+ /* If guest supports masking, clean up irqfd now.
+ * Otherwise, it was cleaned when masked in the frontend.
+ */
+ if (proxy->vdev->guest_notifier_mask) {
+ kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+ }
+ kvm_virtio_pci_vq_vector_release(proxy, vector);
+ }
+}
+
+static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+ unsigned int queue_no,
+ unsigned int vector,
+ MSIMessage msg)
+{
+ VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+ EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+ VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+ int ret;
+
+ if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
+ ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* If guest supports masking, irqfd is already setup, unmask it.
+ * Otherwise, set it up now.
+ */
+ if (proxy->vdev->guest_notifier_mask) {
+ proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
+ /* Test after unmasking to avoid losing events. */
+ if (proxy->vdev->guest_notifier_pending &&
+ proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
+ event_notifier_set(n);
+ }
+ } else {
+ ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
}
+ return ret;
}
-static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
+static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
+ unsigned int queue_no,
+ unsigned int vector)
+{
+ /* If guest supports masking, keep irqfd but mask it.
+ * Otherwise, clean it up now.
+ */
+ if (proxy->vdev->guest_notifier_mask) {
+ proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
+ } else {
+ kvm_virtio_pci_irqfd_release(proxy, vector, queue_no);
+ }
+}
+
+static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
MSIMessage msg)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int ret, queue_no;
- for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+ for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+ ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg);
if (ret < 0) {
goto undo;
}
@@ -554,25 +681,25 @@ undo:
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
- kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+ kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
}
return ret;
}
-static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
+static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int queue_no;
- for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+ for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
- kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+ kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
}
}
@@ -587,7 +714,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
EventNotifier *notifier;
VirtQueue *vq;
- for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+ for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
@@ -598,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
}
vq = virtio_get_queue(vdev, queue_no);
notifier = virtio_queue_get_guest_notifier(vq);
- if (event_notifier_test_and_clear(notifier)) {
+ if (vdev->guest_notifier_pending) {
+ if (vdev->guest_notifier_pending(vdev, queue_no)) {
+ msix_set_pending(dev, vector);
+ }
+ } else if (event_notifier_test_and_clear(notifier)) {
msix_set_pending(dev, vector);
}
}
@@ -631,7 +762,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d)
return msix_enabled(&proxy->pci_dev);
}
-static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
+static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtIODevice *vdev = proxy->vdev;
@@ -639,14 +770,24 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
kvm_msi_via_irqfd_enabled();
+ nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);
+
+ /* When deassigning, pass a consistent nvqs value
+ * to avoid leaking notifiers.
+ */
+ assert(assign || nvqs == proxy->nvqs_with_notifiers);
+
+ proxy->nvqs_with_notifiers = nvqs;
+
/* Must unset vector notifier while guest notifier is still assigned */
if (proxy->vector_irqfd && !assign) {
msix_unset_vector_notifiers(&proxy->pci_dev);
+ kvm_virtio_pci_vector_release(proxy, nvqs);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
}
- for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
+ for (n = 0; n < nvqs; n++) {
if (!virtio_queue_get_num(vdev, n)) {
break;
}
@@ -663,17 +804,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
proxy->vector_irqfd =
g_malloc0(sizeof(*proxy->vector_irqfd) *
msix_nr_vectors_allocated(&proxy->pci_dev));
+ r = kvm_virtio_pci_vector_use(proxy, nvqs);
+ if (r < 0) {
+ goto assign_error;
+ }
r = msix_set_vector_notifiers(&proxy->pci_dev,
- kvm_virtio_pci_vector_use,
- kvm_virtio_pci_vector_release,
+ kvm_virtio_pci_vector_unmask,
+ kvm_virtio_pci_vector_mask,
kvm_virtio_pci_vector_poll);
if (r < 0) {
- goto assign_error;
+ goto notifiers_error;
}
}
return 0;
+notifiers_error:
+ assert(assign);
+ kvm_virtio_pci_vector_release(proxy, nvqs);
+
assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index b58d9a2d19..9ff3139fe9 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -27,6 +27,7 @@
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
typedef struct {
+ MSIMessage msg;
int virq;
unsigned int users;
} VirtIOIRQFD;
@@ -51,6 +52,7 @@ typedef struct {
bool ioeventfd_disabled;
bool ioeventfd_started;
VirtIOIRQFD *vector_irqfd;
+ int nvqs_with_notifiers;
} VirtIOPCIProxy;
void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);
diff --git a/hw/virtio.h b/hw/virtio.h
index 1dec9dce07..b9f1873fd6 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -99,7 +99,7 @@ typedef struct {
int (*load_done)(DeviceState *d, QEMUFile *f);
unsigned (*get_features)(DeviceState *d);
bool (*query_guest_notifiers)(DeviceState *d);
- int (*set_guest_notifiers)(DeviceState *d, bool assigned);
+ int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned);
int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
void (*vmstate_change)(DeviceState *d, bool running);
} VirtIOBindings;
@@ -126,6 +126,19 @@ struct VirtIODevice
void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
void (*reset)(VirtIODevice *vdev);
void (*set_status)(VirtIODevice *vdev, uint8_t val);
+ /* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ * If backend does not support masking,
+ * must check in frontend instead.
+ */
+ bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
+ /* Mask/unmask events from this vq. Any events reported
+ * while masked will become pending.
+ * If backend does not support masking,
+ * must mask in frontend instead.
+ */
+ void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
+
VirtQueue *vq;
const VirtIOBindings *binding;
DeviceState *binding_opaque;