summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Makefile.objs2
-rw-r--r--net/dump.c234
-rw-r--r--net/filter-buffer.c186
-rw-r--r--net/filter.c236
-rw-r--r--net/hub.c4
-rw-r--r--net/l2tpv3.c4
-rw-r--r--net/net.c152
-rw-r--r--net/netmap.c24
-rw-r--r--net/queue.c24
-rw-r--r--net/slirp.c4
-rw-r--r--net/socket.c4
-rw-r--r--net/tap-bsd.c38
-rw-r--r--net/tap-linux.c4
-rw-r--r--net/tap-win32.c53
-rw-r--r--net/tap.c8
-rw-r--r--net/vde.c4
-rw-r--r--net/vhost-user.c178
17 files changed, 981 insertions, 178 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs
index ec19cb31d9..5fa2f9731d 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -13,3 +13,5 @@ common-obj-$(CONFIG_HAIKU) += tap-haiku.o
common-obj-$(CONFIG_SLIRP) += slirp.o
common-obj-$(CONFIG_VDE) += vde.o
common-obj-$(CONFIG_NETMAP) += netmap.o
+common-obj-y += filter.o
+common-obj-y += filter-buffer.o
diff --git a/net/dump.c b/net/dump.c
index 02c8064be0..ce16a4b0e3 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -25,12 +25,13 @@
#include "clients.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
+#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/timer.h"
-#include "hub.h"
+#include "qapi/visitor.h"
+#include "net/filter.h"
typedef struct DumpState {
- NetClientState nc;
int64_t start_ts;
int fd;
int pcap_caplen;
@@ -57,27 +58,32 @@ struct pcap_sf_pkthdr {
uint32_t len;
};
-static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)
{
- DumpState *s = DO_UPCAST(DumpState, nc, nc);
struct pcap_sf_pkthdr hdr;
int64_t ts;
int caplen;
+ size_t size = iov_size(iov, cnt);
+ struct iovec dumpiov[cnt + 1];
/* Early return in case of previous error. */
if (s->fd < 0) {
return size;
}
- ts = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 1000000, get_ticks_per_sec());
+ ts = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL);
caplen = size > s->pcap_caplen ? s->pcap_caplen : size;
hdr.ts.tv_sec = ts / 1000000 + s->start_ts;
hdr.ts.tv_usec = ts % 1000000;
hdr.caplen = caplen;
hdr.len = size;
- if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
- write(s->fd, buf, caplen) != caplen) {
+
+ dumpiov[0].iov_base = &hdr;
+ dumpiov[0].iov_len = sizeof(hdr);
+ cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen);
+
+ if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
qemu_log("-net dump write error - stop dump\n");
close(s->fd);
s->fd = -1;
@@ -86,27 +92,16 @@ static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
return size;
}
-static void dump_cleanup(NetClientState *nc)
+static void dump_cleanup(DumpState *s)
{
- DumpState *s = DO_UPCAST(DumpState, nc, nc);
-
close(s->fd);
+ s->fd = -1;
}
-static NetClientInfo net_dump_info = {
- .type = NET_CLIENT_OPTIONS_KIND_DUMP,
- .size = sizeof(DumpState),
- .receive = dump_receive,
- .cleanup = dump_cleanup,
-};
-
-static int net_dump_init(NetClientState *peer, const char *device,
- const char *name, const char *filename, int len,
- Error **errp)
+static int net_dump_state_init(DumpState *s, const char *filename,
+ int len, Error **errp)
{
struct pcap_file_hdr hdr;
- NetClientState *nc;
- DumpState *s;
struct tm tm;
int fd;
@@ -130,13 +125,6 @@ static int net_dump_init(NetClientState *peer, const char *device,
return -1;
}
- nc = qemu_new_net_client(&net_dump_info, peer, device, name);
-
- snprintf(nc->info_str, sizeof(nc->info_str),
- "dump to %s (len=%d)", filename, len);
-
- s = DO_UPCAST(DumpState, nc, nc);
-
s->fd = fd;
s->pcap_caplen = len;
@@ -146,16 +134,61 @@ static int net_dump_init(NetClientState *peer, const char *device,
return 0;
}
+/* Dumping via VLAN netclient */
+
+struct DumpNetClient {
+ NetClientState nc;
+ DumpState ds;
+};
+typedef struct DumpNetClient DumpNetClient;
+
+static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf,
+ size_t size)
+{
+ DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = size
+ };
+
+ return dump_receive_iov(&dc->ds, &iov, 1);
+}
+
+static ssize_t dumpclient_receive_iov(NetClientState *nc,
+ const struct iovec *iov, int cnt)
+{
+ DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+
+ return dump_receive_iov(&dc->ds, iov, cnt);
+}
+
+static void dumpclient_cleanup(NetClientState *nc)
+{
+ DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
+
+ dump_cleanup(&dc->ds);
+}
+
+static NetClientInfo net_dump_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_DUMP,
+ .size = sizeof(DumpNetClient),
+ .receive = dumpclient_receive,
+ .receive_iov = dumpclient_receive_iov,
+ .cleanup = dumpclient_cleanup,
+};
+
int net_init_dump(const NetClientOptions *opts, const char *name,
NetClientState *peer, Error **errp)
{
- int len;
+ int len, rc;
const char *file;
char def_file[128];
const NetdevDumpOptions *dump;
+ NetClientState *nc;
+ DumpNetClient *dnc;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP);
- dump = opts->dump;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_DUMP);
+ dump = opts->u.dump;
assert(peer);
@@ -182,5 +215,140 @@ int net_init_dump(const NetClientOptions *opts, const char *name,
len = 65536;
}
- return net_dump_init(peer, "dump", name, file, len, errp);
+ nc = qemu_new_net_client(&net_dump_info, peer, "dump", name);
+ snprintf(nc->info_str, sizeof(nc->info_str),
+ "dump to %s (len=%d)", file, len);
+
+ dnc = DO_UPCAST(DumpNetClient, nc, nc);
+ rc = net_dump_state_init(&dnc->ds, file, len, errp);
+ if (rc) {
+ qemu_del_net_client(nc);
+ }
+ return rc;
}
+
+/* Dumping via filter */
+
+#define TYPE_FILTER_DUMP "filter-dump"
+
+#define FILTER_DUMP(obj) \
+ OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP)
+
+struct NetFilterDumpState {
+ NetFilterState nfs;
+ DumpState ds;
+ char *filename;
+ uint32_t maxlen;
+};
+typedef struct NetFilterDumpState NetFilterDumpState;
+
+static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
+ unsigned flags, const struct iovec *iov,
+ int iovcnt, NetPacketSent *sent_cb)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+ dump_receive_iov(&nfds->ds, iov, iovcnt);
+ return 0;
+}
+
+static void filter_dump_cleanup(NetFilterState *nf)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+ dump_cleanup(&nfds->ds);
+}
+
+static void filter_dump_setup(NetFilterState *nf, Error **errp)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(nf);
+
+ if (!nfds->filename) {
+ error_setg(errp, "dump filter needs 'file' property set!");
+ return;
+ }
+
+ net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp);
+}
+
+static void filter_dump_get_maxlen(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(obj);
+ uint32_t value = nfds->maxlen;
+
+ visit_type_uint32(v, &value, name, errp);
+}
+
+static void filter_dump_set_maxlen(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(obj);
+ Error *local_err = NULL;
+ uint32_t value;
+
+ visit_type_uint32(v, &value, name, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ if (value == 0) {
+ error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'",
+ object_get_typename(obj), name, value);
+ goto out;
+ }
+ nfds->maxlen = value;
+
+out:
+ error_propagate(errp, local_err);
+}
+
+static char *file_dump_get_filename(Object *obj, Error **errp)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+ return g_strdup(nfds->filename);
+}
+
+static void file_dump_set_filename(Object *obj, const char *value, Error **errp)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+ g_free(nfds->filename);
+ nfds->filename = g_strdup(value);
+}
+
+static void filter_dump_instance_init(Object *obj)
+{
+ NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+ nfds->maxlen = 65536;
+
+ object_property_add(obj, "maxlen", "int", filter_dump_get_maxlen,
+ filter_dump_set_maxlen, NULL, NULL, NULL);
+ object_property_add_str(obj, "file", file_dump_get_filename,
+ file_dump_set_filename, NULL);
+}
+
+static void filter_dump_class_init(ObjectClass *oc, void *data)
+{
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+ nfc->setup = filter_dump_setup;
+ nfc->cleanup = filter_dump_cleanup;
+ nfc->receive_iov = filter_dump_receive_iov;
+}
+
+static const TypeInfo filter_dump_info = {
+ .name = TYPE_FILTER_DUMP,
+ .parent = TYPE_NETFILTER,
+ .class_init = filter_dump_class_init,
+ .instance_init = filter_dump_instance_init,
+ .instance_size = sizeof(NetFilterDumpState),
+};
+
+static void filter_dump_register_types(void)
+{
+ type_register_static(&filter_dump_info);
+}
+
+type_init(filter_dump_register_types);
diff --git a/net/filter-buffer.c b/net/filter-buffer.c
new file mode 100644
index 0000000000..57be149413
--- /dev/null
+++ b/net/filter-buffer.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "net/filter.h"
+#include "net/queue.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/iov.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi-visit.h"
+#include "qom/object.h"
+
+#define TYPE_FILTER_BUFFER "filter-buffer"
+
+#define FILTER_BUFFER(obj) \
+ OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
+
+typedef struct FilterBufferState {
+ NetFilterState parent_obj;
+
+ NetQueue *incoming_queue;
+ uint32_t interval;
+ QEMUTimer release_timer;
+} FilterBufferState;
+
+static void filter_buffer_flush(NetFilterState *nf)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ if (!qemu_net_queue_flush(s->incoming_queue)) {
+ /* Unable to empty the queue, purge remaining packets */
+ qemu_net_queue_purge(s->incoming_queue, nf->netdev);
+ }
+}
+
+static void filter_buffer_release_timer(void *opaque)
+{
+ NetFilterState *nf = opaque;
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ /*
+ * Note: filter_buffer_flush() drops packets that can't be sent
+ * TODO: We should leave them queued. But currently there's no way
+ * for the next filter or receiver to notify us that it can receive
+ * more packets.
+ */
+ filter_buffer_flush(nf);
+ /* Timer rearmed to fire again in s->interval microseconds. */
+ timer_mod(&s->release_timer,
+ qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+}
+
+/* filter APIs */
+static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ /*
+ * We return size when buffer a packet, the sender will take it as
+ * a already sent packet, so sent_cb should not be called later.
+ *
+ * FIXME: Even if the guest can't receive packets for some reasons,
+ * the filter can still accept packets until its internal queue is full.
+ * For example:
+ * For some reason, receiver could not receive more packets
+ * (.can_receive() returns zero). Without a filter, at most one packet
+ * will be queued in incoming queue and sender's poll will be disabled
+ * unit its sent_cb() was called. With a filter, it will keep receiving
+ * the packets without caring about the receiver. This is suboptimal.
+ * May need more thoughts (e.g keeping sent_cb).
+ */
+ qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
+ iov, iovcnt, NULL);
+ return iov_size(iov, iovcnt);
+}
+
+static void filter_buffer_cleanup(NetFilterState *nf)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ if (s->interval) {
+ timer_del(&s->release_timer);
+ }
+
+ /* flush packets */
+ if (s->incoming_queue) {
+ filter_buffer_flush(nf);
+ g_free(s->incoming_queue);
+ }
+}
+
+static void filter_buffer_setup(NetFilterState *nf, Error **errp)
+{
+ FilterBufferState *s = FILTER_BUFFER(nf);
+
+ /*
+ * We may want to accept zero interval when VM FT solutions like MC
+ * or COLO use this filter to release packets on demand.
+ */
+ if (!s->interval) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval",
+ "a non-zero interval");
+ return;
+ }
+
+ s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+ if (s->interval) {
+ timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
+ filter_buffer_release_timer, nf);
+ /* Timer armed to fire in s->interval microseconds. */
+ timer_mod(&s->release_timer,
+ qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+ }
+}
+
+static void filter_buffer_class_init(ObjectClass *oc, void *data)
+{
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+ nfc->setup = filter_buffer_setup;
+ nfc->cleanup = filter_buffer_cleanup;
+ nfc->receive_iov = filter_buffer_receive_iov;
+}
+
+static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ FilterBufferState *s = FILTER_BUFFER(obj);
+ uint32_t value = s->interval;
+
+ visit_type_uint32(v, &value, name, errp);
+}
+
+static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque,
+ const char *name, Error **errp)
+{
+ FilterBufferState *s = FILTER_BUFFER(obj);
+ Error *local_err = NULL;
+ uint32_t value;
+
+ visit_type_uint32(v, &value, name, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ if (!value) {
+ error_setg(&local_err, "Property '%s.%s' requires a positive value",
+ object_get_typename(obj), name);
+ goto out;
+ }
+ s->interval = value;
+
+out:
+ error_propagate(errp, local_err);
+}
+
+static void filter_buffer_init(Object *obj)
+{
+ object_property_add(obj, "interval", "int",
+ filter_buffer_get_interval,
+ filter_buffer_set_interval, NULL, NULL, NULL);
+}
+
+static const TypeInfo filter_buffer_info = {
+ .name = TYPE_FILTER_BUFFER,
+ .parent = TYPE_NETFILTER,
+ .class_init = filter_buffer_class_init,
+ .instance_init = filter_buffer_init,
+ .instance_size = sizeof(FilterBufferState),
+};
+
+static void register_types(void)
+{
+ type_register_static(&filter_buffer_info);
+}
+
+type_init(register_types);
diff --git a/net/filter.c b/net/filter.c
new file mode 100644
index 0000000000..1365bad026
--- /dev/null
+++ b/net/filter.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+
+#include "net/filter.h"
+#include "net/net.h"
+#include "net/vhost_net.h"
+#include "qom/object_interfaces.h"
+#include "qemu/iov.h"
+#include "qapi/string-output-visitor.h"
+
+ssize_t qemu_netfilter_receive(NetFilterState *nf,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
+{
+ if (nf->direction == direction ||
+ nf->direction == NET_FILTER_DIRECTION_ALL) {
+ return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov(
+ nf, sender, flags, iov, iovcnt, sent_cb);
+ }
+
+ return 0;
+}
+
+ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ void *opaque)
+{
+ int ret = 0;
+ int direction;
+ NetFilterState *nf = opaque;
+ NetFilterState *next = QTAILQ_NEXT(nf, next);
+
+ if (!sender || !sender->peer) {
+ /* no receiver, or sender been deleted, no need to pass it further */
+ goto out;
+ }
+
+ if (nf->direction == NET_FILTER_DIRECTION_ALL) {
+ if (sender == nf->netdev) {
+ /* This packet is sent by netdev itself */
+ direction = NET_FILTER_DIRECTION_TX;
+ } else {
+ direction = NET_FILTER_DIRECTION_RX;
+ }
+ } else {
+ direction = nf->direction;
+ }
+
+ while (next) {
+ /*
+ * if qemu_netfilter_pass_to_next been called, means that
+ * the packet has been hold by filter and has already retured size
+ * to the sender, so sent_cb shouldn't be called later, just
+ * pass NULL to next.
+ */
+ ret = qemu_netfilter_receive(next, direction, sender, flags, iov,
+ iovcnt, NULL);
+ if (ret) {
+ return ret;
+ }
+ next = QTAILQ_NEXT(next, next);
+ }
+
+ /*
+ * We have gone through all filters, pass it to receiver.
+ * Do the valid check again incase sender or receiver been
+ * deleted while we go through filters.
+ */
+ if (sender && sender->peer) {
+ qemu_net_queue_send_iov(sender->peer->incoming_queue,
+ sender, flags, iov, iovcnt, NULL);
+ }
+
+out:
+ /* no receiver, or sender been deleted */
+ return iov_size(iov, iovcnt);
+}
+
+static char *netfilter_get_netdev_id(Object *obj, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(obj);
+
+ return g_strdup(nf->netdev_id);
+}
+
+static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(obj);
+
+ nf->netdev_id = g_strdup(str);
+}
+
+static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ NetFilterState *nf = NETFILTER(obj);
+ return nf->direction;
+}
+
+static void netfilter_set_direction(Object *obj, int direction, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(obj);
+ nf->direction = direction;
+}
+
+static void netfilter_init(Object *obj)
+{
+ object_property_add_str(obj, "netdev",
+ netfilter_get_netdev_id, netfilter_set_netdev_id,
+ NULL);
+ object_property_add_enum(obj, "queue", "NetFilterDirection",
+ NetFilterDirection_lookup,
+ netfilter_get_direction, netfilter_set_direction,
+ NULL);
+}
+
+static void netfilter_complete(UserCreatable *uc, Error **errp)
+{
+ NetFilterState *nf = NETFILTER(uc);
+ NetClientState *ncs[MAX_QUEUE_NUM];
+ NetFilterClass *nfc = NETFILTER_GET_CLASS(uc);
+ int queues;
+ Error *local_err = NULL;
+ char *str, *info;
+ ObjectProperty *prop;
+ ObjectPropertyIterator *iter;
+ StringOutputVisitor *ov;
+
+ if (!nf->netdev_id) {
+ error_setg(errp, "Parameter 'netdev' is required");
+ return;
+ }
+
+ queues = qemu_find_net_clients_except(nf->netdev_id, ncs,
+ NET_CLIENT_OPTIONS_KIND_NIC,
+ MAX_QUEUE_NUM);
+ if (queues < 1) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev",
+ "a network backend id");
+ return;
+ } else if (queues > 1) {
+ error_setg(errp, "multiqueue is not supported");
+ return;
+ }
+
+ if (get_vhost_net(ncs[0])) {
+ error_setg(errp, "Vhost is not supported");
+ return;
+ }
+
+ nf->netdev = ncs[0];
+
+ if (nfc->setup) {
+ nfc->setup(nf, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+ QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
+
+ /* generate info str */
+ iter = object_property_iter_init(OBJECT(nf));
+ while ((prop = object_property_iter_next(iter))) {
+ if (!strcmp(prop->name, "type")) {
+ continue;
+ }
+ ov = string_output_visitor_new(false);
+ object_property_get(OBJECT(nf), string_output_get_visitor(ov),
+ prop->name, errp);
+ str = string_output_get_string(ov);
+ string_output_visitor_cleanup(ov);
+ info = g_strdup_printf(",%s=%s", prop->name, str);
+ g_strlcat(nf->info_str, info, sizeof(nf->info_str));
+ g_free(str);
+ g_free(info);
+ }
+ object_property_iter_free(iter);
+}
+
+static void netfilter_finalize(Object *obj)
+{
+ NetFilterState *nf = NETFILTER(obj);
+ NetFilterClass *nfc = NETFILTER_GET_CLASS(obj);
+
+ if (nfc->cleanup) {
+ nfc->cleanup(nf);
+ }
+
+ if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters)) {
+ QTAILQ_REMOVE(&nf->netdev->filters, nf, next);
+ }
+}
+
+static void netfilter_class_init(ObjectClass *oc, void *data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+ ucc->complete = netfilter_complete;
+}
+
+static const TypeInfo netfilter_info = {
+ .name = TYPE_NETFILTER,
+ .parent = TYPE_OBJECT,
+ .abstract = true,
+ .class_size = sizeof(NetFilterClass),
+ .class_init = netfilter_class_init,
+ .instance_size = sizeof(NetFilterState),
+ .instance_init = netfilter_init,
+ .instance_finalize = netfilter_finalize,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void register_types(void)
+{
+ type_register_static(&netfilter_info);
+}
+
+type_init(register_types);
diff --git a/net/hub.c b/net/hub.c
index 3047f12766..9ae9f012cb 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -285,9 +285,9 @@ int net_init_hubport(const NetClientOptions *opts, const char *name,
{
const NetdevHubPortOptions *hubport;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT);
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT);
assert(!peer);
- hubport = opts->hubport;
+ hubport = opts->u.hubport;
net_hub_add_port(hubport->hubid, name);
return 0;
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
index 4f9bceecc9..8e68e540ec 100644
--- a/net/l2tpv3.c
+++ b/net/l2tpv3.c
@@ -545,8 +545,8 @@ int net_init_l2tpv3(const NetClientOptions *opts,
s->queue_tail = 0;
s->header_mismatch = false;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
- l2tpv3 = opts->l2tpv3;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+ l2tpv3 = opts->u.l2tpv3;
if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
s->ipv6 = l2tpv3->ipv6;
diff --git a/net/net.c b/net/net.c
index d9548245f0..9f7c9e467c 100644
--- a/net/net.c
+++ b/net/net.c
@@ -44,6 +44,7 @@
#include "qapi/opts-visitor.h"
#include "qapi/dealloc-visitor.h"
#include "sysemu/sysemu.h"
+#include "net/filter.h"
/* Net bridge is currently not supported for W32. */
#if !defined(_WIN32)
@@ -285,8 +286,9 @@ static void qemu_net_client_setup(NetClientState *nc,
}
QTAILQ_INSERT_TAIL(&net_clients, nc, next);
- nc->incoming_queue = qemu_new_net_queue(nc);
+ nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc);
nc->destructor = destructor;
+ QTAILQ_INIT(&nc->filters);
}
NetClientState *qemu_new_net_client(NetClientInfo *info,
@@ -384,6 +386,7 @@ void qemu_del_net_client(NetClientState *nc)
{
NetClientState *ncs[MAX_QUEUE_NUM];
int queues, i;
+ NetFilterState *nf, *next;
assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC);
@@ -395,6 +398,10 @@ void qemu_del_net_client(NetClientState *nc)
MAX_QUEUE_NUM);
assert(queues != 0);
+ QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) {
+ object_unparent(OBJECT(nf));
+ }
+
/* If there is a peer NIC, delete and cleanup client, but do not free. */
if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) {
NICState *nic = qemu_get_nic(nc->peer);
@@ -562,34 +569,42 @@ int qemu_can_send_packet(NetClientState *sender)
return 1;
}
-ssize_t qemu_deliver_packet(NetClientState *sender,
- unsigned flags,
- const uint8_t *data,
- size_t size,
- void *opaque)
+static ssize_t filter_receive_iov(NetClientState *nc,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
{
- NetClientState *nc = opaque;
- ssize_t ret;
-
- if (nc->link_down) {
- return size;
- }
+ ssize_t ret = 0;
+ NetFilterState *nf = NULL;
- if (nc->receive_disabled) {
- return 0;
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
+ ret = qemu_netfilter_receive(nf, direction, sender, flags, iov,
+ iovcnt, sent_cb);
+ if (ret) {
+ return ret;
+ }
}
- if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
- ret = nc->info->receive_raw(nc, data, size);
- } else {
- ret = nc->info->receive(nc, data, size);
- }
+ return ret;
+}
- if (ret == 0) {
- nc->receive_disabled = 1;
- }
+static ssize_t filter_receive(NetClientState *nc,
+ NetFilterDirection direction,
+ NetClientState *sender,
+ unsigned flags,
+ const uint8_t *data,
+ size_t size,
+ NetPacketSent *sent_cb)
+{
+ struct iovec iov = {
+ .iov_base = (void *)data,
+ .iov_len = size
+ };
- return ret;
+ return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb);
}
void qemu_purge_queued_packets(NetClientState *nc)
@@ -633,6 +648,7 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
NetPacketSent *sent_cb)
{
NetQueue *queue;
+ int ret;
#ifdef DEBUG_NET
printf("qemu_send_packet_async:\n");
@@ -643,6 +659,19 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
return size;
}
+ /* Let filters handle the packet first */
+ ret = filter_receive(sender, NET_FILTER_DIRECTION_TX,
+ sender, flags, buf, size, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
+ ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX,
+ sender, flags, buf, size, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
queue = sender->peer->incoming_queue;
return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb);
@@ -668,14 +697,25 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size)
}
static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
- int iovcnt)
+ int iovcnt, unsigned flags)
{
- uint8_t buffer[NET_BUFSIZE];
+ uint8_t buf[NET_BUFSIZE];
+ uint8_t *buffer;
size_t offset;
- offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
+ if (iovcnt == 1) {
+ buffer = iov[0].iov_base;
+ offset = iov[0].iov_len;
+ } else {
+ buffer = buf;
+ offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf));
+ }
- return nc->info->receive(nc, buffer, offset);
+ if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
+ return nc->info->receive_raw(nc, buffer, offset);
+ } else {
+ return nc->info->receive(nc, buffer, offset);
+ }
}
ssize_t qemu_deliver_packet_iov(NetClientState *sender,
@@ -698,7 +738,7 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
if (nc->info->receive_iov) {
ret = nc->info->receive_iov(nc, iov, iovcnt);
} else {
- ret = nc_sendv_compat(nc, iov, iovcnt);
+ ret = nc_sendv_compat(nc, iov, iovcnt, flags);
}
if (ret == 0) {
@@ -713,11 +753,25 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender,
NetPacketSent *sent_cb)
{
NetQueue *queue;
+ int ret;
if (sender->link_down || !sender->peer) {
return iov_size(iov, iovcnt);
}
+ /* Let filters handle the packet first */
+ ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender,
+ QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
+ ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender,
+ QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb);
+ if (ret) {
+ return ret;
+ }
+
queue = sender->peer->incoming_queue;
return qemu_net_queue_send_iov(queue, sender,
@@ -828,8 +882,8 @@ static int net_init_nic(const NetClientOptions *opts, const char *name,
NICInfo *nd;
const NetLegacyNicOptions *nic;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_NIC);
- nic = opts->nic;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_NIC);
+ nic = opts->u.nic;
idx = nic_get_free_idx();
if (idx == -1 || nb_nics >= MAX_NICS) {
@@ -930,9 +984,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
opts = netdev->opts;
name = netdev->id;
- if (opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP ||
- opts->kind == NET_CLIENT_OPTIONS_KIND_NIC ||
- !net_client_init_fun[opts->kind]) {
+ if (opts->type == NET_CLIENT_OPTIONS_KIND_DUMP ||
+ opts->type == NET_CLIENT_OPTIONS_KIND_NIC ||
+ !net_client_init_fun[opts->type]) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
"a netdev backend type");
return -1;
@@ -943,16 +997,16 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
/* missing optional values have been initialized to "all bits zero" */
name = net->has_id ? net->id : net->name;
- if (opts->kind == NET_CLIENT_OPTIONS_KIND_NONE) {
+ if (opts->type == NET_CLIENT_OPTIONS_KIND_NONE) {
return 0; /* nothing to do */
}
- if (opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT) {
+ if (opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
"a net type");
return -1;
}
- if (!net_client_init_fun[opts->kind]) {
+ if (!net_client_init_fun[opts->type]) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
"a net backend type (maybe it is not compiled "
"into this binary)");
@@ -960,17 +1014,17 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
}
/* Do not add to a vlan if it's a nic with a netdev= parameter. */
- if (opts->kind != NET_CLIENT_OPTIONS_KIND_NIC ||
- !opts->nic->has_netdev) {
+ if (opts->type != NET_CLIENT_OPTIONS_KIND_NIC ||
+ !opts->u.nic->has_netdev) {
peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL);
}
}
- if (net_client_init_fun[opts->kind](opts, name, peer, errp) < 0) {
+ if (net_client_init_fun[opts->type](opts, name, peer, errp) < 0) {
/* FIXME drop when all init functions store an Error */
if (errp && !*errp) {
error_setg(errp, QERR_DEVICE_INIT_FAILED,
- NetClientOptionsKind_lookup[opts->kind]);
+ NetClientOptionsKind_lookup[opts->type]);
}
return -1;
}
@@ -1133,10 +1187,22 @@ void qmp_netdev_del(const char *id, Error **errp)
void print_net_client(Monitor *mon, NetClientState *nc)
{
+ NetFilterState *nf;
+
monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name,
nc->queue_index,
NetClientOptionsKind_lookup[nc->info->type],
nc->info_str);
+ if (!QTAILQ_EMPTY(&nc->filters)) {
+ monitor_printf(mon, "filters:\n");
+ }
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
+ char *path = object_get_canonical_path_component(OBJECT(nf));
+ monitor_printf(mon, " - %s: type=%s%s\n", path,
+ object_get_typename(OBJECT(nf)),
+ nf->info_str);
+ g_free(path);
+ }
}
RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
@@ -1162,6 +1228,12 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
continue;
}
+ /* only query information on queue 0 since the info is per nic,
+ * not per queue
+ */
+ if (nc->queue_index != 0)
+ continue;
+
if (nc->info->query_rx_filter) {
info = nc->info->query_rx_filter(nc);
entry = g_malloc0(sizeof(*entry));
diff --git a/net/netmap.c b/net/netmap.c
index 508b82947d..555836829e 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -90,7 +90,7 @@ pkt_copy(const void *_src, void *_dst, int l)
* Open a netmap device. We assume there is only one queue
* (which is the case for the VALE bridge).
*/
-static int netmap_open(NetmapPriv *me)
+static void netmap_open(NetmapPriv *me, Error **errp)
{
int fd;
int err;
@@ -99,9 +99,8 @@ static int netmap_open(NetmapPriv *me)
me->fd = fd = open(me->fdname, O_RDWR);
if (fd < 0) {
- error_report("Unable to open netmap device '%s' (%s)",
- me->fdname, strerror(errno));
- return -1;
+ error_setg_file_open(errp, errno, me->fdname);
+ return;
}
memset(&req, 0, sizeof(req));
pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname);
@@ -109,15 +108,14 @@ static int netmap_open(NetmapPriv *me)
req.nr_version = NETMAP_API;
err = ioctl(fd, NIOCREGIF, &req);
if (err) {
- error_report("Unable to register %s: %s", me->ifname, strerror(errno));
+ error_setg_errno(errp, errno, "Unable to register %s", me->ifname);
goto error;
}
l = me->memsize = req.nr_memsize;
me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
if (me->mem == MAP_FAILED) {
- error_report("Unable to mmap netmap shared memory: %s",
- strerror(errno));
+ error_setg_errno(errp, errno, "Unable to mmap netmap shared memory");
me->mem = NULL;
goto error;
}
@@ -125,11 +123,11 @@ static int netmap_open(NetmapPriv *me)
me->nifp = NETMAP_IF(me->mem, req.nr_offset);
me->tx = NETMAP_TXRING(me->nifp, 0);
me->rx = NETMAP_RXRING(me->nifp, 0);
- return 0;
+
+ return;
error:
close(me->fd);
- return -1;
}
static void netmap_send(void *opaque);
@@ -438,9 +436,9 @@ static NetClientInfo net_netmap_info = {
int net_init_netmap(const NetClientOptions *opts,
const char *name, NetClientState *peer, Error **errp)
{
- /* FIXME error_setg(errp, ...) on failure */
- const NetdevNetmapOptions *netmap_opts = opts->netmap;
+ const NetdevNetmapOptions *netmap_opts = opts->u.netmap;
NetClientState *nc;
+ Error *err = NULL;
NetmapPriv me;
NetmapState *s;
@@ -448,7 +446,9 @@ int net_init_netmap(const NetClientOptions *opts,
netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap");
/* Set default name for the port if not supplied. */
pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname);
- if (netmap_open(&me)) {
+ netmap_open(&me, &err);
+ if (err) {
+ error_propagate(errp, err);
return -1;
}
/* Create the object. */
diff --git a/net/queue.c b/net/queue.c
index ebbe2bb93b..de8b9d31c6 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -52,13 +52,14 @@ struct NetQueue {
void *opaque;
uint32_t nq_maxlen;
uint32_t nq_count;
+ NetQueueDeliverFunc *deliver;
QTAILQ_HEAD(packets, NetPacket) packets;
unsigned delivering : 1;
};
-NetQueue *qemu_new_net_queue(void *opaque)
+NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque)
{
NetQueue *queue;
@@ -67,6 +68,7 @@ NetQueue *qemu_new_net_queue(void *opaque)
queue->opaque = opaque;
queue->nq_maxlen = 10000;
queue->nq_count = 0;
+ queue->deliver = deliver;
QTAILQ_INIT(&queue->packets);
@@ -110,12 +112,12 @@ static void qemu_net_queue_append(NetQueue *queue,
QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
}
-static void qemu_net_queue_append_iov(NetQueue *queue,
- NetClientState *sender,
- unsigned flags,
- const struct iovec *iov,
- int iovcnt,
- NetPacketSent *sent_cb)
+void qemu_net_queue_append_iov(NetQueue *queue,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
{
NetPacket *packet;
size_t max_len = 0;
@@ -152,9 +154,13 @@ static ssize_t qemu_net_queue_deliver(NetQueue *queue,
size_t size)
{
ssize_t ret = -1;
+ struct iovec iov = {
+ .iov_base = (void *)data,
+ .iov_len = size
+ };
queue->delivering = 1;
- ret = qemu_deliver_packet(sender, flags, data, size, queue->opaque);
+ ret = queue->deliver(sender, flags, &iov, 1, queue->opaque);
queue->delivering = 0;
return ret;
@@ -169,7 +175,7 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
ssize_t ret = -1;
queue->delivering = 1;
- ret = qemu_deliver_packet_iov(sender, flags, iov, iovcnt, queue->opaque);
+ ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque);
queue->delivering = 0;
return ret;
diff --git a/net/slirp.c b/net/slirp.c
index 7657b38fdf..f505570adb 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -746,8 +746,8 @@ int net_init_slirp(const NetClientOptions *opts, const char *name,
const NetdevUserOptions *user;
const char **dnssearch;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_USER);
- user = opts->user;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_USER);
+ user = opts->u.user;
vnet = user->has_net ? g_strdup(user->net) :
user->has_ip ? g_strdup_printf("%s/24", user->ip) :
diff --git a/net/socket.c b/net/socket.c
index b1e3b1c8d9..e8605d4ded 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -706,8 +706,8 @@ int net_init_socket(const NetClientOptions *opts, const char *name,
Error *err = NULL;
const NetdevSocketOptions *sock;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_SOCKET);
- sock = opts->socket;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_SOCKET);
+ sock = opts->u.socket;
if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast +
sock->has_udp != 1) {
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 7028d9be95..0103a97bf6 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -109,8 +109,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
#define PATH_NET_TAP "/dev/tap"
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
- int vnet_hdr_required, int mq_required, Error **errp)
+static int tap_open_clone(char *ifname, int ifname_size, Error **errp)
{
int fd, s, ret;
struct ifreq ifr;
@@ -126,7 +125,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
ret = ioctl(fd, TAPGIFNAME, (void *)&ifr);
if (ret < 0) {
error_setg_errno(errp, errno, "could not get tap interface name");
- goto error;
+ close(fd);
+ return -1;
}
if (ifname[0] != '\0') {
@@ -135,19 +135,47 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
if (s < 0) {
error_setg_errno(errp, errno,
"could not open socket to set interface name");
- goto error;
+ close(fd);
+ return -1;
}
ifr.ifr_data = ifname;
ret = ioctl(s, SIOCSIFNAME, (void *)&ifr);
close(s);
if (ret < 0) {
error_setg(errp, "could not set tap interface name");
- goto error;
+ close(fd);
+ return -1;
}
} else {
pstrcpy(ifname, ifname_size, ifr.ifr_name);
}
+ return fd;
+}
+
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+ int vnet_hdr_required, int mq_required, Error **errp)
+{
+ int fd = -1;
+
+ /* If the specified tap device already exists just use it. */
+ if (ifname[0] != '\0') {
+ char dname[100];
+ snprintf(dname, sizeof dname, "/dev/%s", ifname);
+ TFR(fd = open(dname, O_RDWR));
+ if (fd < 0 && errno != ENOENT) {
+ error_setg_errno(errp, errno, "could not open %s", dname);
+ return -1;
+ }
+ }
+
+ if (fd < 0) {
+ /* Tap device not specified or does not exist. */
+ if ((fd = tap_open_clone(ifname, ifname_size, errp)) < 0) {
+ return -1;
+ }
+ }
+
if (*vnet_hdr) {
/* BSD doesn't have IFF_VNET_HDR */
*vnet_hdr = 0;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index aefa05d081..419a2455a0 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -303,7 +303,7 @@ int tap_fd_set_vnet_le(int fd, int is_le)
return -errno;
}
- error_report("TUNSETVNETLE ioctl() failed: %s.\n", strerror(errno));
+ error_report("TUNSETVNETLE ioctl() failed: %s.", strerror(errno));
abort();
}
@@ -320,7 +320,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
return -errno;
}
- error_report("TUNSETVNETBE ioctl() failed: %s.\n", strerror(errno));
+ error_report("TUNSETVNETBE ioctl() failed: %s.", strerror(errno));
abort();
}
diff --git a/net/tap-win32.c b/net/tap-win32.c
index ed56acf2ef..af26061489 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -77,7 +77,12 @@
//#define DEBUG_TAP_WIN32
-#define TUN_ASYNCHRONOUS_WRITES 1
+/* FIXME: The asynch write path appears to be broken at
+ * present. WriteFile() ignores the lpNumberOfBytesWritten parameter
+ * for overlapped writes, with the result we return zero bytes sent,
+ * and after handling a single packet, receive is disabled for this
+ * interface. */
+/* #define TUN_ASYNCHRONOUS_WRITES 1 */
#define TUN_BUFFER_SIZE 1560
#define TUN_MAX_BUFFER_COUNT 32
@@ -356,7 +361,8 @@ static int get_device_guid(
&len);
if (status != ERROR_SUCCESS || name_type != REG_SZ) {
- return -1;
+ ++i;
+ continue;
}
else {
if (is_tap_win32_dev(enum_name)) {
@@ -460,27 +466,48 @@ static int tap_win32_write(tap_win32_overlapped_t *overlapped,
BOOL result;
DWORD error;
+#ifdef TUN_ASYNCHRONOUS_WRITES
result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped,
&write_size, FALSE);
if (!result && GetLastError() == ERROR_IO_INCOMPLETE)
WaitForSingleObject(overlapped->write_event, INFINITE);
+#endif
result = WriteFile(overlapped->handle, buffer, size,
&write_size, &overlapped->write_overlapped);
+#ifdef TUN_ASYNCHRONOUS_WRITES
+ /* FIXME: we can't sensibly set write_size here, without waiting
+ * for the IO to complete! Moreover, we can't return zero,
+ * because that will disable receive on this interface, and we
+ * also can't assume it will succeed and return the full size,
+ * because that will result in the buffer being reclaimed while
+ * the IO is in progress. */
+#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES.
+#else /* !TUN_ASYNCHRONOUS_WRITES */
if (!result) {
- switch (error = GetLastError())
- {
- case ERROR_IO_PENDING:
-#ifndef TUN_ASYNCHRONOUS_WRITES
- WaitForSingleObject(overlapped->write_event, INFINITE);
-#endif
- break;
- default:
- return -1;
+ error = GetLastError();
+ if (error == ERROR_IO_PENDING) {
+ result = GetOverlappedResult(overlapped->handle,
+ &overlapped->write_overlapped,
+ &write_size, TRUE);
}
}
+#endif
+
+ if (!result) {
+#ifdef DEBUG_TAP_WIN32
+ LPTSTR msgbuf;
+ error = GetLastError();
+ FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ &msgbuf, 0, NULL);
+ fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf);
+ LocalFree(msgbuf);
+#endif
+ return 0;
+ }
return write_size;
}
@@ -777,8 +804,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
/* FIXME error_setg(errp, ...) on failure */
const NetdevTapOptions *tap;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
- tap = opts->tap;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP);
+ tap = opts->u.tap;
if (!tap->has_ifname) {
error_report("tap: no interface name");
diff --git a/net/tap.c b/net/tap.c
index 079d732d78..6642bc42ba 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -624,8 +624,8 @@ int net_init_bridge(const NetClientOptions *opts, const char *name,
TAPState *s;
int fd, vnet_hdr;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE);
- bridge = opts->bridge;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE);
+ bridge = opts->u.bridge;
helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE;
@@ -787,8 +787,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
const char *vhostfdname;
char ifname[128];
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
- tap = opts->tap;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP);
+ tap = opts->u.tap;
queues = tap->has_queues ? tap->queues : 1;
vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
/* QEMU vlans does not support multiqueue tap, in this case peer is set.
diff --git a/net/vde.c b/net/vde.c
index dacaa64b47..4475d929e6 100644
--- a/net/vde.c
+++ b/net/vde.c
@@ -115,8 +115,8 @@ int net_init_vde(const NetClientOptions *opts, const char *name,
/* FIXME error_setg(errp, ...) on failure */
const NetdevVdeOptions *vde;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VDE);
- vde = opts->vde;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_VDE);
+ vde = opts->u.vde;
/* missing optional values have been initialized to "all bits zero" */
if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group,
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 93dcecd664..b368a90219 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -14,6 +14,8 @@
#include "sysemu/char.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qmp-commands.h"
+#include "trace.h"
typedef struct VhostUserState {
NetClientState nc;
@@ -39,37 +41,106 @@ static int vhost_user_running(VhostUserState *s)
return (s->vhost_net) ? 1 : 0;
}
-static int vhost_user_start(VhostUserState *s)
+static void vhost_user_stop(int queues, NetClientState *ncs[])
{
- VhostNetOptions options;
+ VhostUserState *s;
+ int i;
- if (vhost_user_running(s)) {
- return 0;
+ for (i = 0; i < queues; i++) {
+ assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+
+ s = DO_UPCAST(VhostUserState, nc, ncs[i]);
+ if (!vhost_user_running(s)) {
+ continue;
+ }
+
+ if (s->vhost_net) {
+ vhost_net_cleanup(s->vhost_net);
+ s->vhost_net = NULL;
+ }
}
+}
+
+static int vhost_user_start(int queues, NetClientState *ncs[])
+{
+ VhostNetOptions options;
+ VhostUserState *s;
+ int max_queues;
+ int i;
options.backend_type = VHOST_BACKEND_TYPE_USER;
- options.net_backend = &s->nc;
- options.opaque = s->chr;
- s->vhost_net = vhost_net_init(&options);
+ for (i = 0; i < queues; i++) {
+ assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+
+ s = DO_UPCAST(VhostUserState, nc, ncs[i]);
+ if (vhost_user_running(s)) {
+ continue;
+ }
+
+ options.net_backend = ncs[i];
+ options.opaque = s->chr;
+ s->vhost_net = vhost_net_init(&options);
+ if (!s->vhost_net) {
+ error_report("failed to init vhost_net for queue %d\n", i);
+ goto err;
+ }
+
+ if (i == 0) {
+ max_queues = vhost_net_get_max_queues(s->vhost_net);
+ if (queues > max_queues) {
+ error_report("you are asking more queues than "
+ "supported: %d\n", max_queues);
+ goto err;
+ }
+ }
+ }
- return vhost_user_running(s) ? 0 : -1;
+ return 0;
+
+err:
+ vhost_user_stop(i + 1, ncs);
+ return -1;
}
-static void vhost_user_stop(VhostUserState *s)
+static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf,
+ size_t size)
{
- if (vhost_user_running(s)) {
- vhost_net_cleanup(s->vhost_net);
+ /* In case of RARP (message size is 60) notify backup to send a fake RARP.
+ This fake RARP will be sent by backend only for guest
+ without GUEST_ANNOUNCE capability.
+ */
+ if (size == 60) {
+ VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+ int r;
+ static int display_rarp_failure = 1;
+ char mac_addr[6];
+
+ /* extract guest mac address from the RARP message */
+ memcpy(mac_addr, &buf[6], 6);
+
+ r = vhost_net_notify_migration_done(s->vhost_net, mac_addr);
+
+ if ((r != 0) && (display_rarp_failure)) {
+ fprintf(stderr,
+ "Vhost user backend fails to broadcast fake RARP\n");
+ fflush(stderr);
+ display_rarp_failure = 0;
+ }
}
- s->vhost_net = 0;
+ return size;
}
static void vhost_user_cleanup(NetClientState *nc)
{
VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
- vhost_user_stop(s);
+ if (s->vhost_net) {
+ vhost_net_cleanup(s->vhost_net);
+ s->vhost_net = NULL;
+ }
+
qemu_purge_queued_packets(nc);
}
@@ -90,64 +161,64 @@ static bool vhost_user_has_ufo(NetClientState *nc)
static NetClientInfo net_vhost_user_info = {
.type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
.size = sizeof(VhostUserState),
+ .receive = vhost_user_receive,
.cleanup = vhost_user_cleanup,
.has_vnet_hdr = vhost_user_has_vnet_hdr,
.has_ufo = vhost_user_has_ufo,
};
-static void net_vhost_link_down(VhostUserState *s, bool link_down)
-{
- s->nc.link_down = link_down;
-
- if (s->nc.peer) {
- s->nc.peer->link_down = link_down;
- }
-
- if (s->nc.info->link_status_changed) {
- s->nc.info->link_status_changed(&s->nc);
- }
-
- if (s->nc.peer && s->nc.peer->info->link_status_changed) {
- s->nc.peer->info->link_status_changed(s->nc.peer);
- }
-}
-
static void net_vhost_user_event(void *opaque, int event)
{
- VhostUserState *s = opaque;
-
+ const char *name = opaque;
+ NetClientState *ncs[MAX_QUEUE_NUM];
+ VhostUserState *s;
+ Error *err = NULL;
+ int queues;
+
+ queues = qemu_find_net_clients_except(name, ncs,
+ NET_CLIENT_OPTIONS_KIND_NIC,
+ MAX_QUEUE_NUM);
+ s = DO_UPCAST(VhostUserState, nc, ncs[0]);
+ trace_vhost_user_event(s->chr->label, event);
switch (event) {
case CHR_EVENT_OPENED:
- vhost_user_start(s);
- net_vhost_link_down(s, false);
- error_report("chardev \"%s\" went up", s->chr->label);
+ if (vhost_user_start(queues, ncs) < 0) {
+ exit(1);
+ }
+ qmp_set_link(name, true, &err);
break;
case CHR_EVENT_CLOSED:
- net_vhost_link_down(s, true);
- vhost_user_stop(s);
- error_report("chardev \"%s\" went down", s->chr->label);
+ qmp_set_link(name, false, &err);
+ vhost_user_stop(queues, ncs);
break;
}
+
+ if (err) {
+ error_report_err(err);
+ }
}
static int net_vhost_user_init(NetClientState *peer, const char *device,
- const char *name, CharDriverState *chr)
+ const char *name, CharDriverState *chr,
+ int queues)
{
NetClientState *nc;
VhostUserState *s;
+ int i;
- nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
+ for (i = 0; i < queues; i++) {
+ nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
- snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
- chr->label);
+ snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
+ i, chr->label);
- s = DO_UPCAST(VhostUserState, nc, nc);
+ nc->queue_index = i;
- /* We don't provide a receive callback */
- s->nc.receive_disabled = 1;
- s->chr = chr;
+ s = DO_UPCAST(VhostUserState, nc, nc);
+ s->chr = chr;
+ }
- qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
+ qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, (void*)name);
return 0;
}
@@ -226,11 +297,12 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
int net_init_vhost_user(const NetClientOptions *opts, const char *name,
NetClientState *peer, Error **errp)
{
+ int queues;
const NetdevVhostUserOptions *vhost_user_opts;
CharDriverState *chr;
- assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
- vhost_user_opts = opts->vhost_user;
+ assert(opts->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+ vhost_user_opts = opts->u.vhost_user;
chr = net_vhost_parse_chardev(vhost_user_opts, errp);
if (!chr) {
@@ -243,6 +315,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
return -1;
}
+ queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
+ if (queues < 1) {
+ error_setg(errp,
+ "vhost-user number of queues must be bigger than zero");
+ return -1;
+ }
- return net_vhost_user_init(peer, "vhost_user", name, chr);
+ return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
}