diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/Makefile.objs | 2 | ||||
-rw-r--r-- | net/dump.c | 234 | ||||
-rw-r--r-- | net/filter-buffer.c | 186 | ||||
-rw-r--r-- | net/filter.c | 236 | ||||
-rw-r--r-- | net/hub.c | 4 | ||||
-rw-r--r-- | net/l2tpv3.c | 4 | ||||
-rw-r--r-- | net/net.c | 152 | ||||
-rw-r--r-- | net/netmap.c | 24 | ||||
-rw-r--r-- | net/queue.c | 24 | ||||
-rw-r--r-- | net/slirp.c | 4 | ||||
-rw-r--r-- | net/socket.c | 4 | ||||
-rw-r--r-- | net/tap-bsd.c | 38 | ||||
-rw-r--r-- | net/tap-linux.c | 4 | ||||
-rw-r--r-- | net/tap-win32.c | 53 | ||||
-rw-r--r-- | net/tap.c | 8 | ||||
-rw-r--r-- | net/vde.c | 4 | ||||
-rw-r--r-- | net/vhost-user.c | 178 |
17 files changed, 981 insertions, 178 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs index ec19cb31d9..5fa2f9731d 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -13,3 +13,5 @@ common-obj-$(CONFIG_HAIKU) += tap-haiku.o common-obj-$(CONFIG_SLIRP) += slirp.o common-obj-$(CONFIG_VDE) += vde.o common-obj-$(CONFIG_NETMAP) += netmap.o +common-obj-y += filter.o +common-obj-y += filter-buffer.o diff --git a/net/dump.c b/net/dump.c index 02c8064be0..ce16a4b0e3 100644 --- a/net/dump.c +++ b/net/dump.c @@ -25,12 +25,13 @@ #include "clients.h" #include "qemu-common.h" #include "qemu/error-report.h" +#include "qemu/iov.h" #include "qemu/log.h" #include "qemu/timer.h" -#include "hub.h" +#include "qapi/visitor.h" +#include "net/filter.h" typedef struct DumpState { - NetClientState nc; int64_t start_ts; int fd; int pcap_caplen; @@ -57,27 +58,32 @@ struct pcap_sf_pkthdr { uint32_t len; }; -static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size) +static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt) { - DumpState *s = DO_UPCAST(DumpState, nc, nc); struct pcap_sf_pkthdr hdr; int64_t ts; int caplen; + size_t size = iov_size(iov, cnt); + struct iovec dumpiov[cnt + 1]; /* Early return in case of previous error. */ if (s->fd < 0) { return size; } - ts = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 1000000, get_ticks_per_sec()); + ts = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL); caplen = size > s->pcap_caplen ? s->pcap_caplen : size; hdr.ts.tv_sec = ts / 1000000 + s->start_ts; hdr.ts.tv_usec = ts % 1000000; hdr.caplen = caplen; hdr.len = size; - if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) || - write(s->fd, buf, caplen) != caplen) { + + dumpiov[0].iov_base = &hdr; + dumpiov[0].iov_len = sizeof(hdr); + cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen); + + if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { qemu_log("-net dump write error - stop dump\n"); close(s->fd); s->fd = -1; @@ -86,27 +92,16 @@ static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size) return size; } -static void dump_cleanup(NetClientState *nc) +static void dump_cleanup(DumpState *s) { - DumpState *s = DO_UPCAST(DumpState, nc, nc); - close(s->fd); + s->fd = -1; } -static NetClientInfo net_dump_info = { - .type = NET_CLIENT_OPTIONS_KIND_DUMP, - .size = sizeof(DumpState), - .receive = dump_receive, - .cleanup = dump_cleanup, -}; - -static int net_dump_init(NetClientState *peer, const char *device, - const char *name, const char *filename, int len, - Error **errp) +static int net_dump_state_init(DumpState *s, const char *filename, + int len, Error **errp) { struct pcap_file_hdr hdr; - NetClientState *nc; - DumpState *s; struct tm tm; int fd; @@ -130,13 +125,6 @@ static int net_dump_init(NetClientState *peer, const char *device, return -1; } - nc = qemu_new_net_client(&net_dump_info, peer, device, name); - - snprintf(nc->info_str, sizeof(nc->info_str), - "dump to %s (len=%d)", filename, len); - - s = DO_UPCAST(DumpState, nc, nc); - s->fd = fd; s->pcap_caplen = len; @@ -146,16 +134,61 @@ static int net_dump_init(NetClientState *peer, const char *device, return 0; } +/* Dumping via VLAN netclient */ + +struct DumpNetClient { + NetClientState nc; + DumpState ds; +}; +typedef struct DumpNetClient DumpNetClient; + +static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = size + }; + + return dump_receive_iov(&dc->ds, &iov, 1); +} + +static ssize_t dumpclient_receive_iov(NetClientState *nc, + const struct iovec *iov, int cnt) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + + return dump_receive_iov(&dc->ds, iov, cnt); +} + +static void dumpclient_cleanup(NetClientState *nc) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + + dump_cleanup(&dc->ds); +} + +static NetClientInfo net_dump_info = { + .type = NET_CLIENT_OPTIONS_KIND_DUMP, + .size = sizeof(DumpNetClient), + .receive = dumpclient_receive, + .receive_iov = dumpclient_receive_iov, + .cleanup = dumpclient_cleanup, +}; + int net_init_dump(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { - int len; + int len, rc; const char *file; char def_file[128]; const NetdevDumpOptions *dump; + NetClientState *nc; + DumpNetClient *dnc; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP); - dump = opts->dump; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_DUMP); + dump = opts->u.dump; assert(peer); @@ -182,5 +215,140 @@ int net_init_dump(const NetClientOptions *opts, const char *name, len = 65536; } - return net_dump_init(peer, "dump", name, file, len, errp); + nc = qemu_new_net_client(&net_dump_info, peer, "dump", name); + snprintf(nc->info_str, sizeof(nc->info_str), + "dump to %s (len=%d)", file, len); + + dnc = DO_UPCAST(DumpNetClient, nc, nc); + rc = net_dump_state_init(&dnc->ds, file, len, errp); + if (rc) { + qemu_del_net_client(nc); + } + return rc; } + +/* Dumping via filter */ + +#define TYPE_FILTER_DUMP "filter-dump" + +#define FILTER_DUMP(obj) \ + OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP) + +struct NetFilterDumpState { + NetFilterState nfs; + DumpState ds; + char *filename; + uint32_t maxlen; +}; +typedef struct NetFilterDumpState NetFilterDumpState; + +static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr, + unsigned flags, const struct iovec *iov, + int iovcnt, NetPacketSent *sent_cb) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + dump_receive_iov(&nfds->ds, iov, iovcnt); + return 0; +} + +static void filter_dump_cleanup(NetFilterState *nf) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + dump_cleanup(&nfds->ds); +} + +static void filter_dump_setup(NetFilterState *nf, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + if (!nfds->filename) { + error_setg(errp, "dump filter needs 'file' property set!"); + return; + } + + net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp); +} + +static void filter_dump_get_maxlen(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + uint32_t value = nfds->maxlen; + + visit_type_uint32(v, &value, name, errp); +} + +static void filter_dump_set_maxlen(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (value == 0) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'", + object_get_typename(obj), name, value); + goto out; + } + nfds->maxlen = value; + +out: + error_propagate(errp, local_err); +} + +static char *file_dump_get_filename(Object *obj, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + return g_strdup(nfds->filename); +} + +static void file_dump_set_filename(Object *obj, const char *value, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + g_free(nfds->filename); + nfds->filename = g_strdup(value); +} + +static void filter_dump_instance_init(Object *obj) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + nfds->maxlen = 65536; + + object_property_add(obj, "maxlen", "int", filter_dump_get_maxlen, + filter_dump_set_maxlen, NULL, NULL, NULL); + object_property_add_str(obj, "file", file_dump_get_filename, + file_dump_set_filename, NULL); +} + +static void filter_dump_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_dump_setup; + nfc->cleanup = filter_dump_cleanup; + nfc->receive_iov = filter_dump_receive_iov; +} + +static const TypeInfo filter_dump_info = { + .name = TYPE_FILTER_DUMP, + .parent = TYPE_NETFILTER, + .class_init = filter_dump_class_init, + .instance_init = filter_dump_instance_init, + .instance_size = sizeof(NetFilterDumpState), +}; + +static void filter_dump_register_types(void) +{ + type_register_static(&filter_dump_info); +} + +type_init(filter_dump_register_types); diff --git a/net/filter-buffer.c b/net/filter-buffer.c new file mode 100644 index 0000000000..57be149413 --- /dev/null +++ b/net/filter-buffer.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "net/filter.h" +#include "net/queue.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/iov.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" + +#define TYPE_FILTER_BUFFER "filter-buffer" + +#define FILTER_BUFFER(obj) \ + OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER) + +typedef struct FilterBufferState { + NetFilterState parent_obj; + + NetQueue *incoming_queue; + uint32_t interval; + QEMUTimer release_timer; +} FilterBufferState; + +static void filter_buffer_flush(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (!qemu_net_queue_flush(s->incoming_queue)) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(s->incoming_queue, nf->netdev); + } +} + +static void filter_buffer_release_timer(void *opaque) +{ + NetFilterState *nf = opaque; + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * Note: filter_buffer_flush() drops packets that can't be sent + * TODO: We should leave them queued. But currently there's no way + * for the next filter or receiver to notify us that it can receive + * more packets. + */ + filter_buffer_flush(nf); + /* Timer rearmed to fire again in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); +} + +/* filter APIs */ +static ssize_t filter_buffer_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We return size when buffer a packet, the sender will take it as + * a already sent packet, so sent_cb should not be called later. + * + * FIXME: Even if the guest can't receive packets for some reasons, + * the filter can still accept packets until its internal queue is full. + * For example: + * For some reason, receiver could not receive more packets + * (.can_receive() returns zero). Without a filter, at most one packet + * will be queued in incoming queue and sender's poll will be disabled + * unit its sent_cb() was called. With a filter, it will keep receiving + * the packets without caring about the receiver. This is suboptimal. + * May need more thoughts (e.g keeping sent_cb). + */ + qemu_net_queue_append_iov(s->incoming_queue, sender, flags, + iov, iovcnt, NULL); + return iov_size(iov, iovcnt); +} + +static void filter_buffer_cleanup(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (s->interval) { + timer_del(&s->release_timer); + } + + /* flush packets */ + if (s->incoming_queue) { + filter_buffer_flush(nf); + g_free(s->incoming_queue); + } +} + +static void filter_buffer_setup(NetFilterState *nf, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We may want to accept zero interval when VM FT solutions like MC + * or COLO use this filter to release packets on demand. + */ + if (!s->interval) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval", + "a non-zero interval"); + return; + } + + s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); + if (s->interval) { + timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL, + filter_buffer_release_timer, nf); + /* Timer armed to fire in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); + } +} + +static void filter_buffer_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_buffer_setup; + nfc->cleanup = filter_buffer_cleanup; + nfc->receive_iov = filter_buffer_receive_iov; +} + +static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + uint32_t value = s->interval; + + visit_type_uint32(v, &value, name, errp); +} + +static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->interval = value; + +out: + error_propagate(errp, local_err); +} + +static void filter_buffer_init(Object *obj) +{ + object_property_add(obj, "interval", "int", + filter_buffer_get_interval, + filter_buffer_set_interval, NULL, NULL, NULL); +} + +static const TypeInfo filter_buffer_info = { + .name = TYPE_FILTER_BUFFER, + .parent = TYPE_NETFILTER, + .class_init = filter_buffer_class_init, + .instance_init = filter_buffer_init, + .instance_size = sizeof(FilterBufferState), +}; + +static void register_types(void) +{ + type_register_static(&filter_buffer_info); +} + +type_init(register_types); diff --git a/net/filter.c b/net/filter.c new file mode 100644 index 0000000000..1365bad026 --- /dev/null +++ b/net/filter.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" + +#include "net/filter.h" +#include "net/net.h" +#include "net/vhost_net.h" +#include "qom/object_interfaces.h" +#include "qemu/iov.h" +#include "qapi/string-output-visitor.h" + +ssize_t qemu_netfilter_receive(NetFilterState *nf, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + if (nf->direction == direction || + nf->direction == NET_FILTER_DIRECTION_ALL) { + return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov( + nf, sender, flags, iov, iovcnt, sent_cb); + } + + return 0; +} + +ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque) +{ + int ret = 0; + int direction; + NetFilterState *nf = opaque; + NetFilterState *next = QTAILQ_NEXT(nf, next); + + if (!sender || !sender->peer) { + /* no receiver, or sender been deleted, no need to pass it further */ + goto out; + } + + if (nf->direction == NET_FILTER_DIRECTION_ALL) { + if (sender == nf->netdev) { + /* This packet is sent by netdev itself */ + direction = NET_FILTER_DIRECTION_TX; + } else { + direction = NET_FILTER_DIRECTION_RX; + } + } else { + direction = nf->direction; + } + + while (next) { + /* + * if qemu_netfilter_pass_to_next been called, means that + * the packet has been hold by filter and has already retured size + * to the sender, so sent_cb shouldn't be called later, just + * pass NULL to next. + */ + ret = qemu_netfilter_receive(next, direction, sender, flags, iov, + iovcnt, NULL); + if (ret) { + return ret; + } + next = QTAILQ_NEXT(next, next); + } + + /* + * We have gone through all filters, pass it to receiver. + * Do the valid check again incase sender or receiver been + * deleted while we go through filters. + */ + if (sender && sender->peer) { + qemu_net_queue_send_iov(sender->peer->incoming_queue, + sender, flags, iov, iovcnt, NULL); + } + +out: + /* no receiver, or sender been deleted */ + return iov_size(iov, iovcnt); +} + +static char *netfilter_get_netdev_id(Object *obj, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + return g_strdup(nf->netdev_id); +} + +static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + nf->netdev_id = g_strdup(str); +} + +static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED) +{ + NetFilterState *nf = NETFILTER(obj); + return nf->direction; +} + +static void netfilter_set_direction(Object *obj, int direction, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + nf->direction = direction; +} + +static void netfilter_init(Object *obj) +{ + object_property_add_str(obj, "netdev", + netfilter_get_netdev_id, netfilter_set_netdev_id, + NULL); + object_property_add_enum(obj, "queue", "NetFilterDirection", + NetFilterDirection_lookup, + netfilter_get_direction, netfilter_set_direction, + NULL); +} + +static void netfilter_complete(UserCreatable *uc, Error **errp) +{ + NetFilterState *nf = NETFILTER(uc); + NetClientState *ncs[MAX_QUEUE_NUM]; + NetFilterClass *nfc = NETFILTER_GET_CLASS(uc); + int queues; + Error *local_err = NULL; + char *str, *info; + ObjectProperty *prop; + ObjectPropertyIterator *iter; + StringOutputVisitor *ov; + + if (!nf->netdev_id) { + error_setg(errp, "Parameter 'netdev' is required"); + return; + } + + queues = qemu_find_net_clients_except(nf->netdev_id, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + if (queues < 1) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev", + "a network backend id"); + return; + } else if (queues > 1) { + error_setg(errp, "multiqueue is not supported"); + return; + } + + if (get_vhost_net(ncs[0])) { + error_setg(errp, "Vhost is not supported"); + return; + } + + nf->netdev = ncs[0]; + + if (nfc->setup) { + nfc->setup(nf, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next); + + /* generate info str */ + iter = object_property_iter_init(OBJECT(nf)); + while ((prop = object_property_iter_next(iter))) { + if (!strcmp(prop->name, "type")) { + continue; + } + ov = string_output_visitor_new(false); + object_property_get(OBJECT(nf), string_output_get_visitor(ov), + prop->name, errp); + str = string_output_get_string(ov); + string_output_visitor_cleanup(ov); + info = g_strdup_printf(",%s=%s", prop->name, str); + g_strlcat(nf->info_str, info, sizeof(nf->info_str)); + g_free(str); + g_free(info); + } + object_property_iter_free(iter); +} + +static void netfilter_finalize(Object *obj) +{ + NetFilterState *nf = NETFILTER(obj); + NetFilterClass *nfc = NETFILTER_GET_CLASS(obj); + + if (nfc->cleanup) { + nfc->cleanup(nf); + } + + if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters)) { + QTAILQ_REMOVE(&nf->netdev->filters, nf, next); + } +} + +static void netfilter_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = netfilter_complete; +} + +static const TypeInfo netfilter_info = { + .name = TYPE_NETFILTER, + .parent = TYPE_OBJECT, + .abstract = true, + .class_size = sizeof(NetFilterClass), + .class_init = netfilter_class_init, + .instance_size = sizeof(NetFilterState), + .instance_init = netfilter_init, + .instance_finalize = netfilter_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&netfilter_info); +} + +type_init(register_types); @@ -285,9 +285,9 @@ int net_init_hubport(const NetClientOptions *opts, const char *name, { const NetdevHubPortOptions *hubport; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT); + assert(opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT); assert(!peer); - hubport = opts->hubport; + hubport = opts->u.hubport; net_hub_add_port(hubport->hubid, name); return 0; diff --git a/net/l2tpv3.c b/net/l2tpv3.c index 4f9bceecc9..8e68e540ec 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -545,8 +545,8 @@ int net_init_l2tpv3(const NetClientOptions *opts, s->queue_tail = 0; s->header_mismatch = false; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); - l2tpv3 = opts->l2tpv3; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3); + l2tpv3 = opts->u.l2tpv3; if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { s->ipv6 = l2tpv3->ipv6; @@ -44,6 +44,7 @@ #include "qapi/opts-visitor.h" #include "qapi/dealloc-visitor.h" #include "sysemu/sysemu.h" +#include "net/filter.h" /* Net bridge is currently not supported for W32. */ #if !defined(_WIN32) @@ -285,8 +286,9 @@ static void qemu_net_client_setup(NetClientState *nc, } QTAILQ_INSERT_TAIL(&net_clients, nc, next); - nc->incoming_queue = qemu_new_net_queue(nc); + nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc); nc->destructor = destructor; + QTAILQ_INIT(&nc->filters); } NetClientState *qemu_new_net_client(NetClientInfo *info, @@ -384,6 +386,7 @@ void qemu_del_net_client(NetClientState *nc) { NetClientState *ncs[MAX_QUEUE_NUM]; int queues, i; + NetFilterState *nf, *next; assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC); @@ -395,6 +398,10 @@ void qemu_del_net_client(NetClientState *nc) MAX_QUEUE_NUM); assert(queues != 0); + QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) { + object_unparent(OBJECT(nf)); + } + /* If there is a peer NIC, delete and cleanup client, but do not free. */ if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { NICState *nic = qemu_get_nic(nc->peer); @@ -562,34 +569,42 @@ int qemu_can_send_packet(NetClientState *sender) return 1; } -ssize_t qemu_deliver_packet(NetClientState *sender, - unsigned flags, - const uint8_t *data, - size_t size, - void *opaque) +static ssize_t filter_receive_iov(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { - NetClientState *nc = opaque; - ssize_t ret; - - if (nc->link_down) { - return size; - } + ssize_t ret = 0; + NetFilterState *nf = NULL; - if (nc->receive_disabled) { - return 0; + QTAILQ_FOREACH(nf, &nc->filters, next) { + ret = qemu_netfilter_receive(nf, direction, sender, flags, iov, + iovcnt, sent_cb); + if (ret) { + return ret; + } } - if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { - ret = nc->info->receive_raw(nc, data, size); - } else { - ret = nc->info->receive(nc, data, size); - } + return ret; +} - if (ret == 0) { - nc->receive_disabled = 1; - } +static ssize_t filter_receive(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + NetPacketSent *sent_cb) +{ + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; - return ret; + return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb); } void qemu_purge_queued_packets(NetClientState *nc) @@ -633,6 +648,7 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, NetPacketSent *sent_cb) { NetQueue *queue; + int ret; #ifdef DEBUG_NET printf("qemu_send_packet_async:\n"); @@ -643,6 +659,19 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, return size; } + /* Let filters handle the packet first */ + ret = filter_receive(sender, NET_FILTER_DIRECTION_TX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + queue = sender->peer->incoming_queue; return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb); @@ -668,14 +697,25 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) } static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, - int iovcnt) + int iovcnt, unsigned flags) { - uint8_t buffer[NET_BUFSIZE]; + uint8_t buf[NET_BUFSIZE]; + uint8_t *buffer; size_t offset; - offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); + if (iovcnt == 1) { + buffer = iov[0].iov_base; + offset = iov[0].iov_len; + } else { + buffer = buf; + offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf)); + } - return nc->info->receive(nc, buffer, offset); + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + return nc->info->receive_raw(nc, buffer, offset); + } else { + return nc->info->receive(nc, buffer, offset); + } } ssize_t qemu_deliver_packet_iov(NetClientState *sender, @@ -698,7 +738,7 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, if (nc->info->receive_iov) { ret = nc->info->receive_iov(nc, iov, iovcnt); } else { - ret = nc_sendv_compat(nc, iov, iovcnt); + ret = nc_sendv_compat(nc, iov, iovcnt, flags); } if (ret == 0) { @@ -713,11 +753,25 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender, NetPacketSent *sent_cb) { NetQueue *queue; + int ret; if (sender->link_down || !sender->peer) { return iov_size(iov, iovcnt); } + /* Let filters handle the packet first */ + ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + queue = sender->peer->incoming_queue; return qemu_net_queue_send_iov(queue, sender, @@ -828,8 +882,8 @@ static int net_init_nic(const NetClientOptions *opts, const char *name, NICInfo *nd; const NetLegacyNicOptions *nic; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_NIC); - nic = opts->nic; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_NIC); + nic = opts->u.nic; idx = nic_get_free_idx(); if (idx == -1 || nb_nics >= MAX_NICS) { @@ -930,9 +984,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) opts = netdev->opts; name = netdev->id; - if (opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP || - opts->kind == NET_CLIENT_OPTIONS_KIND_NIC || - !net_client_init_fun[opts->kind]) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_DUMP || + opts->type == NET_CLIENT_OPTIONS_KIND_NIC || + !net_client_init_fun[opts->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a netdev backend type"); return -1; @@ -943,16 +997,16 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) /* missing optional values have been initialized to "all bits zero" */ name = net->has_id ? net->id : net->name; - if (opts->kind == NET_CLIENT_OPTIONS_KIND_NONE) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_NONE) { return 0; /* nothing to do */ } - if (opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net type"); return -1; } - if (!net_client_init_fun[opts->kind]) { + if (!net_client_init_fun[opts->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net backend type (maybe it is not compiled " "into this binary)"); @@ -960,17 +1014,17 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) } /* Do not add to a vlan if it's a nic with a netdev= parameter. */ - if (opts->kind != NET_CLIENT_OPTIONS_KIND_NIC || - !opts->nic->has_netdev) { + if (opts->type != NET_CLIENT_OPTIONS_KIND_NIC || + !opts->u.nic->has_netdev) { peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL); } } - if (net_client_init_fun[opts->kind](opts, name, peer, errp) < 0) { + if (net_client_init_fun[opts->type](opts, name, peer, errp) < 0) { /* FIXME drop when all init functions store an Error */ if (errp && !*errp) { error_setg(errp, QERR_DEVICE_INIT_FAILED, - NetClientOptionsKind_lookup[opts->kind]); + NetClientOptionsKind_lookup[opts->type]); } return -1; } @@ -1133,10 +1187,22 @@ void qmp_netdev_del(const char *id, Error **errp) void print_net_client(Monitor *mon, NetClientState *nc) { + NetFilterState *nf; + monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name, nc->queue_index, NetClientOptionsKind_lookup[nc->info->type], nc->info_str); + if (!QTAILQ_EMPTY(&nc->filters)) { + monitor_printf(mon, "filters:\n"); + } + QTAILQ_FOREACH(nf, &nc->filters, next) { + char *path = object_get_canonical_path_component(OBJECT(nf)); + monitor_printf(mon, " - %s: type=%s%s\n", path, + object_get_typename(OBJECT(nf)), + nf->info_str); + g_free(path); + } } RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, @@ -1162,6 +1228,12 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, continue; } + /* only query information on queue 0 since the info is per nic, + * not per queue + */ + if (nc->queue_index != 0) + continue; + if (nc->info->query_rx_filter) { info = nc->info->query_rx_filter(nc); entry = g_malloc0(sizeof(*entry)); diff --git a/net/netmap.c b/net/netmap.c index 508b82947d..555836829e 100644 --- a/net/netmap.c +++ b/net/netmap.c @@ -90,7 +90,7 @@ pkt_copy(const void *_src, void *_dst, int l) * Open a netmap device. We assume there is only one queue * (which is the case for the VALE bridge). */ -static int netmap_open(NetmapPriv *me) +static void netmap_open(NetmapPriv *me, Error **errp) { int fd; int err; @@ -99,9 +99,8 @@ static int netmap_open(NetmapPriv *me) me->fd = fd = open(me->fdname, O_RDWR); if (fd < 0) { - error_report("Unable to open netmap device '%s' (%s)", - me->fdname, strerror(errno)); - return -1; + error_setg_file_open(errp, errno, me->fdname); + return; } memset(&req, 0, sizeof(req)); pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); @@ -109,15 +108,14 @@ static int netmap_open(NetmapPriv *me) req.nr_version = NETMAP_API; err = ioctl(fd, NIOCREGIF, &req); if (err) { - error_report("Unable to register %s: %s", me->ifname, strerror(errno)); + error_setg_errno(errp, errno, "Unable to register %s", me->ifname); goto error; } l = me->memsize = req.nr_memsize; me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); if (me->mem == MAP_FAILED) { - error_report("Unable to mmap netmap shared memory: %s", - strerror(errno)); + error_setg_errno(errp, errno, "Unable to mmap netmap shared memory"); me->mem = NULL; goto error; } @@ -125,11 +123,11 @@ static int netmap_open(NetmapPriv *me) me->nifp = NETMAP_IF(me->mem, req.nr_offset); me->tx = NETMAP_TXRING(me->nifp, 0); me->rx = NETMAP_RXRING(me->nifp, 0); - return 0; + + return; error: close(me->fd); - return -1; } static void netmap_send(void *opaque); @@ -438,9 +436,9 @@ static NetClientInfo net_netmap_info = { int net_init_netmap(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { - /* FIXME error_setg(errp, ...) on failure */ - const NetdevNetmapOptions *netmap_opts = opts->netmap; + const NetdevNetmapOptions *netmap_opts = opts->u.netmap; NetClientState *nc; + Error *err = NULL; NetmapPriv me; NetmapState *s; @@ -448,7 +446,9 @@ int net_init_netmap(const NetClientOptions *opts, netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); /* Set default name for the port if not supplied. */ pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); - if (netmap_open(&me)) { + netmap_open(&me, &err); + if (err) { + error_propagate(errp, err); return -1; } /* Create the object. */ diff --git a/net/queue.c b/net/queue.c index ebbe2bb93b..de8b9d31c6 100644 --- a/net/queue.c +++ b/net/queue.c @@ -52,13 +52,14 @@ struct NetQueue { void *opaque; uint32_t nq_maxlen; uint32_t nq_count; + NetQueueDeliverFunc *deliver; QTAILQ_HEAD(packets, NetPacket) packets; unsigned delivering : 1; }; -NetQueue *qemu_new_net_queue(void *opaque) +NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque) { NetQueue *queue; @@ -67,6 +68,7 @@ NetQueue *qemu_new_net_queue(void *opaque) queue->opaque = opaque; queue->nq_maxlen = 10000; queue->nq_count = 0; + queue->deliver = deliver; QTAILQ_INIT(&queue->packets); @@ -110,12 +112,12 @@ static void qemu_net_queue_append(NetQueue *queue, QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); } -static void qemu_net_queue_append_iov(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const struct iovec *iov, - int iovcnt, - NetPacketSent *sent_cb) +void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { NetPacket *packet; size_t max_len = 0; @@ -152,9 +154,13 @@ static ssize_t qemu_net_queue_deliver(NetQueue *queue, size_t size) { ssize_t ret = -1; + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; queue->delivering = 1; - ret = qemu_deliver_packet(sender, flags, data, size, queue->opaque); + ret = queue->deliver(sender, flags, &iov, 1, queue->opaque); queue->delivering = 0; return ret; @@ -169,7 +175,7 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue, ssize_t ret = -1; queue->delivering = 1; - ret = qemu_deliver_packet_iov(sender, flags, iov, iovcnt, queue->opaque); + ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque); queue->delivering = 0; return ret; diff --git a/net/slirp.c b/net/slirp.c index 7657b38fdf..f505570adb 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -746,8 +746,8 @@ int net_init_slirp(const NetClientOptions *opts, const char *name, const NetdevUserOptions *user; const char **dnssearch; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_USER); - user = opts->user; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_USER); + user = opts->u.user; vnet = user->has_net ? g_strdup(user->net) : user->has_ip ? g_strdup_printf("%s/24", user->ip) : diff --git a/net/socket.c b/net/socket.c index b1e3b1c8d9..e8605d4ded 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,8 +706,8 @@ int net_init_socket(const NetClientOptions *opts, const char *name, Error *err = NULL; const NetdevSocketOptions *sock; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_SOCKET); - sock = opts->socket; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_SOCKET); + sock = opts->u.socket; if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast + sock->has_udp != 1) { diff --git a/net/tap-bsd.c b/net/tap-bsd.c index 7028d9be95..0103a97bf6 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -109,8 +109,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, #define PATH_NET_TAP "/dev/tap" -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, - int vnet_hdr_required, int mq_required, Error **errp) +static int tap_open_clone(char *ifname, int ifname_size, Error **errp) { int fd, s, ret; struct ifreq ifr; @@ -126,7 +125,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, ret = ioctl(fd, TAPGIFNAME, (void *)&ifr); if (ret < 0) { error_setg_errno(errp, errno, "could not get tap interface name"); - goto error; + close(fd); + return -1; } if (ifname[0] != '\0') { @@ -135,19 +135,47 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, if (s < 0) { error_setg_errno(errp, errno, "could not open socket to set interface name"); - goto error; + close(fd); + return -1; } ifr.ifr_data = ifname; ret = ioctl(s, SIOCSIFNAME, (void *)&ifr); close(s); if (ret < 0) { error_setg(errp, "could not set tap interface name"); - goto error; + close(fd); + return -1; } } else { pstrcpy(ifname, ifname_size, ifr.ifr_name); } + return fd; +} + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + int fd = -1; + + /* If the specified tap device already exists just use it. */ + if (ifname[0] != '\0') { + char dname[100]; + snprintf(dname, sizeof dname, "/dev/%s", ifname); + TFR(fd = open(dname, O_RDWR)); + if (fd < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "could not open %s", dname); + return -1; + } + } + + if (fd < 0) { + /* Tap device not specified or does not exist. */ + if ((fd = tap_open_clone(ifname, ifname_size, errp)) < 0) { + return -1; + } + } + if (*vnet_hdr) { /* BSD doesn't have IFF_VNET_HDR */ *vnet_hdr = 0; diff --git a/net/tap-linux.c b/net/tap-linux.c index aefa05d081..419a2455a0 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -303,7 +303,7 @@ int tap_fd_set_vnet_le(int fd, int is_le) return -errno; } - error_report("TUNSETVNETLE ioctl() failed: %s.\n", strerror(errno)); + error_report("TUNSETVNETLE ioctl() failed: %s.", strerror(errno)); abort(); } @@ -320,7 +320,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) return -errno; } - error_report("TUNSETVNETBE ioctl() failed: %s.\n", strerror(errno)); + error_report("TUNSETVNETBE ioctl() failed: %s.", strerror(errno)); abort(); } diff --git a/net/tap-win32.c b/net/tap-win32.c index ed56acf2ef..af26061489 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -77,7 +77,12 @@ //#define DEBUG_TAP_WIN32 -#define TUN_ASYNCHRONOUS_WRITES 1 +/* FIXME: The asynch write path appears to be broken at + * present. WriteFile() ignores the lpNumberOfBytesWritten parameter + * for overlapped writes, with the result we return zero bytes sent, + * and after handling a single packet, receive is disabled for this + * interface. */ +/* #define TUN_ASYNCHRONOUS_WRITES 1 */ #define TUN_BUFFER_SIZE 1560 #define TUN_MAX_BUFFER_COUNT 32 @@ -356,7 +361,8 @@ static int get_device_guid( &len); if (status != ERROR_SUCCESS || name_type != REG_SZ) { - return -1; + ++i; + continue; } else { if (is_tap_win32_dev(enum_name)) { @@ -460,27 +466,48 @@ static int tap_win32_write(tap_win32_overlapped_t *overlapped, BOOL result; DWORD error; +#ifdef TUN_ASYNCHRONOUS_WRITES result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped, &write_size, FALSE); if (!result && GetLastError() == ERROR_IO_INCOMPLETE) WaitForSingleObject(overlapped->write_event, INFINITE); +#endif result = WriteFile(overlapped->handle, buffer, size, &write_size, &overlapped->write_overlapped); +#ifdef TUN_ASYNCHRONOUS_WRITES + /* FIXME: we can't sensibly set write_size here, without waiting + * for the IO to complete! Moreover, we can't return zero, + * because that will disable receive on this interface, and we + * also can't assume it will succeed and return the full size, + * because that will result in the buffer being reclaimed while + * the IO is in progress. */ +#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES. +#else /* !TUN_ASYNCHRONOUS_WRITES */ if (!result) { - switch (error = GetLastError()) - { - case ERROR_IO_PENDING: -#ifndef TUN_ASYNCHRONOUS_WRITES - WaitForSingleObject(overlapped->write_event, INFINITE); -#endif - break; - default: - return -1; + error = GetLastError(); + if (error == ERROR_IO_PENDING) { + result = GetOverlappedResult(overlapped->handle, + &overlapped->write_overlapped, + &write_size, TRUE); } } +#endif + + if (!result) { +#ifdef DEBUG_TAP_WIN32 + LPTSTR msgbuf; + error = GetLastError(); + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + &msgbuf, 0, NULL); + fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf); + LocalFree(msgbuf); +#endif + return 0; + } return write_size; } @@ -777,8 +804,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, /* FIXME error_setg(errp, ...) on failure */ const NetdevTapOptions *tap; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->tap; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap; if (!tap->has_ifname) { error_report("tap: no interface name"); @@ -624,8 +624,8 @@ int net_init_bridge(const NetClientOptions *opts, const char *name, TAPState *s; int fd, vnet_hdr; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE); - bridge = opts->bridge; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE); + bridge = opts->u.bridge; helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; @@ -787,8 +787,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, const char *vhostfdname; char ifname[128]; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->tap; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap; queues = tap->has_queues ? tap->queues : 1; vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; /* QEMU vlans does not support multiqueue tap, in this case peer is set. @@ -115,8 +115,8 @@ int net_init_vde(const NetClientOptions *opts, const char *name, /* FIXME error_setg(errp, ...) on failure */ const NetdevVdeOptions *vde; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VDE); - vde = opts->vde; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VDE); + vde = opts->u.vde; /* missing optional values have been initialized to "all bits zero" */ if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group, diff --git a/net/vhost-user.c b/net/vhost-user.c index 93dcecd664..b368a90219 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -14,6 +14,8 @@ #include "sysemu/char.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qmp-commands.h" +#include "trace.h" typedef struct VhostUserState { NetClientState nc; @@ -39,37 +41,106 @@ static int vhost_user_running(VhostUserState *s) return (s->vhost_net) ? 1 : 0; } -static int vhost_user_start(VhostUserState *s) +static void vhost_user_stop(int queues, NetClientState *ncs[]) { - VhostNetOptions options; + VhostUserState *s; + int i; - if (vhost_user_running(s)) { - return 0; + for (i = 0; i < queues; i++) { + assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + s = DO_UPCAST(VhostUserState, nc, ncs[i]); + if (!vhost_user_running(s)) { + continue; + } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } } +} + +static int vhost_user_start(int queues, NetClientState *ncs[]) +{ + VhostNetOptions options; + VhostUserState *s; + int max_queues; + int i; options.backend_type = VHOST_BACKEND_TYPE_USER; - options.net_backend = &s->nc; - options.opaque = s->chr; - s->vhost_net = vhost_net_init(&options); + for (i = 0; i < queues; i++) { + assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + s = DO_UPCAST(VhostUserState, nc, ncs[i]); + if (vhost_user_running(s)) { + continue; + } + + options.net_backend = ncs[i]; + options.opaque = s->chr; + s->vhost_net = vhost_net_init(&options); + if (!s->vhost_net) { + error_report("failed to init vhost_net for queue %d\n", i); + goto err; + } + + if (i == 0) { + max_queues = vhost_net_get_max_queues(s->vhost_net); + if (queues > max_queues) { + error_report("you are asking more queues than " + "supported: %d\n", max_queues); + goto err; + } + } + } - return vhost_user_running(s) ? 0 : -1; + return 0; + +err: + vhost_user_stop(i + 1, ncs); + return -1; } -static void vhost_user_stop(VhostUserState *s) +static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf, + size_t size) { - if (vhost_user_running(s)) { - vhost_net_cleanup(s->vhost_net); + /* In case of RARP (message size is 60) notify backup to send a fake RARP. + This fake RARP will be sent by backend only for guest + without GUEST_ANNOUNCE capability. + */ + if (size == 60) { + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + int r; + static int display_rarp_failure = 1; + char mac_addr[6]; + + /* extract guest mac address from the RARP message */ + memcpy(mac_addr, &buf[6], 6); + + r = vhost_net_notify_migration_done(s->vhost_net, mac_addr); + + if ((r != 0) && (display_rarp_failure)) { + fprintf(stderr, + "Vhost user backend fails to broadcast fake RARP\n"); + fflush(stderr); + display_rarp_failure = 0; + } } - s->vhost_net = 0; + return size; } static void vhost_user_cleanup(NetClientState *nc) { VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); - vhost_user_stop(s); + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } + qemu_purge_queued_packets(nc); } @@ -90,64 +161,64 @@ static bool vhost_user_has_ufo(NetClientState *nc) static NetClientInfo net_vhost_user_info = { .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, .size = sizeof(VhostUserState), + .receive = vhost_user_receive, .cleanup = vhost_user_cleanup, .has_vnet_hdr = vhost_user_has_vnet_hdr, .has_ufo = vhost_user_has_ufo, }; -static void net_vhost_link_down(VhostUserState *s, bool link_down) -{ - s->nc.link_down = link_down; - - if (s->nc.peer) { - s->nc.peer->link_down = link_down; - } - - if (s->nc.info->link_status_changed) { - s->nc.info->link_status_changed(&s->nc); - } - - if (s->nc.peer && s->nc.peer->info->link_status_changed) { - s->nc.peer->info->link_status_changed(s->nc.peer); - } -} - static void net_vhost_user_event(void *opaque, int event) { - VhostUserState *s = opaque; - + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + trace_vhost_user_event(s->chr->label, event); switch (event) { case CHR_EVENT_OPENED: - vhost_user_start(s); - net_vhost_link_down(s, false); - error_report("chardev \"%s\" went up", s->chr->label); + if (vhost_user_start(queues, ncs) < 0) { + exit(1); + } + qmp_set_link(name, true, &err); break; case CHR_EVENT_CLOSED: - net_vhost_link_down(s, true); - vhost_user_stop(s); - error_report("chardev \"%s\" went down", s->chr->label); + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); break; } + + if (err) { + error_report_err(err); + } } static int net_vhost_user_init(NetClientState *peer, const char *device, - const char *name, CharDriverState *chr) + const char *name, CharDriverState *chr, + int queues) { NetClientState *nc; VhostUserState *s; + int i; - nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + for (i = 0; i < queues; i++) { + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); - snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s", - chr->label); + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", + i, chr->label); - s = DO_UPCAST(VhostUserState, nc, nc); + nc->queue_index = i; - /* We don't provide a receive callback */ - s->nc.receive_disabled = 1; - s->chr = chr; + s = DO_UPCAST(VhostUserState, nc, nc); + s->chr = chr; + } - qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s); + qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, (void*)name); return 0; } @@ -226,11 +297,12 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) int net_init_vhost_user(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { + int queues; const NetdevVhostUserOptions *vhost_user_opts; CharDriverState *chr; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER); - vhost_user_opts = opts->vhost_user; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + vhost_user_opts = opts->u.vhost_user; chr = net_vhost_parse_chardev(vhost_user_opts, errp); if (!chr) { @@ -243,6 +315,12 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name, return -1; } + queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1; + if (queues < 1) { + error_setg(errp, + "vhost-user number of queues must be bigger than zero"); + return -1; + } - return net_vhost_user_init(peer, "vhost_user", name, chr); + return net_vhost_user_init(peer, "vhost_user", name, chr, queues); } |