Merge branch 'sandbox/xuwc/devel-riscv-py2' into devel-py2

Change-Id: I4afa9c5351396b4aa3a45713523b38de970dbe4a Signed-off-by: wanchao-xu <wanchao.xu@samsung.com>
author: wanchao-xu <wanchao.xu@samsung.com> 2024-02-23 09:56:11 +0800
committer: wanchao-xu <wanchao.xu@samsung.com> 2024-02-23 09:56:29 +0800
commit: d39f72b68d5a576b5cd2178a40e2e4013ae14398 (patch)
tree: e5cd61a749edfcd55a69209f0eb34abb66a38e67 /net
parent: 0889ee8339e51dfdf78c39a8f15b6e5fe5ab49d5 (diff)
parent: e021b4728ed2aea486975a63a342ece1d2178b18 (diff)
download: qemu-arm-static-d39f72b68d5a576b5cd2178a40e2e4013ae14398.tar.gz
qemu-arm-static-d39f72b68d5a576b5cd2178a40e2e4013ae14398.tar.bz2
qemu-arm-static-d39f72b68d5a576b5cd2178a40e2e4013ae14398.zip
42 files changed, 8682 insertions, 1386 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 4854a14fe..c5d076d19 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,12 +2,31 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
-common-obj-$(CONFIG_POSIX) += tap.o
-common-obj-$(CONFIG_LINUX) += tap-linux.o
-common-obj-$(CONFIG_WIN32) += tap-win32.o
-common-obj-$(CONFIG_BSD) += tap-bsd.o
-common-obj-$(CONFIG_SOLARIS) += tap-solaris.o
-common-obj-$(CONFIG_AIX) += tap-aix.o
-common-obj-$(CONFIG_HAIKU) += tap-haiku.o
+common-obj-y += announce.o
+common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
+common-obj-$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET_USER)) += vhost-user.o
+common-obj-$(call land,$(call lnot,$(CONFIG_VIRTIO_NET)),$(CONFIG_VHOST_NET_USER)) += vhost-user-stub.o
+common-obj-$(CONFIG_ALL) += vhost-user-stub.o
 common-obj-$(CONFIG_SLIRP) += slirp.o
+slirp.o-cflags := $(SLIRP_CFLAGS)
+slirp.o-libs := $(SLIRP_LIBS)
 common-obj-$(CONFIG_VDE) += vde.o
+common-obj-$(CONFIG_NETMAP) += netmap.o
+common-obj-y += filter.o
+common-obj-y += filter-buffer.o
+common-obj-y += filter-mirror.o
+common-obj-y += colo-compare.o
+common-obj-y += colo.o
+common-obj-y += filter-rewriter.o
+common-obj-y += filter-replay.o
+
+tap-obj-$(CONFIG_LINUX) = tap-linux.o
+tap-obj-$(CONFIG_BSD) = tap-bsd.o
+tap-obj-$(CONFIG_SOLARIS) = tap-solaris.o
+tap-obj-y ?= tap-stub.o
+common-obj-$(CONFIG_POSIX) += tap.o $(tap-obj-y)
+common-obj-$(CONFIG_WIN32) += tap-win32.o
+
+vde.o-libs = $(VDE_LIBS)
+
+common-obj-$(CONFIG_CAN_BUS) += can/
diff --git a/net/announce.c b/net/announce.c
new file mode 100644
index 000000000..db90d3bd4
--- /dev/null
+++ b/net/announce.c
@@ -0,0 +1,203 @@
+/*
+ *  Self-announce
+ *  (c) 2017-2019 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "net/announce.h"
+#include "net/net.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-net.h"
+#include "qapi/qapi-commands-net.h"
+#include "trace.h"
+
+static GData *named_timers;
+
+int64_t qemu_announce_timer_step(AnnounceTimer *timer)
+{
+    int64_t step;
+
+    step =  timer->params.initial +
+            (timer->params.rounds - timer->round - 1) *
+            timer->params.step;
+
+    if (step < 0 || step > timer->params.max) {
+        step = timer->params.max;
+    }
+    timer_mod(timer->tm, qemu_clock_get_ms(timer->type) + step);
+
+    return step;
+}
+
+/*
+ * If 'free_named' is true, then remove the timer from the list
+ * and free the timer itself.
+ */
+void qemu_announce_timer_del(AnnounceTimer *timer, bool free_named)
+{
+    bool free_timer = false;
+    if (timer->tm) {
+        timer_del(timer->tm);
+        timer_free(timer->tm);
+        timer->tm = NULL;
+    }
+    qapi_free_strList(timer->params.interfaces);
+    timer->params.interfaces = NULL;
+    if (free_named && timer->params.has_id) {
+        AnnounceTimer *list_timer;
+        /*
+         * Sanity check: There should only be one timer on the list with
+         * the id.
+         */
+        list_timer = g_datalist_get_data(&named_timers, timer->params.id);
+        assert(timer == list_timer);
+        free_timer = true;
+        g_datalist_remove_data(&named_timers, timer->params.id);
+    }
+    trace_qemu_announce_timer_del(free_named, free_timer, timer->params.id);
+    g_free(timer->params.id);
+    timer->params.id = NULL;
+
+    if (free_timer) {
+        g_free(timer);
+    }
+}
+
+/*
+ * Under BQL/main thread
+ * Reset the timer to the given parameters/type/notifier.
+ */
+void qemu_announce_timer_reset(AnnounceTimer *timer,
+                               AnnounceParameters *params,
+                               QEMUClockType type,
+                               QEMUTimerCB *cb,
+                               void *opaque)
+{
+    /*
+     * We're under the BQL, so the current timer can't
+     * be firing, so we should be able to delete it.
+     */
+    qemu_announce_timer_del(timer, false);
+
+    QAPI_CLONE_MEMBERS(AnnounceParameters, &timer->params, params);
+    timer->round = params->rounds;
+    timer->type = type;
+    timer->tm = timer_new_ms(type, cb, opaque);
+}
+
+#ifndef ETH_P_RARP
+#define ETH_P_RARP 0x8035
+#endif
+#define ARP_HTYPE_ETH 0x0001
+#define ARP_PTYPE_IP 0x0800
+#define ARP_OP_REQUEST_REV 0x3
+
+static int announce_self_create(uint8_t *buf,
+                                uint8_t *mac_addr)
+{
+    /* Ethernet header. */
+    memset(buf, 0xff, 6);         /* destination MAC addr */
+    memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
+    *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
+
+    /* RARP header. */
+    *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
+    *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
+    *(buf + 18) = 6; /* hardware addr length (ethernet) */
+    *(buf + 19) = 4; /* protocol addr length (IPv4) */
+    *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
+    memcpy(buf + 22, mac_addr, 6); /* source hw addr */
+    memset(buf + 28, 0x00, 4);     /* source protocol addr */
+    memcpy(buf + 32, mac_addr, 6); /* target hw addr */
+    memset(buf + 38, 0x00, 4);     /* target protocol addr */
+
+    /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
+    memset(buf + 42, 0x00, 18);
+
+    return 60; /* len (FCS will be added by hardware) */
+}
+
+static void qemu_announce_self_iter(NICState *nic, void *opaque)
+{
+    AnnounceTimer *timer = opaque;
+    uint8_t buf[60];
+    int len;
+    bool skip;
+
+    if (timer->params.has_interfaces) {
+        strList *entry = timer->params.interfaces;
+        /* Skip unless we find our name in the requested list */
+        skip = true;
+
+        while (entry) {
+            if (!strcmp(entry->value, nic->ncs->name)) {
+                /* Found us */
+                skip = false;
+                break;
+            }
+            entry = entry->next;
+        }
+    } else {
+        skip = false;
+    }
+
+    trace_qemu_announce_self_iter(timer->params.has_id ? timer->params.id : "_",
+                                  nic->ncs->name,
+                                  qemu_ether_ntoa(&nic->conf->macaddr), skip);
+
+    if (!skip) {
+        len = announce_self_create(buf, nic->conf->macaddr.a);
+
+        qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
+
+        /* if the NIC provides it's own announcement support, use it as well */
+        if (nic->ncs->info->announce) {
+            nic->ncs->info->announce(nic->ncs);
+        }
+    }
+}
+static void qemu_announce_self_once(void *opaque)
+{
+    AnnounceTimer *timer = (AnnounceTimer *)opaque;
+
+    qemu_foreach_nic(qemu_announce_self_iter, timer);
+
+    if (--timer->round) {
+        qemu_announce_timer_step(timer);
+    } else {
+        qemu_announce_timer_del(timer, true);
+    }
+}
+
+void qemu_announce_self(AnnounceTimer *timer, AnnounceParameters *params)
+{
+    qemu_announce_timer_reset(timer, params, QEMU_CLOCK_REALTIME,
+                              qemu_announce_self_once, timer);
+    if (params->rounds) {
+        qemu_announce_self_once(timer);
+    } else {
+        qemu_announce_timer_del(timer, true);
+    }
+}
+
+void qmp_announce_self(AnnounceParameters *params, Error **errp)
+{
+    AnnounceTimer *named_timer;
+    if (!params->has_id) {
+        params->id = g_strdup("");
+        params->has_id = true;
+    }
+
+    named_timer = g_datalist_get_data(&named_timers, params->id);
+
+    if (!named_timer) {
+        named_timer = g_new0(AnnounceTimer, 1);
+        g_datalist_set_data(&named_timers, params->id, named_timer);
+    }
+
+    qemu_announce_self(named_timer, params);
+}
diff --git a/net/can/Makefile.objs b/net/can/Makefile.objs
new file mode 100644
index 000000000..9f35dc5c8
--- /dev/null
+++ b/net/can/Makefile.objs
@@ -0,0 +1,2 @@
+common-obj-y += can_core.o can_host.o
+common-obj-$(CONFIG_LINUX) += can_socketcan.o
diff --git a/net/can/can_core.c b/net/can/can_core.c
new file mode 100644
index 000000000..90f4d8576
--- /dev/null
+++ b/net/can/can_core.c
@@ -0,0 +1,140 @@
+/*
+ * CAN common CAN bus emulation support
+ *
+ * Copyright (c) 2013-2014 Jin Yang
+ * Copyright (c) 2014-2018 Pavel Pisa
+ *
+ * Initial development supported by Google GSoC 2013 from RTEMS project slot
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "chardev/char.h"
+#include "qemu/module.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "net/can_emu.h"
+#include "qom/object_interfaces.h"
+
+struct CanBusState {
+    Object object;
+
+    QTAILQ_HEAD(, CanBusClientState) clients;
+};
+
+static void can_bus_instance_init(Object *object)
+{
+    CanBusState *bus = (CanBusState *)object;
+
+    QTAILQ_INIT(&bus->clients);
+}
+
+int can_bus_insert_client(CanBusState *bus, CanBusClientState *client)
+{
+    client->bus = bus;
+    QTAILQ_INSERT_TAIL(&bus->clients, client, next);
+    return 0;
+}
+
+int can_bus_remove_client(CanBusClientState *client)
+{
+    CanBusState *bus = client->bus;
+    if (bus == NULL) {
+        return 0;
+    }
+
+    QTAILQ_REMOVE(&bus->clients, client, next);
+    client->bus = NULL;
+    return 1;
+}
+
+ssize_t can_bus_client_send(CanBusClientState *client,
+             const struct qemu_can_frame *frames, size_t frames_cnt)
+{
+    int ret = 0;
+    CanBusState *bus = client->bus;
+    CanBusClientState *peer;
+    if (bus == NULL) {
+        return -1;
+    }
+
+    QTAILQ_FOREACH(peer, &bus->clients, next) {
+        if (peer->info->can_receive(peer)) {
+            if (peer == client) {
+                /* No loopback support for now */
+                continue;
+            }
+            if (peer->info->receive(peer, frames, frames_cnt) > 0) {
+                ret = 1;
+            }
+        }
+    }
+
+    return ret;
+}
+
+int can_bus_filter_match(struct qemu_can_filter *filter, qemu_canid_t can_id)
+{
+    int m;
+    if (((can_id | filter->can_mask) & QEMU_CAN_ERR_FLAG)) {
+        return (filter->can_mask & QEMU_CAN_ERR_FLAG) != 0;
+    }
+    m = (can_id & filter->can_mask) == (filter->can_id & filter->can_mask);
+    return filter->can_id & QEMU_CAN_INV_FILTER ? !m : m;
+}
+
+int can_bus_client_set_filters(CanBusClientState *client,
+             const struct qemu_can_filter *filters, size_t filters_cnt)
+{
+    return 0;
+}
+
+
+static bool can_bus_can_be_deleted(UserCreatable *uc)
+{
+    return false;
+}
+
+static void can_bus_class_init(ObjectClass *klass,
+                                void *class_data G_GNUC_UNUSED)
+{
+    UserCreatableClass *uc_klass = USER_CREATABLE_CLASS(klass);
+
+    uc_klass->can_be_deleted = can_bus_can_be_deleted;
+}
+
+static const TypeInfo can_bus_info = {
+    .parent = TYPE_OBJECT,
+    .name = TYPE_CAN_BUS,
+    .instance_size = sizeof(CanBusState),
+    .instance_init = can_bus_instance_init,
+    .class_init = can_bus_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void can_bus_register_types(void)
+{
+    type_register_static(&can_bus_info);
+}
+
+type_init(can_bus_register_types);
diff --git a/net/can/can_host.c b/net/can/can_host.c
new file mode 100644
index 000000000..1dfaf0ced
--- /dev/null
+++ b/net/can/can_host.c
@@ -0,0 +1,114 @@
+/*
+ * CAN generic CAN host connection support
+ *
+ * Copyright (c) 2013-2014 Jin Yang
+ * Copyright (c) 2014-2018 Pavel Pisa
+ *
+ * Initial development supported by Google GSoC 2013 from RTEMS project slot
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "chardev/char.h"
+#include "qemu/module.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "net/can_emu.h"
+#include "net/can_host.h"
+
+struct CanBusState {
+    Object object;
+
+    QTAILQ_HEAD(, CanBusClientState) clients;
+};
+
+static void can_host_disconnect(CanHostState *ch)
+{
+    CanHostClass *chc = CAN_HOST_GET_CLASS(ch);
+
+    can_bus_remove_client(&ch->bus_client);
+    chc->disconnect(ch);
+}
+
+static void can_host_connect(CanHostState *ch, Error **errp)
+{
+    CanHostClass *chc = CAN_HOST_GET_CLASS(ch);
+    Error *local_err = NULL;
+
+    chc->connect(ch, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    can_bus_insert_client(ch->bus, &ch->bus_client);
+}
+
+static void can_host_unparent(Object *obj)
+{
+    can_host_disconnect(CAN_HOST(obj));
+}
+
+static void can_host_complete(UserCreatable *uc, Error **errp)
+{
+    can_host_connect(CAN_HOST(uc), errp);
+}
+
+static void can_host_instance_init(Object *obj)
+{
+    CanHostState *ch = CAN_HOST(obj);
+
+    object_property_add_link(obj, "canbus", TYPE_CAN_BUS,
+                             (Object **)&ch->bus,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG,
+                             &error_abort);
+}
+
+static void can_host_class_init(ObjectClass *klass,
+                                void *class_data G_GNUC_UNUSED)
+{
+    UserCreatableClass *uc_klass = USER_CREATABLE_CLASS(klass);
+
+    klass->unparent = can_host_unparent;
+    uc_klass->complete = can_host_complete;
+}
+
+static const TypeInfo can_host_info = {
+    .parent = TYPE_OBJECT,
+    .name = TYPE_CAN_HOST,
+    .instance_size = sizeof(CanHostState),
+    .class_size = sizeof(CanHostClass),
+    .abstract = true,
+    .instance_init = can_host_instance_init,
+    .class_init = can_host_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void can_host_register_types(void)
+{
+    type_register_static(&can_host_info);
+}
+
+type_init(can_host_register_types);
diff --git a/net/can/can_socketcan.c b/net/can/can_socketcan.c
new file mode 100644
index 000000000..8a6ffad40
--- /dev/null
+++ b/net/can/can_socketcan.c
@@ -0,0 +1,289 @@
+/*
+ * CAN c support to connect to the Linux host SocketCAN interfaces
+ *
+ * Copyright (c) 2013-2014 Jin Yang
+ * Copyright (c) 2014-2018 Pavel Pisa
+ *
+ * Initial development supported by Google GSoC 2013 from RTEMS project slot
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "chardev/char.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+#include "net/can_emu.h"
+#include "net/can_host.h"
+
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#ifndef DEBUG_CAN
+#define DEBUG_CAN 0
+#endif /*DEBUG_CAN*/
+
+#define TYPE_CAN_HOST_SOCKETCAN "can-host-socketcan"
+#define CAN_HOST_SOCKETCAN(obj) \
+     OBJECT_CHECK(CanHostSocketCAN, (obj), TYPE_CAN_HOST_SOCKETCAN)
+
+#define CAN_READ_BUF_LEN  5
+typedef struct CanHostSocketCAN {
+    CanHostState       parent;
+    char               *ifname;
+
+    qemu_can_filter    *rfilter;
+    int                rfilter_num;
+    can_err_mask_t     err_mask;
+
+    qemu_can_frame     buf[CAN_READ_BUF_LEN];
+    int                bufcnt;
+    int                bufptr;
+
+    int                fd;
+} CanHostSocketCAN;
+
+/* Check that QEMU and Linux kernel flags encoding and structure matches */
+QEMU_BUILD_BUG_ON(QEMU_CAN_EFF_FLAG != CAN_EFF_FLAG);
+QEMU_BUILD_BUG_ON(QEMU_CAN_RTR_FLAG != CAN_RTR_FLAG);
+QEMU_BUILD_BUG_ON(QEMU_CAN_ERR_FLAG != CAN_ERR_FLAG);
+QEMU_BUILD_BUG_ON(QEMU_CAN_INV_FILTER != CAN_INV_FILTER);
+QEMU_BUILD_BUG_ON(offsetof(qemu_can_frame, data)
+                  != offsetof(struct can_frame, data));
+
+static void can_host_socketcan_display_msg(struct qemu_can_frame *msg)
+{
+    int i;
+
+    qemu_log_lock();
+    qemu_log("[cansocketcan]: %03X [%01d] %s %s",
+             msg->can_id & QEMU_CAN_EFF_MASK,
+             msg->can_dlc,
+             msg->can_id & QEMU_CAN_EFF_FLAG ? "EFF" : "SFF",
+             msg->can_id & QEMU_CAN_RTR_FLAG ? "RTR" : "DAT");
+
+    for (i = 0; i < msg->can_dlc; i++) {
+        qemu_log(" %02X", msg->data[i]);
+    }
+    qemu_log("\n");
+    qemu_log_flush();
+    qemu_log_unlock();
+}
+
+static void can_host_socketcan_read(void *opaque)
+{
+    CanHostSocketCAN *c = opaque;
+    CanHostState *ch = CAN_HOST(c);
+
+    /* CAN_READ_BUF_LEN for multiple messages syscall is possible for future */
+    c->bufcnt = read(c->fd, c->buf, sizeof(qemu_can_frame));
+    if (c->bufcnt < 0) {
+        warn_report("CAN bus host read failed (%s)", strerror(errno));
+        return;
+    }
+
+    can_bus_client_send(&ch->bus_client, c->buf, 1);
+
+    if (DEBUG_CAN) {
+        can_host_socketcan_display_msg(c->buf);
+    }
+}
+
+static int can_host_socketcan_can_receive(CanBusClientState *client)
+{
+    return 1;
+}
+
+static ssize_t can_host_socketcan_receive(CanBusClientState *client,
+                            const qemu_can_frame *frames, size_t frames_cnt)
+{
+    CanHostState *ch = container_of(client, CanHostState, bus_client);
+    CanHostSocketCAN *c = CAN_HOST_SOCKETCAN(ch);
+
+    size_t len = sizeof(qemu_can_frame);
+    int res;
+
+    if (c->fd < 0) {
+        return -1;
+    }
+
+    res = write(c->fd, frames, len);
+
+    if (!res) {
+        warn_report("[cansocketcan]: write message to host returns zero");
+        return -1;
+    }
+
+    if (res != len) {
+        if (res < 0) {
+            warn_report("[cansocketcan]: write to host failed (%s)",
+                        strerror(errno));
+        } else {
+            warn_report("[cansocketcan]: write to host truncated");
+        }
+        return -1;
+    }
+
+    return 1;
+}
+
+static void can_host_socketcan_disconnect(CanHostState *ch)
+{
+    CanHostSocketCAN *c = CAN_HOST_SOCKETCAN(ch);
+
+    if (c->fd >= 0) {
+        qemu_set_fd_handler(c->fd, NULL, NULL, c);
+        close(c->fd);
+        c->fd = -1;
+    }
+
+    g_free(c->rfilter);
+    c->rfilter = NULL;
+    c->rfilter_num = 0;
+}
+
+static CanBusClientInfo can_host_socketcan_bus_client_info = {
+    .can_receive = can_host_socketcan_can_receive,
+    .receive = can_host_socketcan_receive,
+};
+
+static void can_host_socketcan_connect(CanHostState *ch, Error **errp)
+{
+    CanHostSocketCAN *c = CAN_HOST_SOCKETCAN(ch);
+    int s; /* can raw socket */
+    struct sockaddr_can addr;
+    struct ifreq ifr;
+
+    /* open socket */
+    s = qemu_socket(PF_CAN, SOCK_RAW, CAN_RAW);
+    if (s < 0) {
+        error_setg_errno(errp, errno, "failed to create CAN_RAW socket");
+        return;
+    }
+
+    addr.can_family = AF_CAN;
+    memset(&ifr.ifr_name, 0, sizeof(ifr.ifr_name));
+    strcpy(ifr.ifr_name, c->ifname);
+    if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
+        error_setg_errno(errp, errno,
+                         "SocketCAN host interface %s not available", c->ifname);
+        goto fail;
+    }
+    addr.can_ifindex = ifr.ifr_ifindex;
+
+    c->err_mask = 0xffffffff; /* Receive error frame. */
+    setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
+                   &c->err_mask, sizeof(c->err_mask));
+
+    c->rfilter_num = 1;
+    c->rfilter = g_new(struct qemu_can_filter, c->rfilter_num);
+
+    /* Receive all data frame. If |= CAN_INV_FILTER no data. */
+    c->rfilter[0].can_id = 0;
+    c->rfilter[0].can_mask = 0;
+    c->rfilter[0].can_mask &= ~CAN_ERR_FLAG;
+
+    setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, c->rfilter,
+               c->rfilter_num * sizeof(struct qemu_can_filter));
+
+    if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+        error_setg_errno(errp, errno, "failed to bind to host interface %s",
+                         c->ifname);
+        goto fail;
+    }
+
+    c->fd = s;
+    ch->bus_client.info = &can_host_socketcan_bus_client_info;
+    qemu_set_fd_handler(c->fd, can_host_socketcan_read, NULL, c);
+    return;
+
+fail:
+    close(s);
+    g_free(c->rfilter);
+    c->rfilter = NULL;
+    c->rfilter_num = 0;
+}
+
+static char *can_host_socketcan_get_if(Object *obj, Error **errp)
+{
+    CanHostSocketCAN *c = CAN_HOST_SOCKETCAN(obj);
+
+    return g_strdup(c->ifname);
+}
+
+static void can_host_socketcan_set_if(Object *obj, const char *value, Error **errp)
+{
+    CanHostSocketCAN *c = CAN_HOST_SOCKETCAN(obj);
+    struct ifreq ifr;
+
+    if (strlen(value) >= sizeof(ifr.ifr_name)) {
+        error_setg(errp, "CAN interface name longer than %zd characters",
+                   sizeof(ifr.ifr_name) - 1);
+        return;
+    }
+
+    if (c->fd != -1) {
+        error_setg(errp, "CAN interface already connected");
+        return;
+    }
+
+    g_free(c->ifname);
+    c->ifname = g_strdup(value);
+}
+
+static void can_host_socketcan_instance_init(Object *obj)
+{
+    CanHostSocketCAN *c = CAN_HOST_SOCKETCAN(obj);
+
+    c->fd = -1;
+}
+
+static void can_host_socketcan_class_init(ObjectClass *klass,
+                                          void *class_data G_GNUC_UNUSED)
+{
+    CanHostClass *chc = CAN_HOST_CLASS(klass);
+
+    object_class_property_add_str(klass, "if",
+                                  can_host_socketcan_get_if,
+                                  can_host_socketcan_set_if,
+                                  &error_abort);
+    chc->connect = can_host_socketcan_connect;
+    chc->disconnect = can_host_socketcan_disconnect;
+}
+
+static const TypeInfo can_host_socketcan_info = {
+    .parent = TYPE_CAN_HOST,
+    .name = TYPE_CAN_HOST_SOCKETCAN,
+    .instance_size = sizeof(CanHostSocketCAN),
+    .instance_init = can_host_socketcan_instance_init,
+    .class_init = can_host_socketcan_class_init,
+};
+
+static void can_host_register_types(void)
+{
+    type_register_static(&can_host_socketcan_info);
+}
+
+type_init(can_host_register_types);
diff --git a/net/checksum.c b/net/checksum.c
index 14c08550e..aaa400023 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -15,31 +15,34 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "qemu-common.h"
+#include "qemu/osdep.h"
 #include "net/checksum.h"
-
-#define PROTO_TCP  6
-#define PROTO_UDP 17
+#include "net/eth.h"
 
 uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq)
 {
-    uint32_t sum = 0;
+    uint32_t sum1 = 0, sum2 = 0;
     int i;
 
-    for (i = seq; i < seq + len; i++) {
-        if (i & 1) {
-            sum += (uint32_t)buf[i - seq];
-        } else {
-            sum += (uint32_t)buf[i - seq] << 8;
-        }
+    for (i = 0; i < len - 1; i += 2) {
+        sum1 += (uint32_t)buf[i];
+        sum2 += (uint32_t)buf[i + 1];
+    }
+    if (i < len) {
+        sum1 += (uint32_t)buf[i];
+    }
+
+    if (seq & 1) {
+        return sum1 + (sum2 << 8);
+    } else {
+        return sum2 + (sum1 << 8);
     }
-    return sum;
 }
 
 uint16_t net_checksum_finish(uint32_t sum)
 {
     while (sum>>16)
-	sum = (sum & 0xFFFF)+(sum >> 16);
+        sum = (sum & 0xFFFF)+(sum >> 16);
     return ~sum;
 }
 
@@ -56,44 +59,118 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto,
 
 void net_checksum_calculate(uint8_t *data, int length)
 {
-    int hlen, plen, proto, csum_offset;
-    uint16_t csum;
-
-    if ((data[14] & 0xf0) != 0x40)
-	return; /* not IPv4 */
-    hlen  = (data[14] & 0x0f) * 4;
-    plen  = (data[16] << 8 | data[17]) - hlen;
-    proto = data[23];
-
-    switch (proto) {
-    case PROTO_TCP:
-	csum_offset = 16;
-	break;
-    case PROTO_UDP:
-	csum_offset = 6;
-	break;
+    int mac_hdr_len, ip_len;
+    struct ip_header *ip;
+
+    /*
+     * Note: We cannot assume "data" is aligned, so the all code uses
+     * some macros that take care of possible unaligned access for
+     * struct members (just in case).
+     */
+
+    /* Ensure we have at least an Eth header */
+    if (length < sizeof(struct eth_header)) {
+        return;
+    }
+
+    /* Handle the optionnal VLAN headers */
+    switch (lduw_be_p(&PKT_GET_ETH_HDR(data)->h_proto)) {
+    case ETH_P_VLAN:
+        mac_hdr_len = sizeof(struct eth_header) +
+                     sizeof(struct vlan_header);
+        break;
+    case ETH_P_DVLAN:
+        if (lduw_be_p(&PKT_GET_VLAN_HDR(data)->h_proto) == ETH_P_VLAN) {
+            mac_hdr_len = sizeof(struct eth_header) +
+                         2 * sizeof(struct vlan_header);
+        } else {
+            mac_hdr_len = sizeof(struct eth_header) +
+                         sizeof(struct vlan_header);
+        }
+        break;
     default:
-	return;
+        mac_hdr_len = sizeof(struct eth_header);
+        break;
+    }
+
+    length -= mac_hdr_len;
+
+    /* Now check we have an IP header (with an optionnal VLAN header) */
+    if (length < sizeof(struct ip_header)) {
+        return;
+    }
+
+    ip = (struct ip_header *)(data + mac_hdr_len);
+
+    if (IP_HEADER_VERSION(ip) != IP_HEADER_VERSION_4) {
+        return; /* not IPv4 */
+    }
+
+    ip_len = lduw_be_p(&ip->ip_len);
+
+    /* Last, check that we have enough data for the all IP frame */
+    if (length < ip_len) {
+        return;
+    }
+
+    ip_len -= IP_HDR_GET_LEN(ip);
+
+    switch (ip->ip_p) {
+    case IP_PROTO_TCP:
+    {
+        uint16_t csum;
+        tcp_header *tcp = (tcp_header *)(ip + 1);
+
+        if (ip_len < sizeof(tcp_header)) {
+            return;
+        }
+
+        /* Set csum to 0 */
+        stw_he_p(&tcp->th_sum, 0);
+
+        csum = net_checksum_tcpudp(ip_len, ip->ip_p,
+                                   (uint8_t *)&ip->ip_src,
+                                   (uint8_t *)tcp);
+
+        /* Store computed csum */
+        stw_be_p(&tcp->th_sum, csum);
+
+        break;
     }
+    case IP_PROTO_UDP:
+    {
+        uint16_t csum;
+        udp_header *udp = (udp_header *)(ip + 1);
+
+        if (ip_len < sizeof(udp_header)) {
+            return;
+        }
+
+        /* Set csum to 0 */
+        stw_he_p(&udp->uh_sum, 0);
 
-    if (plen < csum_offset+2)
-	return;
+        csum = net_checksum_tcpudp(ip_len, ip->ip_p,
+                                   (uint8_t *)&ip->ip_src,
+                                   (uint8_t *)udp);
 
-    data[14+hlen+csum_offset]   = 0;
-    data[14+hlen+csum_offset+1] = 0;
-    csum = net_checksum_tcpudp(plen, proto, data+14+12, data+14+hlen);
-    data[14+hlen+csum_offset]   = csum >> 8;
-    data[14+hlen+csum_offset+1] = csum & 0xff;
+        /* Store computed csum */
+        stw_be_p(&udp->uh_sum, csum);
+
+        break;
+    }
+    default:
+        /* Can't handle any other protocol */
+        break;
+    }
 }
 
 uint32_t
 net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
-                     uint32_t iov_off, uint32_t size)
+                     uint32_t iov_off, uint32_t size, uint32_t csum_offset)
 {
     size_t iovec_off, buf_off;
     unsigned int i;
     uint32_t res = 0;
-    uint32_t seq = 0;
 
     iovec_off = 0;
     buf_off = 0;
@@ -102,8 +179,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
             size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
             void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off);
 
-            res += net_checksum_add_cont(len, chunk_buf, seq);
-            seq += len;
+            res += net_checksum_add_cont(len, chunk_buf, csum_offset);
+            csum_offset += len;
 
             buf_off += len;
             iov_off += len;
diff --git a/net/clients.h b/net/clients.h
index 77932942b..a6ef267e1 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -25,31 +25,40 @@
 #define QEMU_NET_CLIENTS_H
 
 #include "net/net.h"
-#include "qapi-types.h"
 
-int net_init_dump(const NetClientOptions *opts, const char *name,
-                  NetClientState *peer);
+int net_init_dump(const Netdev *netdev, const char *name,
+                  NetClientState *peer, Error **errp);
 
 #ifdef CONFIG_SLIRP
-int net_init_slirp(const NetClientOptions *opts, const char *name,
-                   NetClientState *peer);
+int net_init_slirp(const Netdev *netdev, const char *name,
+                   NetClientState *peer, Error **errp);
 #endif
 
-int net_init_hubport(const NetClientOptions *opts, const char *name,
-                     NetClientState *peer);
+int net_init_hubport(const Netdev *netdev, const char *name,
+                     NetClientState *peer, Error **errp);
 
-int net_init_socket(const NetClientOptions *opts, const char *name,
-                    NetClientState *peer);
+int net_init_socket(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
 
-int net_init_tap(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer);
+int net_init_tap(const Netdev *netdev, const char *name,
+                 NetClientState *peer, Error **errp);
 
-int net_init_bridge(const NetClientOptions *opts, const char *name,
-                    NetClientState *peer);
+int net_init_bridge(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
 
+int net_init_l2tpv3(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
 #ifdef CONFIG_VDE
-int net_init_vde(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer);
+int net_init_vde(const Netdev *netdev, const char *name,
+                 NetClientState *peer, Error **errp);
 #endif
 
+#ifdef CONFIG_NETMAP
+int net_init_netmap(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
+#endif
+
+int net_init_vhost_user(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp);
+
 #endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/colo-compare.c b/net/colo-compare.c
new file mode 100644
index 000000000..7ee17f2cf
--- /dev/null
+++ b/net/colo-compare.c
@@ -0,0 +1,1259 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "net/net.h"
+#include "net/eth.h"
+#include "qom/object_interfaces.h"
+#include "qemu/iov.h"
+#include "qom/object.h"
+#include "net/queue.h"
+#include "chardev/char-fe.h"
+#include "qemu/sockets.h"
+#include "colo.h"
+#include "sysemu/iothread.h"
+#include "net/colo-compare.h"
+#include "migration/colo.h"
+#include "migration/migration.h"
+#include "util.h"
+
+#define TYPE_COLO_COMPARE "colo-compare"
+#define COLO_COMPARE(obj) \
+    OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
+
+static QTAILQ_HEAD(, CompareState) net_compares =
+       QTAILQ_HEAD_INITIALIZER(net_compares);
+
+static NotifierList colo_compare_notifiers =
+    NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);
+
+#define COMPARE_READ_LEN_MAX NET_BUFSIZE
+#define MAX_QUEUE_SIZE 1024
+
+#define COLO_COMPARE_FREE_PRIMARY     0x01
+#define COLO_COMPARE_FREE_SECONDARY   0x02
+
+/* TODO: Should be configurable */
+#define REGULAR_PACKET_CHECK_MS 3000
+
+static QemuMutex event_mtx;
+static QemuCond event_complete_cond;
+static int event_unhandled_count;
+
+/*
+ *  + CompareState ++
+ *  |               |
+ *  +---------------+   +---------------+         +---------------+
+ *  |   conn list   + - >      conn     + ------- >      conn     + -- > ......
+ *  +---------------+   +---------------+         +---------------+
+ *  |               |     |           |             |          |
+ *  +---------------+ +---v----+  +---v----+    +---v----+ +---v----+
+ *                    |primary |  |secondary    |primary | |secondary
+ *                    |packet  |  |packet  +    |packet  | |packet  +
+ *                    +--------+  +--------+    +--------+ +--------+
+ *                        |           |             |          |
+ *                    +---v----+  +---v----+    +---v----+ +---v----+
+ *                    |primary |  |secondary    |primary | |secondary
+ *                    |packet  |  |packet  +    |packet  | |packet  +
+ *                    +--------+  +--------+    +--------+ +--------+
+ *                        |           |             |          |
+ *                    +---v----+  +---v----+    +---v----+ +---v----+
+ *                    |primary |  |secondary    |primary | |secondary
+ *                    |packet  |  |packet  +    |packet  | |packet  +
+ *                    +--------+  +--------+    +--------+ +--------+
+ */
+typedef struct CompareState {
+    Object parent;
+
+    char *pri_indev;
+    char *sec_indev;
+    char *outdev;
+    char *notify_dev;
+    CharBackend chr_pri_in;
+    CharBackend chr_sec_in;
+    CharBackend chr_out;
+    CharBackend chr_notify_dev;
+    SocketReadState pri_rs;
+    SocketReadState sec_rs;
+    SocketReadState notify_rs;
+    bool vnet_hdr;
+
+    /*
+     * Record the connection that through the NIC
+     * Element type: Connection
+     */
+    GQueue conn_list;
+    /* Record the connection without repetition */
+    GHashTable *connection_track_table;
+
+    IOThread *iothread;
+    GMainContext *worker_context;
+    QEMUTimer *packet_check_timer;
+
+    QEMUBH *event_bh;
+    enum colo_event event;
+
+    QTAILQ_ENTRY(CompareState) next;
+} CompareState;
+
+typedef struct CompareClass {
+    ObjectClass parent_class;
+} CompareClass;
+
+enum {
+    PRIMARY_IN = 0,
+    SECONDARY_IN,
+};
+
+
+static int compare_chr_send(CompareState *s,
+                            const uint8_t *buf,
+                            uint32_t size,
+                            uint32_t vnet_hdr_len,
+                            bool notify_remote_frame);
+
+static bool packet_matches_str(const char *str,
+                               const uint8_t *buf,
+                               uint32_t packet_len)
+{
+    if (packet_len != strlen(str)) {
+        return false;
+    }
+
+    return !memcmp(str, buf, strlen(str));
+}
+
+static void notify_remote_frame(CompareState *s)
+{
+    char msg[] = "DO_CHECKPOINT";
+    int ret = 0;
+
+    ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true);
+    if (ret < 0) {
+        error_report("Notify Xen COLO-frame failed");
+    }
+}
+
+static void colo_compare_inconsistency_notify(CompareState *s)
+{
+    if (s->notify_dev) {
+        notify_remote_frame(s);
+    } else {
+        notifier_list_notify(&colo_compare_notifiers,
+                             migrate_get_current());
+    }
+}
+
+static gint seq_sorter(Packet *a, Packet *b, gpointer data)
+{
+    struct tcp_hdr *atcp, *btcp;
+
+    atcp = (struct tcp_hdr *)(a->transport_header);
+    btcp = (struct tcp_hdr *)(b->transport_header);
+    return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
+}
+
+static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
+{
+    Packet *pkt = data;
+    struct tcp_hdr *tcphd;
+
+    tcphd = (struct tcp_hdr *)pkt->transport_header;
+
+    pkt->tcp_seq = ntohl(tcphd->th_seq);
+    pkt->tcp_ack = ntohl(tcphd->th_ack);
+    *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
+    pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
+                       + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
+    pkt->payload_size = pkt->size - pkt->header_size;
+    pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
+    pkt->flags = tcphd->th_flags;
+}
+
+/*
+ * Return 1 on success, if return 0 means the
+ * packet will be dropped
+ */
+static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
+{
+    if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
+        if (pkt->ip->ip_p == IPPROTO_TCP) {
+            fill_pkt_tcp_info(pkt, max_ack);
+            g_queue_insert_sorted(queue,
+                                  pkt,
+                                  (GCompareDataFunc)seq_sorter,
+                                  NULL);
+        } else {
+            g_queue_push_tail(queue, pkt);
+        }
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Return 0 on success, if return -1 means the pkt
+ * is unsupported(arp and ipv6) and will be sent later
+ */
+static int packet_enqueue(CompareState *s, int mode, Connection **con)
+{
+    ConnectionKey key;
+    Packet *pkt = NULL;
+    Connection *conn;
+
+    if (mode == PRIMARY_IN) {
+        pkt = packet_new(s->pri_rs.buf,
+                         s->pri_rs.packet_len,
+                         s->pri_rs.vnet_hdr_len);
+    } else {
+        pkt = packet_new(s->sec_rs.buf,
+                         s->sec_rs.packet_len,
+                         s->sec_rs.vnet_hdr_len);
+    }
+
+    if (parse_packet_early(pkt)) {
+        packet_destroy(pkt, NULL);
+        pkt = NULL;
+        return -1;
+    }
+    fill_connection_key(pkt, &key);
+
+    conn = connection_get(s->connection_track_table,
+                          &key,
+                          &s->conn_list);
+
+    if (!conn->processing) {
+        g_queue_push_tail(&s->conn_list, conn);
+        conn->processing = true;
+    }
+
+    if (mode == PRIMARY_IN) {
+        if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
+            error_report("colo compare primary queue size too big,"
+                         "drop packet");
+        }
+    } else {
+        if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
+            error_report("colo compare secondary queue size too big,"
+                         "drop packet");
+        }
+    }
+    *con = conn;
+
+    return 0;
+}
+
+static inline bool after(uint32_t seq1, uint32_t seq2)
+{
+        return (int32_t)(seq1 - seq2) > 0;
+}
+
+static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
+{
+    int ret;
+    ret = compare_chr_send(s,
+                           pkt->data,
+                           pkt->size,
+                           pkt->vnet_hdr_len,
+                           false);
+    if (ret < 0) {
+        error_report("colo send primary packet failed");
+    }
+    trace_colo_compare_main("packet same and release packet");
+    packet_destroy(pkt, NULL);
+}
+
+/*
+ * The IP packets sent by primary and secondary
+ * will be compared in here
+ * TODO support ip fragment, Out-Of-Order
+ * return:    0  means packet same
+ *            > 0 || < 0 means packet different
+ */
+static int colo_compare_packet_payload(Packet *ppkt,
+                                       Packet *spkt,
+                                       uint16_t poffset,
+                                       uint16_t soffset,
+                                       uint16_t len)
+
+{
+    if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
+        char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
+
+        strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
+        strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
+        strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
+        strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
+
+        trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
+                                   pri_ip_dst, spkt->size,
+                                   sec_ip_src, sec_ip_dst);
+    }
+
+    return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
+}
+
+/*
+ * return true means that the payload is consist and
+ * need to make the next comparison, false means do
+ * the checkpoint
+*/
+static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
+                              int8_t *mark, uint32_t max_ack)
+{
+    *mark = 0;
+
+    if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
+        if (!colo_compare_packet_payload(ppkt, spkt,
+                                        ppkt->header_size, spkt->header_size,
+                                        ppkt->payload_size)) {
+            *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
+            return true;
+        }
+    }
+
+    /* one part of secondary packet payload still need to be compared */
+    if (!after(ppkt->seq_end, spkt->seq_end)) {
+        if (!colo_compare_packet_payload(ppkt, spkt,
+                                        ppkt->header_size + ppkt->offset,
+                                        spkt->header_size + spkt->offset,
+                                        ppkt->payload_size - ppkt->offset)) {
+            if (!after(ppkt->tcp_ack, max_ack)) {
+                *mark = COLO_COMPARE_FREE_PRIMARY;
+                spkt->offset += ppkt->payload_size - ppkt->offset;
+                return true;
+            } else {
+                /* secondary guest hasn't ack the data, don't send
+                 * out this packet
+                 */
+                return false;
+            }
+        }
+    } else {
+        /* primary packet is longer than secondary packet, compare
+         * the same part and mark the primary packet offset
+         */
+        if (!colo_compare_packet_payload(ppkt, spkt,
+                                        ppkt->header_size + ppkt->offset,
+                                        spkt->header_size + spkt->offset,
+                                        spkt->payload_size - spkt->offset)) {
+            *mark = COLO_COMPARE_FREE_SECONDARY;
+            ppkt->offset += spkt->payload_size - spkt->offset;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void colo_compare_tcp(CompareState *s, Connection *conn)
+{
+    Packet *ppkt = NULL, *spkt = NULL;
+    int8_t mark;
+
+    /*
+     * If ppkt and spkt have the same payload, but ppkt's ACK
+     * is greater than spkt's ACK, in this case we can not
+     * send the ppkt because it will cause the secondary guest
+     * to miss sending some data in the next. Therefore, we
+     * record the maximum ACK in the current queue at both
+     * primary side and secondary side. Only when the ack is
+     * less than the smaller of the two maximum ack, then we
+     * can ensure that the packet's payload is acknowledged by
+     * primary and secondary.
+    */
+    uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
+
+pri:
+    if (g_queue_is_empty(&conn->primary_list)) {
+        return;
+    }
+    ppkt = g_queue_pop_head(&conn->primary_list);
+sec:
+    if (g_queue_is_empty(&conn->secondary_list)) {
+        g_queue_push_head(&conn->primary_list, ppkt);
+        return;
+    }
+    spkt = g_queue_pop_head(&conn->secondary_list);
+
+    if (ppkt->tcp_seq == ppkt->seq_end) {
+        colo_release_primary_pkt(s, ppkt);
+        ppkt = NULL;
+    }
+
+    if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
+        trace_colo_compare_main("pri: this packet has compared");
+        colo_release_primary_pkt(s, ppkt);
+        ppkt = NULL;
+    }
+
+    if (spkt->tcp_seq == spkt->seq_end) {
+        packet_destroy(spkt, NULL);
+        if (!ppkt) {
+            goto pri;
+        } else {
+            goto sec;
+        }
+    } else {
+        if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
+            trace_colo_compare_main("sec: this packet has compared");
+            packet_destroy(spkt, NULL);
+            if (!ppkt) {
+                goto pri;
+            } else {
+                goto sec;
+            }
+        }
+        if (!ppkt) {
+            g_queue_push_head(&conn->secondary_list, spkt);
+            goto pri;
+        }
+    }
+
+    if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
+        trace_colo_compare_tcp_info("pri",
+                                    ppkt->tcp_seq, ppkt->tcp_ack,
+                                    ppkt->header_size, ppkt->payload_size,
+                                    ppkt->offset, ppkt->flags);
+
+        trace_colo_compare_tcp_info("sec",
+                                    spkt->tcp_seq, spkt->tcp_ack,
+                                    spkt->header_size, spkt->payload_size,
+                                    spkt->offset, spkt->flags);
+
+        if (mark == COLO_COMPARE_FREE_PRIMARY) {
+            conn->compare_seq = ppkt->seq_end;
+            colo_release_primary_pkt(s, ppkt);
+            g_queue_push_head(&conn->secondary_list, spkt);
+            goto pri;
+        }
+        if (mark == COLO_COMPARE_FREE_SECONDARY) {
+            conn->compare_seq = spkt->seq_end;
+            packet_destroy(spkt, NULL);
+            goto sec;
+        }
+        if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
+            conn->compare_seq = ppkt->seq_end;
+            colo_release_primary_pkt(s, ppkt);
+            packet_destroy(spkt, NULL);
+            goto pri;
+        }
+    } else {
+        g_queue_push_head(&conn->primary_list, ppkt);
+        g_queue_push_head(&conn->secondary_list, spkt);
+
+        qemu_hexdump((char *)ppkt->data, stderr,
+                     "colo-compare ppkt", ppkt->size);
+        qemu_hexdump((char *)spkt->data, stderr,
+                     "colo-compare spkt", spkt->size);
+
+        colo_compare_inconsistency_notify(s);
+    }
+}
+
+
+/*
+ * Called from the compare thread on the primary
+ * for compare udp packet
+ */
+static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
+{
+    uint16_t network_header_length = ppkt->ip->ip_hl << 2;
+    uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
+
+    trace_colo_compare_main("compare udp");
+
+    /*
+     * Because of ppkt and spkt are both in the same connection,
+     * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are
+     * same with spkt. In addition, IP header's Identification is a random
+     * field, we can handle it in IP fragmentation function later.
+     * COLO just concern the response net packet payload from primary guest
+     * and secondary guest are same or not, So we ignored all IP header include
+     * other field like TOS,TTL,IP Checksum. we only need to compare
+     * the ip payload here.
+     */
+    if (ppkt->size != spkt->size) {
+        trace_colo_compare_main("UDP: payload size of packets are different");
+        return -1;
+    }
+    if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
+                                    ppkt->size - offset)) {
+        trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
+        trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
+        if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
+            qemu_hexdump((char *)ppkt->data, stderr, "colo-compare pri pkt",
+                         ppkt->size);
+            qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
+                         spkt->size);
+        }
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Called from the compare thread on the primary
+ * for compare icmp packet
+ */
+static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
+{
+    uint16_t network_header_length = ppkt->ip->ip_hl << 2;
+    uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
+
+    trace_colo_compare_main("compare icmp");
+
+    /*
+     * Because of ppkt and spkt are both in the same connection,
+     * The ppkt's src ip, dst ip, src port, dst port, ip_proto all are
+     * same with spkt. In addition, IP header's Identification is a random
+     * field, we can handle it in IP fragmentation function later.
+     * COLO just concern the response net packet payload from primary guest
+     * and secondary guest are same or not, So we ignored all IP header include
+     * other field like TOS,TTL,IP Checksum. we only need to compare
+     * the ip payload here.
+     */
+    if (ppkt->size != spkt->size) {
+        trace_colo_compare_main("ICMP: payload size of packets are different");
+        return -1;
+    }
+    if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
+                                    ppkt->size - offset)) {
+        trace_colo_compare_icmp_miscompare("primary pkt size",
+                                           ppkt->size);
+        trace_colo_compare_icmp_miscompare("Secondary pkt size",
+                                           spkt->size);
+        if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
+            qemu_hexdump((char *)ppkt->data, stderr, "colo-compare pri pkt",
+                         ppkt->size);
+            qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
+                         spkt->size);
+        }
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Called from the compare thread on the primary
+ * for compare other packet
+ */
+static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
+{
+    uint16_t offset = ppkt->vnet_hdr_len;
+
+    trace_colo_compare_main("compare other");
+    if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
+        char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
+
+        strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
+        strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
+        strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
+        strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
+
+        trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
+                                   pri_ip_dst, spkt->size,
+                                   sec_ip_src, sec_ip_dst);
+    }
+
+    if (ppkt->size != spkt->size) {
+        trace_colo_compare_main("Other: payload size of packets are different");
+        return -1;
+    }
+    return colo_compare_packet_payload(ppkt, spkt, offset, offset,
+                                       ppkt->size - offset);
+}
+
+static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
+{
+    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+
+    if ((now - pkt->creation_ms) > (*check_time)) {
+        trace_colo_old_packet_check_found(pkt->creation_ms);
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+void colo_compare_register_notifier(Notifier *notify)
+{
+    notifier_list_add(&colo_compare_notifiers, notify);
+}
+
+void colo_compare_unregister_notifier(Notifier *notify)
+{
+    notifier_remove(notify);
+}
+
+static int colo_old_packet_check_one_conn(Connection *conn,
+                                          CompareState *s)
+{
+    GList *result = NULL;
+    int64_t check_time = REGULAR_PACKET_CHECK_MS;
+
+    result = g_queue_find_custom(&conn->primary_list,
+                                 &check_time,
+                                 (GCompareFunc)colo_old_packet_check_one);
+
+    if (result) {
+        /* Do checkpoint will flush old packet */
+        colo_compare_inconsistency_notify(s);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Look for old packets that the secondary hasn't matched,
+ * if we have some then we have to checkpoint to wake
+ * the secondary up.
+ */
+static void colo_old_packet_check(void *opaque)
+{
+    CompareState *s = opaque;
+
+    /*
+     * If we find one old packet, stop finding job and notify
+     * COLO frame do checkpoint.
+     */
+    g_queue_find_custom(&s->conn_list, s,
+                        (GCompareFunc)colo_old_packet_check_one_conn);
+}
+
+static void colo_compare_packet(CompareState *s, Connection *conn,
+                                int (*HandlePacket)(Packet *spkt,
+                                Packet *ppkt))
+{
+    Packet *pkt = NULL;
+    GList *result = NULL;
+
+    while (!g_queue_is_empty(&conn->primary_list) &&
+           !g_queue_is_empty(&conn->secondary_list)) {
+        pkt = g_queue_pop_head(&conn->primary_list);
+        result = g_queue_find_custom(&conn->secondary_list,
+                 pkt, (GCompareFunc)HandlePacket);
+
+        if (result) {
+            colo_release_primary_pkt(s, pkt);
+            g_queue_remove(&conn->secondary_list, result->data);
+        } else {
+            /*
+             * If one packet arrive late, the secondary_list or
+             * primary_list will be empty, so we can't compare it
+             * until next comparison. If the packets in the list are
+             * timeout, it will trigger a checkpoint request.
+             */
+            trace_colo_compare_main("packet different");
+            g_queue_push_head(&conn->primary_list, pkt);
+
+            colo_compare_inconsistency_notify(s);
+            break;
+        }
+    }
+}
+
+/*
+ * Called from the compare thread on the primary
+ * for compare packet with secondary list of the
+ * specified connection when a new packet was
+ * queued to it.
+ */
+static void colo_compare_connection(void *opaque, void *user_data)
+{
+    CompareState *s = user_data;
+    Connection *conn = opaque;
+
+    switch (conn->ip_proto) {
+    case IPPROTO_TCP:
+        colo_compare_tcp(s, conn);
+        break;
+    case IPPROTO_UDP:
+        colo_compare_packet(s, conn, colo_packet_compare_udp);
+        break;
+    case IPPROTO_ICMP:
+        colo_compare_packet(s, conn, colo_packet_compare_icmp);
+        break;
+    default:
+        colo_compare_packet(s, conn, colo_packet_compare_other);
+        break;
+    }
+}
+
+static int compare_chr_send(CompareState *s,
+                            const uint8_t *buf,
+                            uint32_t size,
+                            uint32_t vnet_hdr_len,
+                            bool notify_remote_frame)
+{
+    int ret = 0;
+    uint32_t len = htonl(size);
+
+    if (!size) {
+        return 0;
+    }
+
+    if (notify_remote_frame) {
+        ret = qemu_chr_fe_write_all(&s->chr_notify_dev,
+                                    (uint8_t *)&len,
+                                    sizeof(len));
+    } else {
+        ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
+    }
+
+    if (ret != sizeof(len)) {
+        goto err;
+    }
+
+    if (s->vnet_hdr) {
+        /*
+         * We send vnet header len make other module(like filter-redirector)
+         * know how to parse net packet correctly.
+         */
+        len = htonl(vnet_hdr_len);
+
+        if (!notify_remote_frame) {
+            ret = qemu_chr_fe_write_all(&s->chr_out,
+                                        (uint8_t *)&len,
+                                        sizeof(len));
+        }
+
+        if (ret != sizeof(len)) {
+            goto err;
+        }
+    }
+
+    if (notify_remote_frame) {
+        ret = qemu_chr_fe_write_all(&s->chr_notify_dev,
+                                    (uint8_t *)buf,
+                                    size);
+    } else {
+        ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
+    }
+
+    if (ret != size) {
+        goto err;
+    }
+
+    return 0;
+
+err:
+    return ret < 0 ? ret : -EIO;
+}
+
+static int compare_chr_can_read(void *opaque)
+{
+    return COMPARE_READ_LEN_MAX;
+}
+
+/*
+ * Called from the main thread on the primary for packets
+ * arriving over the socket from the primary.
+ */
+static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
+{
+    CompareState *s = COLO_COMPARE(opaque);
+    int ret;
+
+    ret = net_fill_rstate(&s->pri_rs, buf, size);
+    if (ret == -1) {
+        qemu_chr_fe_set_handlers(&s->chr_pri_in, NULL, NULL, NULL, NULL,
+                                 NULL, NULL, true);
+        error_report("colo-compare primary_in error");
+    }
+}
+
+/*
+ * Called from the main thread on the primary for packets
+ * arriving over the socket from the secondary.
+ */
+static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
+{
+    CompareState *s = COLO_COMPARE(opaque);
+    int ret;
+
+    ret = net_fill_rstate(&s->sec_rs, buf, size);
+    if (ret == -1) {
+        qemu_chr_fe_set_handlers(&s->chr_sec_in, NULL, NULL, NULL, NULL,
+                                 NULL, NULL, true);
+        error_report("colo-compare secondary_in error");
+    }
+}
+
+static void compare_notify_chr(void *opaque, const uint8_t *buf, int size)
+{
+    CompareState *s = COLO_COMPARE(opaque);
+    int ret;
+
+    ret = net_fill_rstate(&s->notify_rs, buf, size);
+    if (ret == -1) {
+        qemu_chr_fe_set_handlers(&s->chr_notify_dev, NULL, NULL, NULL, NULL,
+                                 NULL, NULL, true);
+        error_report("colo-compare notify_dev error");
+    }
+}
+
+/*
+ * Check old packet regularly so it can watch for any packets
+ * that the secondary hasn't produced equivalents of.
+ */
+static void check_old_packet_regular(void *opaque)
+{
+    CompareState *s = opaque;
+
+    /* if have old packet we will notify checkpoint */
+    colo_old_packet_check(s);
+    timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+                REGULAR_PACKET_CHECK_MS);
+}
+
+/* Public API, Used for COLO frame to notify compare event */
+void colo_notify_compares_event(void *opaque, int event, Error **errp)
+{
+    CompareState *s;
+
+    qemu_mutex_lock(&event_mtx);
+    QTAILQ_FOREACH(s, &net_compares, next) {
+        s->event = event;
+        qemu_bh_schedule(s->event_bh);
+        event_unhandled_count++;
+    }
+    /* Wait all compare threads to finish handling this event */
+    while (event_unhandled_count > 0) {
+        qemu_cond_wait(&event_complete_cond, &event_mtx);
+    }
+
+    qemu_mutex_unlock(&event_mtx);
+}
+
+static void colo_compare_timer_init(CompareState *s)
+{
+    AioContext *ctx = iothread_get_aio_context(s->iothread);
+
+    s->packet_check_timer = aio_timer_new(ctx, QEMU_CLOCK_VIRTUAL,
+                                SCALE_MS, check_old_packet_regular,
+                                s);
+    timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+                    REGULAR_PACKET_CHECK_MS);
+}
+
+static void colo_compare_timer_del(CompareState *s)
+{
+    if (s->packet_check_timer) {
+        timer_del(s->packet_check_timer);
+        timer_free(s->packet_check_timer);
+        s->packet_check_timer = NULL;
+    }
+ }
+
+static void colo_flush_packets(void *opaque, void *user_data);
+
+static void colo_compare_handle_event(void *opaque)
+{
+    CompareState *s = opaque;
+
+    switch (s->event) {
+    case COLO_EVENT_CHECKPOINT:
+        g_queue_foreach(&s->conn_list, colo_flush_packets, s);
+        break;
+    case COLO_EVENT_FAILOVER:
+        break;
+    default:
+        break;
+    }
+
+    qemu_mutex_lock(&event_mtx);
+    assert(event_unhandled_count > 0);
+    event_unhandled_count--;
+    qemu_cond_broadcast(&event_complete_cond);
+    qemu_mutex_unlock(&event_mtx);
+}
+
+static void colo_compare_iothread(CompareState *s)
+{
+    object_ref(OBJECT(s->iothread));
+    s->worker_context = iothread_get_g_main_context(s->iothread);
+
+    qemu_chr_fe_set_handlers(&s->chr_pri_in, compare_chr_can_read,
+                             compare_pri_chr_in, NULL, NULL,
+                             s, s->worker_context, true);
+    qemu_chr_fe_set_handlers(&s->chr_sec_in, compare_chr_can_read,
+                             compare_sec_chr_in, NULL, NULL,
+                             s, s->worker_context, true);
+    if (s->notify_dev) {
+        qemu_chr_fe_set_handlers(&s->chr_notify_dev, compare_chr_can_read,
+                                 compare_notify_chr, NULL, NULL,
+                                 s, s->worker_context, true);
+    }
+
+    colo_compare_timer_init(s);
+    s->event_bh = qemu_bh_new(colo_compare_handle_event, s);
+}
+
+static char *compare_get_pri_indev(Object *obj, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    return g_strdup(s->pri_indev);
+}
+
+static void compare_set_pri_indev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    g_free(s->pri_indev);
+    s->pri_indev = g_strdup(value);
+}
+
+static char *compare_get_sec_indev(Object *obj, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    return g_strdup(s->sec_indev);
+}
+
+static void compare_set_sec_indev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    g_free(s->sec_indev);
+    s->sec_indev = g_strdup(value);
+}
+
+static char *compare_get_outdev(Object *obj, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    return g_strdup(s->outdev);
+}
+
+static void compare_set_outdev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    g_free(s->outdev);
+    s->outdev = g_strdup(value);
+}
+
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    return s->vnet_hdr;
+}
+
+static void compare_set_vnet_hdr(Object *obj,
+                                 bool value,
+                                 Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    s->vnet_hdr = value;
+}
+
+static char *compare_get_notify_dev(Object *obj, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    return g_strdup(s->notify_dev);
+}
+
+static void compare_set_notify_dev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    g_free(s->notify_dev);
+    s->notify_dev = g_strdup(value);
+}
+
+static void compare_pri_rs_finalize(SocketReadState *pri_rs)
+{
+    CompareState *s = container_of(pri_rs, CompareState, pri_rs);
+    Connection *conn = NULL;
+
+    if (packet_enqueue(s, PRIMARY_IN, &conn)) {
+        trace_colo_compare_main("primary: unsupported packet in");
+        compare_chr_send(s,
+                         pri_rs->buf,
+                         pri_rs->packet_len,
+                         pri_rs->vnet_hdr_len,
+                         false);
+    } else {
+        /* compare packet in the specified connection */
+        colo_compare_connection(conn, s);
+    }
+}
+
+static void compare_sec_rs_finalize(SocketReadState *sec_rs)
+{
+    CompareState *s = container_of(sec_rs, CompareState, sec_rs);
+    Connection *conn = NULL;
+
+    if (packet_enqueue(s, SECONDARY_IN, &conn)) {
+        trace_colo_compare_main("secondary: unsupported packet in");
+    } else {
+        /* compare packet in the specified connection */
+        colo_compare_connection(conn, s);
+    }
+}
+
+static void compare_notify_rs_finalize(SocketReadState *notify_rs)
+{
+    CompareState *s = container_of(notify_rs, CompareState, notify_rs);
+
+    const char msg[] = "COLO_COMPARE_GET_XEN_INIT";
+    int ret;
+
+    if (packet_matches_str("COLO_USERSPACE_PROXY_INIT",
+                           notify_rs->buf,
+                           notify_rs->packet_len)) {
+        ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true);
+        if (ret < 0) {
+            error_report("Notify Xen COLO-frame INIT failed");
+        }
+    } else if (packet_matches_str("COLO_CHECKPOINT",
+                                  notify_rs->buf,
+                                  notify_rs->packet_len)) {
+        /* colo-compare do checkpoint, flush pri packet and remove sec packet */
+        g_queue_foreach(&s->conn_list, colo_flush_packets, s);
+    } else {
+        error_report("COLO compare got unsupported instruction");
+    }
+}
+
+/*
+ * Return 0 is success.
+ * Return 1 is failed.
+ */
+static int find_and_check_chardev(Chardev **chr,
+                                  char *chr_name,
+                                  Error **errp)
+{
+    *chr = qemu_chr_find(chr_name);
+    if (*chr == NULL) {
+        error_setg(errp, "Device '%s' not found",
+                   chr_name);
+        return 1;
+    }
+
+    if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
+        error_setg(errp, "chardev \"%s\" is not reconnectable",
+                   chr_name);
+        return 1;
+    }
+
+    if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_GCONTEXT)) {
+        error_setg(errp, "chardev \"%s\" cannot switch context",
+                   chr_name);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Called from the main thread on the primary
+ * to setup colo-compare.
+ */
+static void colo_compare_complete(UserCreatable *uc, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(uc);
+    Chardev *chr;
+
+    if (!s->pri_indev || !s->sec_indev || !s->outdev || !s->iothread) {
+        error_setg(errp, "colo compare needs 'primary_in' ,"
+                   "'secondary_in','outdev','iothread' property set");
+        return;
+    } else if (!strcmp(s->pri_indev, s->outdev) ||
+               !strcmp(s->sec_indev, s->outdev) ||
+               !strcmp(s->pri_indev, s->sec_indev)) {
+        error_setg(errp, "'indev' and 'outdev' could not be same "
+                   "for compare module");
+        return;
+    }
+
+    if (find_and_check_chardev(&chr, s->pri_indev, errp) ||
+        !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) {
+        return;
+    }
+
+    if (find_and_check_chardev(&chr, s->sec_indev, errp) ||
+        !qemu_chr_fe_init(&s->chr_sec_in, chr, errp)) {
+        return;
+    }
+
+    if (find_and_check_chardev(&chr, s->outdev, errp) ||
+        !qemu_chr_fe_init(&s->chr_out, chr, errp)) {
+        return;
+    }
+
+    net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
+    net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
+
+    /* Try to enable remote notify chardev, currently just for Xen COLO */
+    if (s->notify_dev) {
+        if (find_and_check_chardev(&chr, s->notify_dev, errp) ||
+            !qemu_chr_fe_init(&s->chr_notify_dev, chr, errp)) {
+            return;
+        }
+
+        net_socket_rs_init(&s->notify_rs, compare_notify_rs_finalize,
+                           s->vnet_hdr);
+    }
+
+    QTAILQ_INSERT_TAIL(&net_compares, s, next);
+
+    g_queue_init(&s->conn_list);
+
+    qemu_mutex_init(&event_mtx);
+    qemu_cond_init(&event_complete_cond);
+
+    s->connection_track_table = g_hash_table_new_full(connection_key_hash,
+                                                      connection_key_equal,
+                                                      g_free,
+                                                      connection_destroy);
+
+    colo_compare_iothread(s);
+    return;
+}
+
+static void colo_flush_packets(void *opaque, void *user_data)
+{
+    CompareState *s = user_data;
+    Connection *conn = opaque;
+    Packet *pkt = NULL;
+
+    while (!g_queue_is_empty(&conn->primary_list)) {
+        pkt = g_queue_pop_head(&conn->primary_list);
+        compare_chr_send(s,
+                         pkt->data,
+                         pkt->size,
+                         pkt->vnet_hdr_len,
+                         false);
+        packet_destroy(pkt, NULL);
+    }
+    while (!g_queue_is_empty(&conn->secondary_list)) {
+        pkt = g_queue_pop_head(&conn->secondary_list);
+        packet_destroy(pkt, NULL);
+    }
+}
+
+static void colo_compare_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = colo_compare_complete;
+}
+
+static void colo_compare_init(Object *obj)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    object_property_add_str(obj, "primary_in",
+                            compare_get_pri_indev, compare_set_pri_indev,
+                            NULL);
+    object_property_add_str(obj, "secondary_in",
+                            compare_get_sec_indev, compare_set_sec_indev,
+                            NULL);
+    object_property_add_str(obj, "outdev",
+                            compare_get_outdev, compare_set_outdev,
+                            NULL);
+    object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
+                            (Object **)&s->iothread,
+                            object_property_allow_set_link,
+                            OBJ_PROP_LINK_STRONG, NULL);
+    /* This parameter just for Xen COLO */
+    object_property_add_str(obj, "notify_dev",
+                            compare_get_notify_dev, compare_set_notify_dev,
+                            NULL);
+
+    s->vnet_hdr = false;
+    object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
+                             compare_set_vnet_hdr, NULL);
+}
+
+static void colo_compare_finalize(Object *obj)
+{
+    CompareState *s = COLO_COMPARE(obj);
+    CompareState *tmp = NULL;
+
+    qemu_chr_fe_deinit(&s->chr_pri_in, false);
+    qemu_chr_fe_deinit(&s->chr_sec_in, false);
+    qemu_chr_fe_deinit(&s->chr_out, false);
+    if (s->notify_dev) {
+        qemu_chr_fe_deinit(&s->chr_notify_dev, false);
+    }
+
+    if (s->iothread) {
+        colo_compare_timer_del(s);
+    }
+
+    qemu_bh_delete(s->event_bh);
+
+    QTAILQ_FOREACH(tmp, &net_compares, next) {
+        if (tmp == s) {
+            QTAILQ_REMOVE(&net_compares, s, next);
+            break;
+        }
+    }
+
+    /* Release all unhandled packets after compare thead exited */
+    g_queue_foreach(&s->conn_list, colo_flush_packets, s);
+
+    g_queue_clear(&s->conn_list);
+
+    if (s->connection_track_table) {
+        g_hash_table_destroy(s->connection_track_table);
+    }
+
+    if (s->iothread) {
+        object_unref(OBJECT(s->iothread));
+    }
+
+    qemu_mutex_destroy(&event_mtx);
+    qemu_cond_destroy(&event_complete_cond);
+
+    g_free(s->pri_indev);
+    g_free(s->sec_indev);
+    g_free(s->outdev);
+    g_free(s->notify_dev);
+}
+
+static const TypeInfo colo_compare_info = {
+    .name = TYPE_COLO_COMPARE,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(CompareState),
+    .instance_init = colo_compare_init,
+    .instance_finalize = colo_compare_finalize,
+    .class_size = sizeof(CompareClass),
+    .class_init = colo_compare_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void register_types(void)
+{
+    type_register_static(&colo_compare_info);
+}
+
+type_init(register_types);
diff --git a/net/colo-compare.h b/net/colo-compare.h
new file mode 100644
index 000000000..22ddd512e
--- /dev/null
+++ b/net/colo-compare.h
@@ -0,0 +1,24 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2017 FUJITSU LIMITED
+ * Copyright (c) 2017 Intel Corporation
+ *
+ * Authors:
+ *    zhanghailiang <zhang.zhanghailiang@huawei.com>
+ *    Zhang Chen <zhangckid@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_COLO_COMPARE_H
+#define QEMU_COLO_COMPARE_H
+
+void colo_notify_compares_event(void *opaque, int event, Error **errp);
+void colo_compare_register_notifier(Notifier *notify);
+void colo_compare_unregister_notifier(Notifier *notify);
+
+#endif /* QEMU_COLO_COMPARE_H */
diff --git a/net/colo.c b/net/colo.c
new file mode 100644
index 000000000..8196b3583
--- /dev/null
+++ b/net/colo.c
@@ -0,0 +1,232 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "colo.h"
+#include "util.h"
+
+uint32_t connection_key_hash(const void *opaque)
+{
+    const ConnectionKey *key = opaque;
+    uint32_t a, b, c;
+
+    /* Jenkins hash */
+    a = b = c = JHASH_INITVAL + sizeof(*key);
+    a += key->src.s_addr;
+    b += key->dst.s_addr;
+    c += (key->src_port | key->dst_port << 16);
+    __jhash_mix(a, b, c);
+
+    a += key->ip_proto;
+    __jhash_final(a, b, c);
+
+    return c;
+}
+
+int connection_key_equal(const void *key1, const void *key2)
+{
+    return memcmp(key1, key2, sizeof(ConnectionKey)) == 0;
+}
+
+int parse_packet_early(Packet *pkt)
+{
+    int network_length;
+    static const uint8_t vlan[] = {0x81, 0x00};
+    uint8_t *data = pkt->data + pkt->vnet_hdr_len;
+    uint16_t l3_proto;
+    ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
+
+    if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
+        trace_colo_proxy_main("pkt->size < ETH_HLEN");
+        return 1;
+    }
+
+    /*
+     * TODO: support vlan.
+     */
+    if (!memcmp(&data[12], vlan, sizeof(vlan))) {
+        trace_colo_proxy_main("COLO-proxy don't support vlan");
+        return 1;
+    }
+
+    pkt->network_header = data + l2hdr_len;
+
+    const struct iovec l2vec = {
+        .iov_base = (void *) data,
+        .iov_len = l2hdr_len
+    };
+    l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len);
+
+    if (l3_proto != ETH_P_IP) {
+        return 1;
+    }
+
+    network_length = pkt->ip->ip_hl * 4;
+    if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
+        trace_colo_proxy_main("pkt->size < network_header + network_length");
+        return 1;
+    }
+    pkt->transport_header = pkt->network_header + network_length;
+
+    return 0;
+}
+
+void extract_ip_and_port(uint32_t tmp_ports, ConnectionKey *key, Packet *pkt)
+{
+        key->src = pkt->ip->ip_src;
+        key->dst = pkt->ip->ip_dst;
+        key->src_port = ntohs(tmp_ports >> 16);
+        key->dst_port = ntohs(tmp_ports & 0xffff);
+}
+
+void fill_connection_key(Packet *pkt, ConnectionKey *key)
+{
+    uint32_t tmp_ports;
+
+    memset(key, 0, sizeof(*key));
+    key->ip_proto = pkt->ip->ip_p;
+
+    switch (key->ip_proto) {
+    case IPPROTO_TCP:
+    case IPPROTO_UDP:
+    case IPPROTO_DCCP:
+    case IPPROTO_ESP:
+    case IPPROTO_SCTP:
+    case IPPROTO_UDPLITE:
+        tmp_ports = *(uint32_t *)(pkt->transport_header);
+        extract_ip_and_port(tmp_ports, key, pkt);
+        break;
+    case IPPROTO_AH:
+        tmp_ports = *(uint32_t *)(pkt->transport_header + 4);
+        extract_ip_and_port(tmp_ports, key, pkt);
+        break;
+    default:
+        break;
+    }
+}
+
+void reverse_connection_key(ConnectionKey *key)
+{
+    struct in_addr tmp_ip;
+    uint16_t tmp_port;
+
+    tmp_ip = key->src;
+    key->src = key->dst;
+    key->dst = tmp_ip;
+
+    tmp_port = key->src_port;
+    key->src_port = key->dst_port;
+    key->dst_port = tmp_port;
+}
+
+Connection *connection_new(ConnectionKey *key)
+{
+    Connection *conn = g_slice_new(Connection);
+
+    conn->ip_proto = key->ip_proto;
+    conn->processing = false;
+    conn->offset = 0;
+    conn->tcp_state = TCPS_CLOSED;
+    conn->pack = 0;
+    conn->sack = 0;
+    g_queue_init(&conn->primary_list);
+    g_queue_init(&conn->secondary_list);
+
+    return conn;
+}
+
+void connection_destroy(void *opaque)
+{
+    Connection *conn = opaque;
+
+    g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
+    g_queue_clear(&conn->primary_list);
+    g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
+    g_queue_clear(&conn->secondary_list);
+    g_slice_free(Connection, conn);
+}
+
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
+{
+    Packet *pkt = g_slice_new(Packet);
+
+    pkt->data = g_memdup(data, size);
+    pkt->size = size;
+    pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    pkt->vnet_hdr_len = vnet_hdr_len;
+    pkt->tcp_seq = 0;
+    pkt->tcp_ack = 0;
+    pkt->seq_end = 0;
+    pkt->header_size = 0;
+    pkt->payload_size = 0;
+    pkt->offset = 0;
+    pkt->flags = 0;
+
+    return pkt;
+}
+
+void packet_destroy(void *opaque, void *user_data)
+{
+    Packet *pkt = opaque;
+
+    g_free(pkt->data);
+    g_slice_free(Packet, pkt);
+}
+
+/*
+ * Clear hashtable, stop this hash growing really huge
+ */
+void connection_hashtable_reset(GHashTable *connection_track_table)
+{
+    g_hash_table_remove_all(connection_track_table);
+}
+
+/* if not found, create a new connection and add to hash table */
+Connection *connection_get(GHashTable *connection_track_table,
+                           ConnectionKey *key,
+                           GQueue *conn_list)
+{
+    Connection *conn = g_hash_table_lookup(connection_track_table, key);
+
+    if (conn == NULL) {
+        ConnectionKey *new_key = g_memdup(key, sizeof(*key));
+
+        conn = connection_new(key);
+
+        if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) {
+            trace_colo_proxy_main("colo proxy connection hashtable full,"
+                                  " clear it");
+            connection_hashtable_reset(connection_track_table);
+            /*
+             * clear the conn_list
+             */
+            while (!g_queue_is_empty(conn_list)) {
+                connection_destroy(g_queue_pop_head(conn_list));
+            }
+        }
+
+        g_hash_table_insert(connection_track_table, new_key, conn);
+    }
+
+    return conn;
+}
+
+bool connection_has_tracked(GHashTable *connection_track_table,
+                            ConnectionKey *key)
+{
+    Connection *conn = g_hash_table_lookup(connection_track_table, key);
+
+    return conn ? true : false;
+}
diff --git a/net/colo.h b/net/colo.h
new file mode 100644
index 000000000..679314b1c
--- /dev/null
+++ b/net/colo.h
@@ -0,0 +1,106 @@
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef NET_COLO_H
+#define NET_COLO_H
+
+#include "qemu/jhash.h"
+#include "qemu/timer.h"
+#include "net/eth.h"
+
+#define HASHTABLE_MAX_SIZE 16384
+
+#ifndef IPPROTO_DCCP
+#define IPPROTO_DCCP 33
+#endif
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+
+#ifndef IPPROTO_UDPLITE
+#define IPPROTO_UDPLITE 136
+#endif
+
+typedef struct Packet {
+    void *data;
+    union {
+        uint8_t *network_header;
+        struct ip *ip;
+    };
+    uint8_t *transport_header;
+    int size;
+    /* Time of packet creation, in wall clock ms */
+    int64_t creation_ms;
+    /* Get vnet_hdr_len from filter */
+    uint32_t vnet_hdr_len;
+    uint32_t tcp_seq; /* sequence number */
+    uint32_t tcp_ack; /* acknowledgement number */
+    /* the sequence number of the last byte of the packet */
+    uint32_t seq_end;
+    uint8_t header_size;  /* the header length */
+    uint16_t payload_size; /* the payload length */
+    /* record the payload offset(the length that has been compared) */
+    uint16_t offset;
+    uint8_t flags; /* Flags(aka Control bits) */
+} Packet;
+
+typedef struct ConnectionKey {
+    /* (src, dst) must be grouped, in the same way than in IP header */
+    struct in_addr src;
+    struct in_addr dst;
+    uint16_t src_port;
+    uint16_t dst_port;
+    uint8_t ip_proto;
+} QEMU_PACKED ConnectionKey;
+
+typedef struct Connection {
+    /* connection primary send queue: element type: Packet */
+    GQueue primary_list;
+    /* connection secondary send queue: element type: Packet */
+    GQueue secondary_list;
+    /* flag to enqueue unprocessed_connections */
+    bool processing;
+    uint8_t ip_proto;
+    /* record the sequence number that has been compared */
+    uint32_t compare_seq;
+    /* the maximum of acknowledgement number in primary_list queue */
+    uint32_t pack;
+    /* the maximum of acknowledgement number in secondary_list queue */
+    uint32_t sack;
+    /* offset = secondary_seq - primary_seq */
+    uint32_t  offset;
+
+    int tcp_state; /* TCP FSM state */
+    uint32_t fin_ack_seq; /* the seq of 'fin=1,ack=1' */
+} Connection;
+
+uint32_t connection_key_hash(const void *opaque);
+int connection_key_equal(const void *opaque1, const void *opaque2);
+int parse_packet_early(Packet *pkt);
+void extract_ip_and_port(uint32_t tmp_ports, ConnectionKey *key, Packet *pkt);
+void fill_connection_key(Packet *pkt, ConnectionKey *key);
+void reverse_connection_key(ConnectionKey *key);
+Connection *connection_new(ConnectionKey *key);
+void connection_destroy(void *opaque);
+Connection *connection_get(GHashTable *connection_track_table,
+                           ConnectionKey *key,
+                           GQueue *conn_list);
+bool connection_has_tracked(GHashTable *connection_track_table,
+                            ConnectionKey *key);
+void connection_hashtable_reset(GHashTable *connection_track_table);
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
+void packet_destroy(void *opaque, void *user_data);
+
+#endif /* NET_COLO_H */
diff --git a/net/dump.c b/net/dump.c
index 411972172..23b3628dd 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -22,15 +22,19 @@
  * THE SOFTWARE.
  */
 
-#include "clients.h"
+#include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "clients.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "qemu/iov.h"
 #include "qemu/log.h"
+#include "qemu/module.h"
 #include "qemu/timer.h"
-#include "hub.h"
+#include "qapi/visitor.h"
+#include "net/filter.h"
 
 typedef struct DumpState {
-    NetClientState nc;
     int64_t start_ts;
     int fd;
     int pcap_caplen;
@@ -57,28 +61,33 @@ struct pcap_sf_pkthdr {
     uint32_t len;
 };
 
-static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)
 {
-    DumpState *s = DO_UPCAST(DumpState, nc, nc);
     struct pcap_sf_pkthdr hdr;
     int64_t ts;
     int caplen;
+    size_t size = iov_size(iov, cnt);
+    struct iovec dumpiov[cnt + 1];
 
     /* Early return in case of previous error. */
     if (s->fd < 0) {
         return size;
     }
 
-    ts = muldiv64(qemu_get_clock_ns(vm_clock), 1000000, get_ticks_per_sec());
+    ts = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL);
     caplen = size > s->pcap_caplen ? s->pcap_caplen : size;
 
     hdr.ts.tv_sec = ts / 1000000 + s->start_ts;
     hdr.ts.tv_usec = ts % 1000000;
     hdr.caplen = caplen;
     hdr.len = size;
-    if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
-        write(s->fd, buf, caplen) != caplen) {
-        qemu_log("-net dump write error - stop dump\n");
+
+    dumpiov[0].iov_base = &hdr;
+    dumpiov[0].iov_len = sizeof(hdr);
+    cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen);
+
+    if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
+        error_report("network dump write error - stopping dump");
         close(s->fd);
         s->fd = -1;
     }
@@ -86,32 +95,22 @@ static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     return size;
 }
 
-static void dump_cleanup(NetClientState *nc)
+static void dump_cleanup(DumpState *s)
 {
-    DumpState *s = DO_UPCAST(DumpState, nc, nc);
-
     close(s->fd);
+    s->fd = -1;
 }
 
-static NetClientInfo net_dump_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_DUMP,
-    .size = sizeof(DumpState),
-    .receive = dump_receive,
-    .cleanup = dump_cleanup,
-};
-
-static int net_dump_init(NetClientState *peer, const char *device,
-                         const char *name, const char *filename, int len)
+static int net_dump_state_init(DumpState *s, const char *filename,
+                               int len, Error **errp)
 {
     struct pcap_file_hdr hdr;
-    NetClientState *nc;
-    DumpState *s;
     struct tm tm;
     int fd;
 
     fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY | O_BINARY, 0644);
     if (fd < 0) {
-        error_report("-net dump: can't open %s", filename);
+        error_setg_errno(errp, errno, "net dump: can't open %s", filename);
         return -1;
     }
 
@@ -124,18 +123,11 @@ static int net_dump_init(NetClientState *peer, const char *device,
     hdr.linktype = 1;
 
     if (write(fd, &hdr, sizeof(hdr)) < sizeof(hdr)) {
-        error_report("-net dump write error: %s", strerror(errno));
+        error_setg_errno(errp, errno, "net dump write error");
         close(fd);
         return -1;
     }
 
-    nc = qemu_new_net_client(&net_dump_info, peer, device, name);
-
-    snprintf(nc->info_str, sizeof(nc->info_str),
-             "dump to %s (len=%d)", filename, len);
-
-    s = DO_UPCAST(DumpState, nc, nc);
-
     s->fd = fd;
     s->pcap_caplen = len;
 
@@ -145,41 +137,134 @@ static int net_dump_init(NetClientState *peer, const char *device,
     return 0;
 }
 
-int net_init_dump(const NetClientOptions *opts, const char *name,
-                  NetClientState *peer)
+#define TYPE_FILTER_DUMP "filter-dump"
+
+#define FILTER_DUMP(obj) \
+    OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP)
+
+struct NetFilterDumpState {
+    NetFilterState nfs;
+    DumpState ds;
+    char *filename;
+    uint32_t maxlen;
+};
+typedef struct NetFilterDumpState NetFilterDumpState;
+
+static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
+                                       unsigned flags, const struct iovec *iov,
+                                       int iovcnt, NetPacketSent *sent_cb)
 {
-    int len;
-    const char *file;
-    char def_file[128];
-    const NetdevDumpOptions *dump;
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP);
-    dump = opts->dump;
+    dump_receive_iov(&nfds->ds, iov, iovcnt);
+    return 0;
+}
 
-    assert(peer);
+static void filter_dump_cleanup(NetFilterState *nf)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
 
-    if (dump->has_file) {
-        file = dump->file;
-    } else {
-        int id;
-        int ret;
+    dump_cleanup(&nfds->ds);
+}
 
-        ret = net_hub_id_for_client(peer, &id);
-        assert(ret == 0); /* peer must be on a hub */
+static void filter_dump_setup(NetFilterState *nf, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(nf);
 
-        snprintf(def_file, sizeof(def_file), "qemu-vlan%d.pcap", id);
-        file = def_file;
+    if (!nfds->filename) {
+        error_setg(errp, "dump filter needs 'file' property set!");
+        return;
     }
 
-    if (dump->has_len) {
-        if (dump->len > INT_MAX) {
-            error_report("invalid length: %"PRIu64, dump->len);
-            return -1;
-        }
-        len = dump->len;
-    } else {
-        len = 65536;
+    net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp);
+}
+
+static void filter_dump_get_maxlen(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+    uint32_t value = nfds->maxlen;
+
+    visit_type_uint32(v, name, &value, errp);
+}
+
+static void filter_dump_set_maxlen(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+    Error *local_err = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, name, &value, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    if (value == 0) {
+        error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'",
+                   object_get_typename(obj), name, value);
+        goto out;
     }
+    nfds->maxlen = value;
 
-    return net_dump_init(peer, "dump", name, file, len);
+out:
+    error_propagate(errp, local_err);
 }
+
+static char *file_dump_get_filename(Object *obj, Error **errp)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    return g_strdup(nfds->filename);
+}
+
+static void file_dump_set_filename(Object *obj, const char *value, Error **errp)
+{
+   NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    g_free(nfds->filename);
+    nfds->filename = g_strdup(value);
+}
+
+static void filter_dump_instance_init(Object *obj)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    nfds->maxlen = 65536;
+
+    object_property_add(obj, "maxlen", "uint32", filter_dump_get_maxlen,
+                        filter_dump_set_maxlen, NULL, NULL, NULL);
+    object_property_add_str(obj, "file", file_dump_get_filename,
+                            file_dump_set_filename, NULL);
+}
+
+static void filter_dump_instance_finalize(Object *obj)
+{
+    NetFilterDumpState *nfds = FILTER_DUMP(obj);
+
+    g_free(nfds->filename);
+}
+
+static void filter_dump_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_dump_setup;
+    nfc->cleanup = filter_dump_cleanup;
+    nfc->receive_iov = filter_dump_receive_iov;
+}
+
+static const TypeInfo filter_dump_info = {
+    .name = TYPE_FILTER_DUMP,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_dump_class_init,
+    .instance_init = filter_dump_instance_init,
+    .instance_finalize = filter_dump_instance_finalize,
+    .instance_size = sizeof(NetFilterDumpState),
+};
+
+static void filter_dump_register_types(void)
+{
+    type_register_static(&filter_dump_info);
+}
+
+type_init(filter_dump_register_types);
diff --git a/net/eth.c b/net/eth.c
index 7c61132cb..1e0821c5f 100644
--- a/net/eth.c
+++ b/net/eth.c
@@ -15,13 +15,14 @@
  *
  */
 
+#include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "net/eth.h"
 #include "net/checksum.h"
-#include "qemu-common.h"
 #include "net/tap.h"
 
-void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
-    bool *is_new)
+void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
+    uint16_t vlan_ethtype, bool *is_new)
 {
     struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
 
@@ -35,7 +36,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
     default:
         /* No VLAN header, put a new one */
         vhdr->h_proto = ehdr->h_proto;
-        ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
+        ehdr->h_proto = cpu_to_be16(vlan_ethtype);
         *is_new = true;
         break;
     }
@@ -71,33 +72,106 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
             return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
         }
     }
-
-    /* Unsupported offload */
-    g_assert_not_reached();
+    qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
+        "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
 
     return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
 }
 
-void eth_get_protocols(const uint8_t *headers,
-                       uint32_t hdr_length,
+uint16_t
+eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
+{
+    uint16_t proto;
+    size_t copied;
+    size_t size = iov_size(l2hdr_iov, iovcnt);
+    size_t proto_offset = l2hdr_len - sizeof(proto);
+
+    if (size < proto_offset) {
+        return ETH_P_UNKNOWN;
+    }
+
+    copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
+                        &proto, sizeof(proto));
+
+    return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
+}
+
+static bool
+_eth_copy_chunk(size_t input_size,
+                const struct iovec *iov, int iovcnt,
+                size_t offset, size_t length,
+                void *buffer)
+{
+    size_t copied;
+
+    if (input_size < offset) {
+        return false;
+    }
+
+    copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
+
+    if (copied < length) {
+        return false;
+    }
+
+    return true;
+}
+
+static bool
+_eth_tcp_has_data(bool is_ip4,
+                  const struct ip_header  *ip4_hdr,
+                  const struct ip6_header *ip6_hdr,
+                  size_t full_ip6hdr_len,
+                  const struct tcp_header *tcp)
+{
+    uint32_t l4len;
+
+    if (is_ip4) {
+        l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
+    } else {
+        size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
+        l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
+    }
+
+    return l4len > TCP_HEADER_DATA_OFFSET(tcp);
+}
+
+void eth_get_protocols(const struct iovec *iov, int iovcnt,
                        bool *isip4, bool *isip6,
-                       bool *isudp, bool *istcp)
+                       bool *isudp, bool *istcp,
+                       size_t *l3hdr_off,
+                       size_t *l4hdr_off,
+                       size_t *l5hdr_off,
+                       eth_ip6_hdr_info *ip6hdr_info,
+                       eth_ip4_hdr_info *ip4hdr_info,
+                       eth_l4_hdr_info  *l4hdr_info)
 {
     int proto;
-    size_t l2hdr_len = eth_get_l2_hdr_length(headers);
-    assert(hdr_length >= eth_get_l2_hdr_length(headers));
+    bool fragment = false;
+    size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
+    size_t input_size = iov_size(iov, iovcnt);
+    size_t copied;
+
     *isip4 = *isip6 = *isudp = *istcp = false;
 
-    proto = eth_get_l3_proto(headers, l2hdr_len);
+    proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
+
+    *l3hdr_off = l2hdr_len;
+
     if (proto == ETH_P_IP) {
-        *isip4 = true;
+        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
+
+        if (input_size < l2hdr_len) {
+            return;
+        }
 
-        struct ip_header *iphdr;
+        copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
 
-        assert(hdr_length >=
-            eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
+        *isip4 = true;
 
-        iphdr = PKT_GET_IP_HDR(headers);
+        if (copied < sizeof(*iphdr)) {
+            return;
+        }
 
         if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
             if (iphdr->ip_p == IP_PROTO_TCP) {
@@ -106,33 +180,152 @@ void eth_get_protocols(const uint8_t *headers,
                 *isudp = true;
             }
         }
-    } else if (proto == ETH_P_IPV6) {
-        uint8_t l4proto;
-        size_t full_ip6hdr_len;
 
-        struct iovec hdr_vec;
-        hdr_vec.iov_base = (void *) headers;
-        hdr_vec.iov_len = hdr_length;
+        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
+        *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
+
+        fragment = ip4hdr_info->fragment;
+    } else if (proto == ETH_P_IPV6) {
 
         *isip6 = true;
-        if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len,
-                              &l4proto, &full_ip6hdr_len)) {
-            if (l4proto == IP_PROTO_TCP) {
+        if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
+                               ip6hdr_info)) {
+            if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
                 *istcp = true;
-            } else if (l4proto == IP_PROTO_UDP) {
+            } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
                 *isudp = true;
             }
+        } else {
+            return;
+        }
+
+        *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
+        fragment = ip6hdr_info->fragment;
+    }
+
+    if (!fragment) {
+        if (*istcp) {
+            *istcp = _eth_copy_chunk(input_size,
+                                     iov, iovcnt,
+                                     *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
+                                     &l4hdr_info->hdr.tcp);
+
+            if (*istcp) {
+                *l5hdr_off = *l4hdr_off +
+                    TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
+
+                l4hdr_info->has_tcp_data =
+                    _eth_tcp_has_data(proto == ETH_P_IP,
+                                      &ip4hdr_info->ip4_hdr,
+                                      &ip6hdr_info->ip6_hdr,
+                                      *l4hdr_off - *l3hdr_off,
+                                      &l4hdr_info->hdr.tcp);
+            }
+        } else if (*isudp) {
+            *isudp = _eth_copy_chunk(input_size,
+                                     iov, iovcnt,
+                                     *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
+                                     &l4hdr_info->hdr.udp);
+            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
         }
     }
 }
 
+size_t
+eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
+               uint8_t *new_ehdr_buf,
+               uint16_t *payload_offset, uint16_t *tci)
+{
+    struct vlan_header vlan_hdr;
+    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
+
+    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
+                               new_ehdr, sizeof(*new_ehdr));
+
+    if (copied < sizeof(*new_ehdr)) {
+        return 0;
+    }
+
+    switch (be16_to_cpu(new_ehdr->h_proto)) {
+    case ETH_P_VLAN:
+    case ETH_P_DVLAN:
+        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
+                            &vlan_hdr, sizeof(vlan_hdr));
+
+        if (copied < sizeof(vlan_hdr)) {
+            return 0;
+        }
+
+        new_ehdr->h_proto = vlan_hdr.h_proto;
+
+        *tci = be16_to_cpu(vlan_hdr.h_tci);
+        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
+
+        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
+
+            copied = iov_to_buf(iov, iovcnt, *payload_offset,
+                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
+
+            if (copied < sizeof(vlan_hdr)) {
+                return 0;
+            }
+
+            *payload_offset += sizeof(vlan_hdr);
+
+            return sizeof(struct eth_header) + sizeof(struct vlan_header);
+        } else {
+            return sizeof(struct eth_header);
+        }
+    default:
+        return 0;
+    }
+}
+
+size_t
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
+                  uint16_t vet, uint8_t *new_ehdr_buf,
+                  uint16_t *payload_offset, uint16_t *tci)
+{
+    struct vlan_header vlan_hdr;
+    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
+
+    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
+                               new_ehdr, sizeof(*new_ehdr));
+
+    if (copied < sizeof(*new_ehdr)) {
+        return 0;
+    }
+
+    if (be16_to_cpu(new_ehdr->h_proto) == vet) {
+        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
+                            &vlan_hdr, sizeof(vlan_hdr));
+
+        if (copied < sizeof(vlan_hdr)) {
+            return 0;
+        }
+
+        new_ehdr->h_proto = vlan_hdr.h_proto;
+
+        *tci = be16_to_cpu(vlan_hdr.h_tci);
+        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
+        return sizeof(struct eth_header);
+    }
+
+    return 0;
+}
+
 void
 eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
                             void *l3hdr, size_t l3hdr_len,
                             size_t l3payload_len,
                             size_t frag_offset, bool more_frags)
 {
-    if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) {
+    const struct iovec l2vec = {
+        .iov_base = (void *) l2hdr,
+        .iov_len = l2hdr_len
+    };
+
+    if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
         uint16_t orig_flags;
         struct ip_header *iphdr = (struct ip_header *) l3hdr;
         uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
@@ -157,7 +350,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
 }
 
 uint32_t
-eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
+eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
+                             uint16_t csl,
+                             uint32_t *cso)
 {
     struct ip_pseudo_header ipph;
     ipph.ip_src = iphdr->ip_src;
@@ -165,7 +360,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
     ipph.ip_payload = cpu_to_be16(csl);
     ipph.ip_proto = iphdr->ip_p;
     ipph.zeros = 0;
-    return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph);
+    *cso = sizeof(ipph);
+    return net_checksum_add(*cso, (uint8_t *) &ipph);
+}
+
+uint32_t
+eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
+                             uint16_t csl,
+                             uint8_t l4_proto,
+                             uint32_t *cso)
+{
+    struct ip6_pseudo_header ipph;
+    ipph.ip6_src = iphdr->ip6_src;
+    ipph.ip6_dst = iphdr->ip6_dst;
+    ipph.len = cpu_to_be16(csl);
+    ipph.zero[0] = 0;
+    ipph.zero[1] = 0;
+    ipph.zero[2] = 0;
+    ipph.next_hdr = l4_proto;
+    *cso = sizeof(ipph);
+    return net_checksum_add(*cso, (uint8_t *)&ipph);
 }
 
 static bool
@@ -185,33 +399,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
     }
 }
 
-bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags,
-                        size_t ip6hdr_off, uint8_t *l4proto,
-                        size_t *full_hdr_len)
+static bool
+_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
+                        size_t rthdr_offset,
+                        struct ip6_ext_hdr *ext_hdr,
+                        struct in6_address *dst_addr)
+{
+    struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
+
+    if ((rthdr->rtype == 2) &&
+        (rthdr->len == sizeof(struct in6_address) / 8) &&
+        (rthdr->segleft == 1)) {
+
+        size_t input_size = iov_size(pkt, pkt_frags);
+        size_t bytes_read;
+
+        if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
+            return false;
+        }
+
+        bytes_read = iov_to_buf(pkt, pkt_frags,
+                                rthdr_offset + sizeof(*ext_hdr),
+                                dst_addr, sizeof(*dst_addr));
+
+        return bytes_read == sizeof(*dst_addr);
+    }
+
+    return false;
+}
+
+static bool
+_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
+                        size_t dsthdr_offset,
+                        struct ip6_ext_hdr *ext_hdr,
+                        struct in6_address *src_addr)
+{
+    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
+    struct ip6_option_hdr opthdr;
+    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
+
+    while (bytes_left > sizeof(opthdr)) {
+        size_t input_size = iov_size(pkt, pkt_frags);
+        size_t bytes_read, optlen;
+
+        if (input_size < opt_offset) {
+            return false;
+        }
+
+        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
+                                &opthdr, sizeof(opthdr));
+
+        if (bytes_read != sizeof(opthdr)) {
+            return false;
+        }
+
+        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
+                                               : (opthdr.len + sizeof(opthdr));
+
+        if (optlen > bytes_left) {
+            return false;
+        }
+
+        if (opthdr.type == IP6_OPT_HOME) {
+            size_t input_size = iov_size(pkt, pkt_frags);
+
+            if (input_size < opt_offset + sizeof(opthdr)) {
+                return false;
+            }
+
+            bytes_read = iov_to_buf(pkt, pkt_frags,
+                                    opt_offset + sizeof(opthdr),
+                                    src_addr, sizeof(*src_addr));
+
+            return bytes_read == sizeof(*src_addr);
+        }
+
+        opt_offset += optlen;
+        bytes_left -= optlen;
+    }
+
+    return false;
+}
+
+bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
+                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
 {
-    struct ip6_header ip6_hdr;
     struct ip6_ext_hdr ext_hdr;
     size_t bytes_read;
+    uint8_t curr_ext_hdr_type;
+    size_t input_size = iov_size(pkt, pkt_frags);
+
+    info->rss_ex_dst_valid = false;
+    info->rss_ex_src_valid = false;
+    info->fragment = false;
+
+    if (input_size < ip6hdr_off) {
+        return false;
+    }
 
     bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
-                            &ip6_hdr, sizeof(ip6_hdr));
-    if (bytes_read < sizeof(ip6_hdr)) {
+                            &info->ip6_hdr, sizeof(info->ip6_hdr));
+    if (bytes_read < sizeof(info->ip6_hdr)) {
         return false;
     }
 
-    *full_hdr_len = sizeof(struct ip6_header);
+    info->full_hdr_len = sizeof(struct ip6_header);
+
+    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
 
-    if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) {
-        *l4proto = ip6_hdr.ip6_nxt;
+    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
+        info->l4proto = info->ip6_hdr.ip6_nxt;
+        info->has_ext_hdrs = false;
         return true;
     }
 
+    info->has_ext_hdrs = true;
+
     do {
-        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len,
+        if (input_size < ip6hdr_off + info->full_hdr_len) {
+            return false;
+        }
+
+        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
                                 &ext_hdr, sizeof(ext_hdr));
-        *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
-    } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt));
 
-    *l4proto = ext_hdr.ip6r_nxt;
+        if (bytes_read < sizeof(ext_hdr)) {
+            return false;
+        }
+
+        if (curr_ext_hdr_type == IP6_ROUTING) {
+            info->rss_ex_dst_valid =
+                _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
+                                         ip6hdr_off + info->full_hdr_len,
+                                         &ext_hdr, &info->rss_ex_dst);
+        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
+            info->rss_ex_src_valid =
+                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
+                                         ip6hdr_off + info->full_hdr_len,
+                                         &ext_hdr, &info->rss_ex_src);
+        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
+            info->fragment = true;
+        }
+
+        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
+        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
+    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
+
+    info->l4proto = ext_hdr.ip6r_nxt;
     return true;
 }
diff --git a/net/filter-buffer.c b/net/filter-buffer.c
new file mode 100644
index 000000000..88da78f82
--- /dev/null
+++ b/net/filter-buffer.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "net/filter.h"
+#include "net/queue.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+#include "qemu/iov.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/qmp/qerror.h"
+#include "qom/object.h"
+
+#define TYPE_FILTER_BUFFER "filter-buffer"
+
+#define FILTER_BUFFER(obj) \
+    OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER)
+
+typedef struct FilterBufferState {
+    NetFilterState parent_obj;
+
+    NetQueue *incoming_queue;
+    uint32_t interval;
+    QEMUTimer release_timer;
+} FilterBufferState;
+
+static void filter_buffer_flush(NetFilterState *nf)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    if (!qemu_net_queue_flush(s->incoming_queue)) {
+        /* Unable to empty the queue, purge remaining packets */
+        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
+    }
+}
+
+static void filter_buffer_release_timer(void *opaque)
+{
+    NetFilterState *nf = opaque;
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    /*
+     * Note: filter_buffer_flush() drops packets that can't be sent
+     * TODO: We should leave them queued.  But currently there's no way
+     * for the next filter or receiver to notify us that it can receive
+     * more packets.
+     */
+    filter_buffer_flush(nf);
+    /* Timer rearmed to fire again in s->interval microseconds. */
+    timer_mod(&s->release_timer,
+              qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+}
+
+/* filter APIs */
+static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
+                                         NetClientState *sender,
+                                         unsigned flags,
+                                         const struct iovec *iov,
+                                         int iovcnt,
+                                         NetPacketSent *sent_cb)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    /*
+     * We return size when buffer a packet, the sender will take it as
+     * a already sent packet, so sent_cb should not be called later.
+     *
+     * FIXME: Even if the guest can't receive packets for some reasons,
+     * the filter can still accept packets until its internal queue is full.
+     * For example:
+     *   For some reason, receiver could not receive more packets
+     * (.can_receive() returns zero). Without a filter, at most one packet
+     * will be queued in incoming queue and sender's poll will be disabled
+     * unit its sent_cb() was called. With a filter, it will keep receiving
+     * the packets without caring about the receiver. This is suboptimal.
+     * May need more thoughts (e.g keeping sent_cb).
+     */
+    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
+                              iov, iovcnt, NULL);
+    return iov_size(iov, iovcnt);
+}
+
+static void filter_buffer_cleanup(NetFilterState *nf)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    if (s->interval) {
+        timer_del(&s->release_timer);
+    }
+
+    /* flush packets */
+    if (s->incoming_queue) {
+        filter_buffer_flush(nf);
+        g_free(s->incoming_queue);
+    }
+}
+
+static void filter_buffer_setup_timer(NetFilterState *nf)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    if (s->interval) {
+        timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL,
+                      filter_buffer_release_timer, nf);
+        /* Timer armed to fire in s->interval microseconds. */
+        timer_mod(&s->release_timer,
+                  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+    }
+}
+
+static void filter_buffer_setup(NetFilterState *nf, Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    /*
+     * We may want to accept zero interval when VM FT solutions like MC
+     * or COLO use this filter to release packets on demand.
+     */
+    if (!s->interval) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval",
+                   "a non-zero interval");
+        return;
+    }
+
+    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+    filter_buffer_setup_timer(nf);
+}
+
+static void filter_buffer_status_changed(NetFilterState *nf, Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(nf);
+
+    if (!nf->on) {
+        if (s->interval) {
+            timer_del(&s->release_timer);
+        }
+        filter_buffer_flush(nf);
+    } else {
+        filter_buffer_setup_timer(nf);
+    }
+}
+
+static void filter_buffer_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_buffer_setup;
+    nfc->cleanup = filter_buffer_cleanup;
+    nfc->receive_iov = filter_buffer_receive_iov;
+    nfc->status_changed = filter_buffer_status_changed;
+}
+
+static void filter_buffer_get_interval(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(obj);
+    uint32_t value = s->interval;
+
+    visit_type_uint32(v, name, &value, errp);
+}
+
+static void filter_buffer_set_interval(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    FilterBufferState *s = FILTER_BUFFER(obj);
+    Error *local_err = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, name, &value, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    if (!value) {
+        error_setg(&local_err, "Property '%s.%s' requires a positive value",
+                   object_get_typename(obj), name);
+        goto out;
+    }
+    s->interval = value;
+
+out:
+    error_propagate(errp, local_err);
+}
+
+static void filter_buffer_init(Object *obj)
+{
+    object_property_add(obj, "interval", "uint32",
+                        filter_buffer_get_interval,
+                        filter_buffer_set_interval, NULL, NULL, NULL);
+}
+
+static const TypeInfo filter_buffer_info = {
+    .name = TYPE_FILTER_BUFFER,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_buffer_class_init,
+    .instance_init = filter_buffer_init,
+    .instance_size = sizeof(FilterBufferState),
+};
+
+static void register_types(void)
+{
+    type_register_static(&filter_buffer_info);
+}
+
+type_init(register_types);
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
new file mode 100644
index 000000000..8d36009c5
--- /dev/null
+++ b/net/filter-mirror.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "net/filter.h"
+#include "net/net.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "chardev/char-fe.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+
+#define FILTER_MIRROR(obj) \
+    OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_MIRROR)
+
+#define FILTER_REDIRECTOR(obj) \
+    OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_REDIRECTOR)
+
+#define TYPE_FILTER_MIRROR "filter-mirror"
+#define TYPE_FILTER_REDIRECTOR "filter-redirector"
+#define REDIRECTOR_MAX_LEN NET_BUFSIZE
+
+typedef struct MirrorState {
+    NetFilterState parent_obj;
+    char *indev;
+    char *outdev;
+    CharBackend chr_in;
+    CharBackend chr_out;
+    SocketReadState rs;
+    bool vnet_hdr;
+} MirrorState;
+
+static int filter_send(MirrorState *s,
+                       const struct iovec *iov,
+                       int iovcnt)
+{
+    NetFilterState *nf = NETFILTER(s);
+    int ret = 0;
+    ssize_t size = 0;
+    uint32_t len = 0;
+    char *buf;
+
+    size = iov_size(iov, iovcnt);
+    if (!size) {
+        return 0;
+    }
+
+    len = htonl(size);
+    ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
+    if (ret != sizeof(len)) {
+        goto err;
+    }
+
+    if (s->vnet_hdr) {
+        /*
+         * If vnet_hdr = on, we send vnet header len to make other
+         * module(like colo-compare) know how to parse net
+         * packet correctly.
+         */
+        ssize_t vnet_hdr_len;
+
+        vnet_hdr_len = nf->netdev->vnet_hdr_len;
+
+        len = htonl(vnet_hdr_len);
+        ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
+        if (ret != sizeof(len)) {
+            goto err;
+        }
+    }
+
+    buf = g_malloc(size);
+    iov_to_buf(iov, iovcnt, 0, buf, size);
+    ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
+    g_free(buf);
+    if (ret != size) {
+        goto err;
+    }
+
+    return 0;
+
+err:
+    return ret < 0 ? ret : -EIO;
+}
+
+static void redirector_to_filter(NetFilterState *nf,
+                                 const uint8_t *buf,
+                                 int len)
+{
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = len,
+    };
+
+    if (nf->direction == NET_FILTER_DIRECTION_ALL ||
+        nf->direction == NET_FILTER_DIRECTION_TX) {
+        qemu_netfilter_pass_to_next(nf->netdev, 0, &iov, 1, nf);
+    }
+
+    if (nf->direction == NET_FILTER_DIRECTION_ALL ||
+        nf->direction == NET_FILTER_DIRECTION_RX) {
+        qemu_netfilter_pass_to_next(nf->netdev->peer, 0, &iov, 1, nf);
+     }
+}
+
+static int redirector_chr_can_read(void *opaque)
+{
+    return REDIRECTOR_MAX_LEN;
+}
+
+static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
+{
+    NetFilterState *nf = opaque;
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+    int ret;
+
+    ret = net_fill_rstate(&s->rs, buf, size);
+
+    if (ret == -1) {
+        qemu_chr_fe_set_handlers(&s->chr_in, NULL, NULL, NULL,
+                                 NULL, NULL, NULL, true);
+    }
+}
+
+static void redirector_chr_event(void *opaque, int event)
+{
+    NetFilterState *nf = opaque;
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+
+    switch (event) {
+    case CHR_EVENT_CLOSED:
+        qemu_chr_fe_set_handlers(&s->chr_in, NULL, NULL, NULL,
+                                 NULL, NULL, NULL, true);
+        break;
+    default:
+        break;
+    }
+}
+
+static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
+                                         NetClientState *sender,
+                                         unsigned flags,
+                                         const struct iovec *iov,
+                                         int iovcnt,
+                                         NetPacketSent *sent_cb)
+{
+    MirrorState *s = FILTER_MIRROR(nf);
+    int ret;
+
+    ret = filter_send(s, iov, iovcnt);
+    if (ret) {
+        error_report("filter mirror send failed(%s)", strerror(-ret));
+    }
+
+    /*
+     * we don't hope this error interrupt the normal
+     * path of net packet, so we always return zero.
+     */
+    return 0;
+}
+
+static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
+                                             NetClientState *sender,
+                                             unsigned flags,
+                                             const struct iovec *iov,
+                                             int iovcnt,
+                                             NetPacketSent *sent_cb)
+{
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+    int ret;
+
+    if (qemu_chr_fe_backend_connected(&s->chr_out)) {
+        ret = filter_send(s, iov, iovcnt);
+        if (ret) {
+            error_report("filter redirector send failed(%s)", strerror(-ret));
+        }
+        return iov_size(iov, iovcnt);
+    } else {
+        return 0;
+    }
+}
+
+static void filter_mirror_cleanup(NetFilterState *nf)
+{
+    MirrorState *s = FILTER_MIRROR(nf);
+
+    qemu_chr_fe_deinit(&s->chr_out, false);
+}
+
+static void filter_redirector_cleanup(NetFilterState *nf)
+{
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+
+    qemu_chr_fe_deinit(&s->chr_in, false);
+    qemu_chr_fe_deinit(&s->chr_out, false);
+}
+
+static void filter_mirror_setup(NetFilterState *nf, Error **errp)
+{
+    MirrorState *s = FILTER_MIRROR(nf);
+    Chardev *chr;
+
+    if (s->outdev == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "filter-mirror parameter"\
+                  " 'outdev' cannot be empty");
+        return;
+    }
+
+    chr = qemu_chr_find(s->outdev);
+    if (chr == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", s->outdev);
+        return;
+    }
+
+    qemu_chr_fe_init(&s->chr_out, chr, errp);
+}
+
+static void redirector_rs_finalize(SocketReadState *rs)
+{
+    MirrorState *s = container_of(rs, MirrorState, rs);
+    NetFilterState *nf = NETFILTER(s);
+
+    redirector_to_filter(nf, rs->buf, rs->packet_len);
+}
+
+static void filter_redirector_setup(NetFilterState *nf, Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(nf);
+    Chardev *chr;
+
+    if (!s->indev && !s->outdev) {
+        error_setg(errp, "filter redirector needs 'indev' or "
+                   "'outdev' at least one property set");
+        return;
+    } else if (s->indev && s->outdev) {
+        if (!strcmp(s->indev, s->outdev)) {
+            error_setg(errp, "'indev' and 'outdev' could not be same "
+                       "for filter redirector");
+            return;
+        }
+    }
+
+    net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
+
+    if (s->indev) {
+        chr = qemu_chr_find(s->indev);
+        if (chr == NULL) {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "IN Device '%s' not found", s->indev);
+            return;
+        }
+
+        if (!qemu_chr_fe_init(&s->chr_in, chr, errp)) {
+            return;
+        }
+
+        qemu_chr_fe_set_handlers(&s->chr_in, redirector_chr_can_read,
+                                 redirector_chr_read, redirector_chr_event,
+                                 NULL, nf, NULL, true);
+    }
+
+    if (s->outdev) {
+        chr = qemu_chr_find(s->outdev);
+        if (chr == NULL) {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                      "OUT Device '%s' not found", s->outdev);
+            return;
+        }
+        if (!qemu_chr_fe_init(&s->chr_out, chr, errp)) {
+            return;
+        }
+    }
+}
+
+static void filter_mirror_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_mirror_setup;
+    nfc->cleanup = filter_mirror_cleanup;
+    nfc->receive_iov = filter_mirror_receive_iov;
+}
+
+static void filter_redirector_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = filter_redirector_setup;
+    nfc->cleanup = filter_redirector_cleanup;
+    nfc->receive_iov = filter_redirector_receive_iov;
+}
+
+static char *filter_redirector_get_indev(Object *obj, Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    return g_strdup(s->indev);
+}
+
+static void filter_redirector_set_indev(Object *obj,
+                                        const char *value,
+                                        Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    g_free(s->indev);
+    s->indev = g_strdup(value);
+}
+
+static char *filter_mirror_get_outdev(Object *obj, Error **errp)
+{
+    MirrorState *s = FILTER_MIRROR(obj);
+
+    return g_strdup(s->outdev);
+}
+
+static void filter_mirror_set_outdev(Object *obj,
+                                     const char *value,
+                                     Error **errp)
+{
+    MirrorState *s = FILTER_MIRROR(obj);
+
+    g_free(s->outdev);
+    s->outdev = g_strdup(value);
+    if (!s->outdev) {
+        error_setg(errp, "filter mirror needs 'outdev' "
+                   "property set");
+        return;
+    }
+}
+
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
+{
+    MirrorState *s = FILTER_MIRROR(obj);
+
+    return s->vnet_hdr;
+}
+
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
+{
+    MirrorState *s = FILTER_MIRROR(obj);
+
+    s->vnet_hdr = value;
+}
+
+static char *filter_redirector_get_outdev(Object *obj, Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    return g_strdup(s->outdev);
+}
+
+static void filter_redirector_set_outdev(Object *obj,
+                                         const char *value,
+                                         Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    g_free(s->outdev);
+    s->outdev = g_strdup(value);
+}
+
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    return s->vnet_hdr;
+}
+
+static void filter_redirector_set_vnet_hdr(Object *obj,
+                                           bool value,
+                                           Error **errp)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    s->vnet_hdr = value;
+}
+
+static void filter_mirror_init(Object *obj)
+{
+    MirrorState *s = FILTER_MIRROR(obj);
+
+    object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
+                            filter_mirror_set_outdev, NULL);
+
+    s->vnet_hdr = false;
+    object_property_add_bool(obj, "vnet_hdr_support",
+                             filter_mirror_get_vnet_hdr,
+                             filter_mirror_set_vnet_hdr, NULL);
+}
+
+static void filter_redirector_init(Object *obj)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    object_property_add_str(obj, "indev", filter_redirector_get_indev,
+                            filter_redirector_set_indev, NULL);
+    object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
+                            filter_redirector_set_outdev, NULL);
+
+    s->vnet_hdr = false;
+    object_property_add_bool(obj, "vnet_hdr_support",
+                             filter_redirector_get_vnet_hdr,
+                             filter_redirector_set_vnet_hdr, NULL);
+}
+
+static void filter_mirror_fini(Object *obj)
+{
+    MirrorState *s = FILTER_MIRROR(obj);
+
+    g_free(s->outdev);
+}
+
+static void filter_redirector_fini(Object *obj)
+{
+    MirrorState *s = FILTER_REDIRECTOR(obj);
+
+    g_free(s->indev);
+    g_free(s->outdev);
+}
+
+static const TypeInfo filter_redirector_info = {
+    .name = TYPE_FILTER_REDIRECTOR,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_redirector_class_init,
+    .instance_init = filter_redirector_init,
+    .instance_finalize = filter_redirector_fini,
+    .instance_size = sizeof(MirrorState),
+};
+
+static const TypeInfo filter_mirror_info = {
+    .name = TYPE_FILTER_MIRROR,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_mirror_class_init,
+    .instance_init = filter_mirror_init,
+    .instance_finalize = filter_mirror_fini,
+    .instance_size = sizeof(MirrorState),
+};
+
+static void register_types(void)
+{
+    type_register_static(&filter_mirror_info);
+    type_register_static(&filter_redirector_info);
+}
+
+type_init(register_types);
diff --git a/net/filter-replay.c b/net/filter-replay.c
new file mode 100644
index 000000000..9dda19392
--- /dev/null
+++ b/net/filter-replay.c
@@ -0,0 +1,91 @@
+/*
+ * filter-replay.c
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ *                         of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "qemu/error-report.h"
+#include "qemu/iov.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+#include "qapi/visitor.h"
+#include "net/filter.h"
+#include "sysemu/replay.h"
+
+#define TYPE_FILTER_REPLAY "filter-replay"
+
+#define FILTER_REPLAY(obj) \
+    OBJECT_CHECK(NetFilterReplayState, (obj), TYPE_FILTER_REPLAY)
+
+struct NetFilterReplayState {
+    NetFilterState nfs;
+    ReplayNetState *rns;
+};
+typedef struct NetFilterReplayState NetFilterReplayState;
+
+static ssize_t filter_replay_receive_iov(NetFilterState *nf,
+                                         NetClientState *sndr,
+                                         unsigned flags,
+                                         const struct iovec *iov,
+                                         int iovcnt, NetPacketSent *sent_cb)
+{
+    NetFilterReplayState *nfrs = FILTER_REPLAY(nf);
+    switch (replay_mode) {
+    case REPLAY_MODE_RECORD:
+        if (nf->netdev == sndr) {
+            replay_net_packet_event(nfrs->rns, flags, iov, iovcnt);
+            return iov_size(iov, iovcnt);
+        }
+        return 0;
+    case REPLAY_MODE_PLAY:
+        /* Drop all packets in replay mode.
+           Packets from the log will be injected by the replay module. */
+        return iov_size(iov, iovcnt);
+    default:
+        /* Pass all the packets. */
+        return 0;
+    }
+}
+
+static void filter_replay_instance_init(Object *obj)
+{
+    NetFilterReplayState *nfrs = FILTER_REPLAY(obj);
+    nfrs->rns = replay_register_net(&nfrs->nfs);
+}
+
+static void filter_replay_instance_finalize(Object *obj)
+{
+    NetFilterReplayState *nfrs = FILTER_REPLAY(obj);
+    replay_unregister_net(nfrs->rns);
+}
+
+static void filter_replay_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->receive_iov = filter_replay_receive_iov;
+}
+
+static const TypeInfo filter_replay_info = {
+    .name = TYPE_FILTER_REPLAY,
+    .parent = TYPE_NETFILTER,
+    .class_init = filter_replay_class_init,
+    .instance_init = filter_replay_instance_init,
+    .instance_finalize = filter_replay_instance_finalize,
+    .instance_size = sizeof(NetFilterReplayState),
+};
+
+static void filter_replay_register_types(void)
+{
+    type_register_static(&filter_replay_info);
+}
+
+type_init(filter_replay_register_types);
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
new file mode 100644
index 000000000..31da08a2f
--- /dev/null
+++ b/net/filter-rewriter.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "colo.h"
+#include "net/filter.h"
+#include "net/net.h"
+#include "qemu/error-report.h"
+#include "qom/object.h"
+#include "qemu/main-loop.h"
+#include "qemu/iov.h"
+#include "net/checksum.h"
+#include "net/colo.h"
+#include "migration/colo.h"
+#include "util.h"
+
+#define FILTER_COLO_REWRITER(obj) \
+    OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
+
+#define TYPE_FILTER_REWRITER "filter-rewriter"
+#define FAILOVER_MODE_ON  true
+#define FAILOVER_MODE_OFF false
+
+typedef struct RewriterState {
+    NetFilterState parent_obj;
+    NetQueue *incoming_queue;
+    /* hashtable to save connection */
+    GHashTable *connection_track_table;
+    bool vnet_hdr;
+    bool failover_mode;
+} RewriterState;
+
+static void filter_rewriter_failover_mode(RewriterState *s)
+{
+    s->failover_mode = FAILOVER_MODE_ON;
+}
+
+static void filter_rewriter_flush(NetFilterState *nf)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(nf);
+
+    if (!qemu_net_queue_flush(s->incoming_queue)) {
+        /* Unable to empty the queue, purge remaining packets */
+        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
+    }
+}
+
+/*
+ * Return 1 on success, if return 0 means the pkt
+ * is not TCP packet
+ */
+static int is_tcp_packet(Packet *pkt)
+{
+    if (!parse_packet_early(pkt) &&
+        pkt->ip->ip_p == IPPROTO_TCP) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* handle tcp packet from primary guest */
+static int handle_primary_tcp_pkt(RewriterState *rf,
+                                  Connection *conn,
+                                  Packet *pkt, ConnectionKey *key)
+{
+    struct tcp_hdr *tcp_pkt;
+
+    tcp_pkt = (struct tcp_hdr *)pkt->transport_header;
+    if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        trace_colo_filter_rewriter_pkt_info(__func__,
+                    inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
+        conn->tcp_state == TCPS_SYN_SENT) {
+        conn->tcp_state = TCPS_ESTABLISHED;
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
+        /*
+         * we use this flag update offset func
+         * run once in independent tcp connection
+         */
+        conn->tcp_state = TCPS_SYN_RECEIVED;
+    }
+
+    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
+        if (conn->tcp_state == TCPS_SYN_RECEIVED) {
+            /*
+             * offset = secondary_seq - primary seq
+             * ack packet sent by guest from primary node,
+             * so we use th_ack - 1 get primary_seq
+             */
+            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
+            conn->tcp_state = TCPS_ESTABLISHED;
+        }
+        if (conn->offset) {
+            /* handle packets to the secondary from the primary */
+            tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
+
+            net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
+                                   pkt->size - pkt->vnet_hdr_len);
+        }
+
+        /*
+         * Passive close step 3
+         */
+        if ((conn->tcp_state == TCPS_LAST_ACK) &&
+            (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
+            conn->tcp_state = TCPS_CLOSED;
+            g_hash_table_remove(rf->connection_track_table, key);
+        }
+    }
+
+    if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
+        /*
+         * Passive close.
+         * Step 1:
+         * The *server* side of this connect is VM, *client* tries to close
+         * the connection. We will into CLOSE_WAIT status.
+         *
+         * Step 2:
+         * In this step we will into LAST_ACK status.
+         *
+         * We got 'fin=1, ack=1' packet from server side, we need to
+         * record the seq of 'fin=1, ack=1' packet.
+         *
+         * Step 3:
+         * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
+         * packet from server side. From this point, we can ensure that there
+         * will be no packets in the connection, except that, some errors
+         * happen between the path of 'filter object' and vNIC, if this rare
+         * case really happen, we can still create a new connection,
+         * So it is safe to remove the connection from connection_track_table.
+         *
+         */
+        if (conn->tcp_state == TCPS_ESTABLISHED) {
+            conn->tcp_state = TCPS_CLOSE_WAIT;
+        }
+
+        /*
+         * Active close step 2.
+         */
+        if (conn->tcp_state == TCPS_FIN_WAIT_1) {
+            /*
+             * For simplify implementation, we needn't wait 2MSL time
+             * in filter rewriter. Because guest kernel will track the
+             * TCP status and wait 2MSL time, if client resend the FIN
+             * packet, guest will apply the last ACK too.
+             * So, we skip the TCPS_TIME_WAIT state here and go straight
+             * to TCPS_CLOSED state.
+             */
+            conn->tcp_state = TCPS_CLOSED;
+            g_hash_table_remove(rf->connection_track_table, key);
+        }
+    }
+
+    return 0;
+}
+
+/* handle tcp packet from secondary guest */
+static int handle_secondary_tcp_pkt(RewriterState *rf,
+                                    Connection *conn,
+                                    Packet *pkt, ConnectionKey *key)
+{
+    struct tcp_hdr *tcp_pkt;
+
+    tcp_pkt = (struct tcp_hdr *)pkt->transport_header;
+
+    if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        trace_colo_filter_rewriter_pkt_info(__func__,
+                    inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+    }
+
+    if (conn->tcp_state == TCPS_SYN_RECEIVED &&
+        ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
+        /*
+         * save offset = secondary_seq and then
+         * in handle_primary_tcp_pkt make offset
+         * = secondary_seq - primary_seq
+         */
+        conn->offset = ntohl(tcp_pkt->th_seq);
+    }
+
+    /* VM active connect */
+    if (conn->tcp_state == TCPS_CLOSED &&
+        ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
+        conn->tcp_state = TCPS_SYN_SENT;
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
+        /* Only need to adjust seq while offset is Non-zero */
+        if (conn->offset) {
+            /* handle packets to the primary from the secondary*/
+            tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
+
+            net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
+                                   pkt->size - pkt->vnet_hdr_len);
+        }
+    }
+
+    /*
+     * Passive close step 2:
+     */
+    if (conn->tcp_state == TCPS_CLOSE_WAIT &&
+        (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
+        conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
+        conn->tcp_state = TCPS_LAST_ACK;
+    }
+
+    /*
+     * Active close
+     *
+     * Step 1:
+     * The *server* side of this connect is VM, *server* tries to close
+     * the connection.
+     *
+     * Step 2:
+     * We will into CLOSE_WAIT status.
+     * We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
+     * CLOSING status.
+     */
+    if (conn->tcp_state == TCPS_ESTABLISHED &&
+        (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
+        conn->tcp_state = TCPS_FIN_WAIT_1;
+    }
+
+    return 0;
+}
+
+static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
+                                         NetClientState *sender,
+                                         unsigned flags,
+                                         const struct iovec *iov,
+                                         int iovcnt,
+                                         NetPacketSent *sent_cb)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(nf);
+    Connection *conn;
+    ConnectionKey key;
+    Packet *pkt;
+    ssize_t size = iov_size(iov, iovcnt);
+    ssize_t vnet_hdr_len = 0;
+    char *buf = g_malloc0(size);
+
+    iov_to_buf(iov, iovcnt, 0, buf, size);
+
+    if (s->vnet_hdr) {
+        vnet_hdr_len = nf->netdev->vnet_hdr_len;
+    }
+
+    pkt = packet_new(buf, size, vnet_hdr_len);
+    g_free(buf);
+
+    /*
+     * if we get tcp packet
+     * we will rewrite it to make secondary guest's
+     * connection established successfully
+     */
+    if (pkt && is_tcp_packet(pkt)) {
+
+        fill_connection_key(pkt, &key);
+
+        if (sender == nf->netdev) {
+            /*
+             * We need make tcp TX and RX packet
+             * into one connection.
+             */
+            reverse_connection_key(&key);
+        }
+
+        /* After failover we needn't change new TCP packet */
+        if (s->failover_mode &&
+            !connection_has_tracked(s->connection_track_table, &key)) {
+            goto out;
+        }
+
+        conn = connection_get(s->connection_track_table,
+                              &key,
+                              NULL);
+
+        if (sender == nf->netdev) {
+            /* NET_FILTER_DIRECTION_TX */
+            if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * We block the packet here,after rewrite pkt
+                 * and will send it
+                 */
+                return 1;
+            }
+        } else {
+            /* NET_FILTER_DIRECTION_RX */
+            if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * We block the packet here,after rewrite pkt
+                 * and will send it
+                 */
+                return 1;
+            }
+        }
+    }
+
+out:
+    packet_destroy(pkt, NULL);
+    pkt = NULL;
+    return 0;
+}
+
+static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
+{
+    Connection *conn = (Connection *)value;
+
+    conn->offset = 0;
+}
+
+static gboolean offset_is_nonzero(gpointer key,
+                                  gpointer value,
+                                  gpointer user_data)
+{
+    Connection *conn = (Connection *)value;
+
+    return conn->offset ? true : false;
+}
+
+static void colo_rewriter_handle_event(NetFilterState *nf, int event,
+                                       Error **errp)
+{
+    RewriterState *rs = FILTER_COLO_REWRITER(nf);
+
+    switch (event) {
+    case COLO_EVENT_CHECKPOINT:
+        g_hash_table_foreach(rs->connection_track_table,
+                            reset_seq_offset, NULL);
+        break;
+    case COLO_EVENT_FAILOVER:
+        if (!g_hash_table_find(rs->connection_track_table,
+                              offset_is_nonzero, NULL)) {
+            filter_rewriter_failover_mode(rs);
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+static void colo_rewriter_cleanup(NetFilterState *nf)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(nf);
+
+    /* flush packets */
+    if (s->incoming_queue) {
+        filter_rewriter_flush(nf);
+        g_free(s->incoming_queue);
+    }
+}
+
+static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(nf);
+
+    s->connection_track_table = g_hash_table_new_full(connection_key_hash,
+                                                      connection_key_equal,
+                                                      g_free,
+                                                      connection_destroy);
+    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+}
+
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(obj);
+
+    return s->vnet_hdr;
+}
+
+static void filter_rewriter_set_vnet_hdr(Object *obj,
+                                         bool value,
+                                         Error **errp)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(obj);
+
+    s->vnet_hdr = value;
+}
+
+static void filter_rewriter_init(Object *obj)
+{
+    RewriterState *s = FILTER_COLO_REWRITER(obj);
+
+    s->vnet_hdr = false;
+    s->failover_mode = FAILOVER_MODE_OFF;
+    object_property_add_bool(obj, "vnet_hdr_support",
+                             filter_rewriter_get_vnet_hdr,
+                             filter_rewriter_set_vnet_hdr, NULL);
+}
+
+static void colo_rewriter_class_init(ObjectClass *oc, void *data)
+{
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    nfc->setup = colo_rewriter_setup;
+    nfc->cleanup = colo_rewriter_cleanup;
+    nfc->receive_iov = colo_rewriter_receive_iov;
+    nfc->handle_event = colo_rewriter_handle_event;
+}
+
+static const TypeInfo colo_rewriter_info = {
+    .name = TYPE_FILTER_REWRITER,
+    .parent = TYPE_NETFILTER,
+    .class_init = colo_rewriter_class_init,
+    .instance_init = filter_rewriter_init,
+    .instance_size = sizeof(RewriterState),
+};
+
+static void register_types(void)
+{
+    type_register_static(&colo_rewriter_info);
+}
+
+type_init(register_types);
diff --git a/net/filter.c b/net/filter.c
new file mode 100644
index 000000000..4b932e79f
--- /dev/null
+++ b/net/filter.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yanghy@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+
+#include "net/filter.h"
+#include "net/net.h"
+#include "net/vhost_net.h"
+#include "qom/object_interfaces.h"
+#include "qemu/iov.h"
+#include "qemu/module.h"
+#include "net/colo.h"
+#include "migration/colo.h"
+
+static inline bool qemu_can_skip_netfilter(NetFilterState *nf)
+{
+    return !nf->on;
+}
+
+ssize_t qemu_netfilter_receive(NetFilterState *nf,
+                               NetFilterDirection direction,
+                               NetClientState *sender,
+                               unsigned flags,
+                               const struct iovec *iov,
+                               int iovcnt,
+                               NetPacketSent *sent_cb)
+{
+    if (qemu_can_skip_netfilter(nf)) {
+        return 0;
+    }
+    if (nf->direction == direction ||
+        nf->direction == NET_FILTER_DIRECTION_ALL) {
+        return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov(
+                                   nf, sender, flags, iov, iovcnt, sent_cb);
+    }
+
+    return 0;
+}
+
+static NetFilterState *netfilter_next(NetFilterState *nf,
+                                      NetFilterDirection dir)
+{
+    NetFilterState *next;
+
+    if (dir == NET_FILTER_DIRECTION_TX) {
+        /* forward walk through filters */
+        next = QTAILQ_NEXT(nf, next);
+    } else {
+        /* reverse order */
+        next = QTAILQ_PREV(nf, next);
+    }
+
+    return next;
+}
+
+ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
+                                    unsigned flags,
+                                    const struct iovec *iov,
+                                    int iovcnt,
+                                    void *opaque)
+{
+    int ret = 0;
+    int direction;
+    NetFilterState *nf = opaque;
+    NetFilterState *next = NULL;
+
+    if (!sender || !sender->peer) {
+        /* no receiver, or sender been deleted, no need to pass it further */
+        goto out;
+    }
+
+    if (nf->direction == NET_FILTER_DIRECTION_ALL) {
+        if (sender == nf->netdev) {
+            /* This packet is sent by netdev itself */
+            direction = NET_FILTER_DIRECTION_TX;
+        } else {
+            direction = NET_FILTER_DIRECTION_RX;
+        }
+    } else {
+        direction = nf->direction;
+    }
+
+    next = netfilter_next(nf, direction);
+    while (next) {
+        /*
+         * if qemu_netfilter_pass_to_next been called, means that
+         * the packet has been hold by filter and has already retured size
+         * to the sender, so sent_cb shouldn't be called later, just
+         * pass NULL to next.
+         */
+        ret = qemu_netfilter_receive(next, direction, sender, flags, iov,
+                                     iovcnt, NULL);
+        if (ret) {
+            return ret;
+        }
+        next = netfilter_next(next, direction);
+    }
+
+    /*
+     * We have gone through all filters, pass it to receiver.
+     * Do the valid check again incase sender or receiver been
+     * deleted while we go through filters.
+     */
+    if (sender && sender->peer) {
+        qemu_net_queue_send_iov(sender->peer->incoming_queue,
+                                sender, flags, iov, iovcnt, NULL);
+    }
+
+out:
+    /* no receiver, or sender been deleted */
+    return iov_size(iov, iovcnt);
+}
+
+static char *netfilter_get_netdev_id(Object *obj, Error **errp)
+{
+    NetFilterState *nf = NETFILTER(obj);
+
+    return g_strdup(nf->netdev_id);
+}
+
+static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp)
+{
+    NetFilterState *nf = NETFILTER(obj);
+
+    nf->netdev_id = g_strdup(str);
+}
+
+static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+    NetFilterState *nf = NETFILTER(obj);
+    return nf->direction;
+}
+
+static void netfilter_set_direction(Object *obj, int direction, Error **errp)
+{
+    NetFilterState *nf = NETFILTER(obj);
+    nf->direction = direction;
+}
+
+static char *netfilter_get_status(Object *obj, Error **errp)
+{
+    NetFilterState *nf = NETFILTER(obj);
+
+    return nf->on ? g_strdup("on") : g_strdup("off");
+}
+
+static void netfilter_set_status(Object *obj, const char *str, Error **errp)
+{
+    NetFilterState *nf = NETFILTER(obj);
+    NetFilterClass *nfc = NETFILTER_GET_CLASS(obj);
+
+    if (strcmp(str, "on") && strcmp(str, "off")) {
+        error_setg(errp, "Invalid value for netfilter status, "
+                         "should be 'on' or 'off'");
+        return;
+    }
+    if (nf->on == !strcmp(str, "on")) {
+        return;
+    }
+    nf->on = !nf->on;
+    if (nf->netdev && nfc->status_changed) {
+        nfc->status_changed(nf, errp);
+    }
+}
+
+static void netfilter_init(Object *obj)
+{
+    NetFilterState *nf = NETFILTER(obj);
+
+    nf->on = true;
+
+    object_property_add_str(obj, "netdev",
+                            netfilter_get_netdev_id, netfilter_set_netdev_id,
+                            NULL);
+    object_property_add_enum(obj, "queue", "NetFilterDirection",
+                             &NetFilterDirection_lookup,
+                             netfilter_get_direction, netfilter_set_direction,
+                             NULL);
+    object_property_add_str(obj, "status",
+                            netfilter_get_status, netfilter_set_status,
+                            NULL);
+}
+
+static void netfilter_complete(UserCreatable *uc, Error **errp)
+{
+    NetFilterState *nf = NETFILTER(uc);
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    NetFilterClass *nfc = NETFILTER_GET_CLASS(uc);
+    int queues;
+    Error *local_err = NULL;
+
+    if (!nf->netdev_id) {
+        error_setg(errp, "Parameter 'netdev' is required");
+        return;
+    }
+
+    queues = qemu_find_net_clients_except(nf->netdev_id, ncs,
+                                          NET_CLIENT_DRIVER_NIC,
+                                          MAX_QUEUE_NUM);
+    if (queues < 1) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev",
+                   "a network backend id");
+        return;
+    } else if (queues > 1) {
+        error_setg(errp, "multiqueue is not supported");
+        return;
+    }
+
+    if (get_vhost_net(ncs[0])) {
+        error_setg(errp, "Vhost is not supported");
+        return;
+    }
+
+    nf->netdev = ncs[0];
+
+    if (nfc->setup) {
+        nfc->setup(nf, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+    QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
+}
+
+static void netfilter_finalize(Object *obj)
+{
+    NetFilterState *nf = NETFILTER(obj);
+    NetFilterClass *nfc = NETFILTER_GET_CLASS(obj);
+
+    if (nfc->cleanup) {
+        nfc->cleanup(nf);
+    }
+
+    if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters) &&
+        QTAILQ_IN_USE(nf, next)) {
+        QTAILQ_REMOVE(&nf->netdev->filters, nf, next);
+    }
+    g_free(nf->netdev_id);
+}
+
+static void default_handle_event(NetFilterState *nf, int event, Error **errp)
+{
+    switch (event) {
+    case COLO_EVENT_CHECKPOINT:
+        break;
+    case COLO_EVENT_FAILOVER:
+        object_property_set_str(OBJECT(nf), "off", "status", errp);
+        break;
+    default:
+        break;
+    }
+}
+
+static void netfilter_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+    NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+    ucc->complete = netfilter_complete;
+    nfc->handle_event = default_handle_event;
+}
+
+static const TypeInfo netfilter_info = {
+    .name = TYPE_NETFILTER,
+    .parent = TYPE_OBJECT,
+    .abstract = true,
+    .class_size = sizeof(NetFilterClass),
+    .class_init = netfilter_class_init,
+    .instance_size = sizeof(NetFilterState),
+    .instance_init = netfilter_init,
+    .instance_finalize = netfilter_finalize,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void register_types(void)
+{
+    type_register_static(&netfilter_info);
+}
+
+type_init(register_types);
diff --git a/net/hub.c b/net/hub.c
index df32074de..5795a678e 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -12,16 +12,19 @@
  *
  */
 
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "monitor/monitor.h"
 #include "net/net.h"
 #include "clients.h"
 #include "hub.h"
 #include "qemu/iov.h"
+#include "qemu/error-report.h"
+#include "sysemu/qtest.h"
 
 /*
  * A hub broadcasts incoming packets to all its ports except the source port.
- * Hubs can be used to provide independent network segments, also confusingly
- * named the QEMU 'vlan' feature.
+ * Hubs can be used to provide independent emulated network segments.
  */
 
 typedef struct NetHub NetHub;
@@ -130,7 +133,7 @@ static void net_hub_port_cleanup(NetClientState *nc)
 }
 
 static NetClientInfo net_hub_port_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_HUBPORT,
+    .type = NET_CLIENT_DRIVER_HUBPORT,
     .size = sizeof(NetHubPort),
     .can_receive = net_hub_port_can_receive,
     .receive = net_hub_port_receive,
@@ -138,7 +141,8 @@ static NetClientInfo net_hub_port_info = {
     .cleanup = net_hub_port_cleanup,
 };
 
-static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
+static NetHubPort *net_hub_port_new(NetHub *hub, const char *name,
+                                    NetClientState *hubpeer)
 {
     NetClientState *nc;
     NetHubPort *port;
@@ -151,7 +155,7 @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
         name = default_name;
     }
 
-    nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name);
+    nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name);
     port = DO_UPCAST(NetHubPort, nc, nc);
     port->id = id;
     port->hub = hub;
@@ -163,11 +167,14 @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
 
 /**
  * Create a port on a given hub
+ * @hub_id: Number of the hub
  * @name: Net client name or NULL for default name.
+ * @hubpeer: Peer to use (if "netdev=id" has been specified)
  *
  * If there is no existing hub with the given id then a new hub is created.
  */
-NetClientState *net_hub_add_port(int hub_id, const char *name)
+NetClientState *net_hub_add_port(int hub_id, const char *name,
+                                 NetClientState *hubpeer)
 {
     NetHub *hub;
     NetHubPort *port;
@@ -182,7 +189,7 @@ NetClientState *net_hub_add_port(int hub_id, const char *name)
         hub = net_hub_new(hub_id);
     }
 
-    port = net_hub_port_new(hub, name);
+    port = net_hub_port_new(hub, name, hubpeer);
     return &port->nc;
 }
 
@@ -230,7 +237,7 @@ NetClientState *net_hub_port_find(int hub_id)
         }
     }
 
-    nc = net_hub_add_port(hub_id, NULL);
+    nc = net_hub_add_port(hub_id, NULL, NULL);
     return nc;
 }
 
@@ -245,9 +252,12 @@ void net_hub_info(Monitor *mon)
     QLIST_FOREACH(hub, &hubs, next) {
         monitor_printf(mon, "hub %d\n", hub->id);
         QLIST_FOREACH(port, &hub->ports, next) {
+            monitor_printf(mon, " \\ %s", port->nc.name);
             if (port->nc.peer) {
-                monitor_printf(mon, " \\ ");
+                monitor_printf(mon, ": ");
                 print_net_client(mon, port->nc.peer);
+            } else {
+                monitor_printf(mon, "\n");
             }
         }
     }
@@ -262,10 +272,10 @@ int net_hub_id_for_client(NetClientState *nc, int *id)
 {
     NetHubPort *port;
 
-    if (nc->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) {
+    if (nc->info->type == NET_CLIENT_DRIVER_HUBPORT) {
         port = DO_UPCAST(NetHubPort, nc, nc);
     } else if (nc->peer != NULL && nc->peer->info->type ==
-            NET_CLIENT_OPTIONS_KIND_HUBPORT) {
+            NET_CLIENT_DRIVER_HUBPORT) {
         port = DO_UPCAST(NetHubPort, nc, nc->peer);
     } else {
         return -ENOENT;
@@ -277,20 +287,26 @@ int net_hub_id_for_client(NetClientState *nc, int *id)
     return 0;
 }
 
-int net_init_hubport(const NetClientOptions *opts, const char *name,
-                     NetClientState *peer)
+int net_init_hubport(const Netdev *netdev, const char *name,
+                     NetClientState *peer, Error **errp)
 {
     const NetdevHubPortOptions *hubport;
+    NetClientState *hubpeer = NULL;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT);
-    hubport = opts->hubport;
+    assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT);
+    assert(!peer);
+    hubport = &netdev->u.hubport;
 
-    /* Treat hub port like a backend, NIC must be the one to peer */
-    if (peer) {
-        return -EINVAL;
+    if (hubport->has_netdev) {
+        hubpeer = qemu_find_netdev(hubport->netdev);
+        if (!hubpeer) {
+            error_setg(errp, "netdev '%s' not found", hubport->netdev);
+            return -1;
+        }
     }
 
-    net_hub_add_port(hubport->hubid, name);
+    net_hub_add_port(hubport->hubid, name, hubpeer);
+
     return 0;
 }
 
@@ -309,19 +325,19 @@ void net_hub_check_clients(void)
         QLIST_FOREACH(port, &hub->ports, next) {
             peer = port->nc.peer;
             if (!peer) {
-                fprintf(stderr, "Warning: hub port %s has no peer\n",
-                        port->nc.name);
+                warn_report("hub port %s has no peer", port->nc.name);
                 continue;
             }
 
             switch (peer->info->type) {
-            case NET_CLIENT_OPTIONS_KIND_NIC:
+            case NET_CLIENT_DRIVER_NIC:
                 has_nic = 1;
                 break;
-            case NET_CLIENT_OPTIONS_KIND_USER:
-            case NET_CLIENT_OPTIONS_KIND_TAP:
-            case NET_CLIENT_OPTIONS_KIND_SOCKET:
-            case NET_CLIENT_OPTIONS_KIND_VDE:
+            case NET_CLIENT_DRIVER_USER:
+            case NET_CLIENT_DRIVER_TAP:
+            case NET_CLIENT_DRIVER_SOCKET:
+            case NET_CLIENT_DRIVER_VDE:
+            case NET_CLIENT_DRIVER_VHOST_USER:
                 has_host_dev = 1;
                 break;
             default:
@@ -329,12 +345,10 @@ void net_hub_check_clients(void)
             }
         }
         if (has_host_dev && !has_nic) {
-            fprintf(stderr, "Warning: vlan %d with no nics\n", hub->id);
+            warn_report("hub %d with no nics", hub->id);
         }
-        if (has_nic && !has_host_dev) {
-            fprintf(stderr,
-                    "Warning: vlan %d is not connected to host network\n",
-                    hub->id);
+        if (has_nic && !has_host_dev && !qtest_enabled()) {
+            warn_report("hub %d is not connected to host network", hub->id);
         }
     }
 }
@@ -347,7 +361,7 @@ bool net_hub_flush(NetClientState *nc)
 
     QLIST_FOREACH(port, &source_port->hub->ports, next) {
         if (port != source_port) {
-            ret += qemu_net_queue_flush(port->nc.send_queue);
+            ret += qemu_net_queue_flush(port->nc.incoming_queue);
         }
     }
     return ret ? true : false;
diff --git a/net/hub.h b/net/hub.h
index a625effe0..66d3322fa 100644
--- a/net/hub.h
+++ b/net/hub.h
@@ -15,9 +15,9 @@
 #ifndef NET_HUB_H
 #define NET_HUB_H
 
-#include "qemu-common.h"
 
-NetClientState *net_hub_add_port(int hub_id, const char *name);
+NetClientState *net_hub_add_port(int hub_id, const char *name,
+                                 NetClientState *hubpeer);
 NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
 void net_hub_info(Monitor *mon);
 void net_hub_check_clients(void);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 000000000..55fea17c0
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,742 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include <linux/ip.h>
+#include <netdb.h>
+#include "net/net.h"
+#include "clients.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+/* The buffer size needs to be investigated for optimum numbers and
+ * optimum means of paging in on different systems. This size is
+ * chosen to be sufficient to accommodate one packet with some headers
+ */
+
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+#define BUFFER_SIZE 2048
+#define IOVSIZE 2
+#define MAX_L2TPV3_MSGCNT 64
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+/* Header set to 0x30000 signifies a data packet */
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+typedef struct NetL2TPV3State {
+    NetClientState nc;
+    int fd;
+
+    /*
+     * these are used for xmit - that happens packet a time
+     * and for first sign of life packet (easier to parse that once)
+     */
+
+    uint8_t *header_buf;
+    struct iovec *vec;
+
+    /*
+     * these are used for receive - try to "eat" up to 32 packets at a time
+     */
+
+    struct mmsghdr *msgvec;
+
+    /*
+     * peer address
+     */
+
+    struct sockaddr_storage *dgram_dst;
+    uint32_t dst_size;
+
+    /*
+     * L2TPv3 parameters
+     */
+
+    uint64_t rx_cookie;
+    uint64_t tx_cookie;
+    uint32_t rx_session;
+    uint32_t tx_session;
+    uint32_t header_size;
+    uint32_t counter;
+
+    /*
+    * DOS avoidance in error handling
+    */
+
+    bool header_mismatch;
+
+    /*
+     * Ring buffer handling
+     */
+
+    int queue_head;
+    int queue_tail;
+    int queue_depth;
+
+    /*
+     * Precomputed offsets
+     */
+
+    uint32_t offset;
+    uint32_t cookie_offset;
+    uint32_t counter_offset;
+    uint32_t session_offset;
+
+    /* Poll Control */
+
+    bool read_poll;
+    bool write_poll;
+
+    /* Flags */
+
+    bool ipv6;
+    bool udp;
+    bool has_counter;
+    bool pin_counter;
+    bool cookie;
+    bool cookie_is_64;
+
+} NetL2TPV3State;
+
+static void net_l2tpv3_send(void *opaque);
+static void l2tpv3_writable(void *opaque);
+
+static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
+{
+    qemu_set_fd_handler(s->fd,
+                        s->read_poll ? net_l2tpv3_send : NULL,
+                        s->write_poll ? l2tpv3_writable : NULL,
+                        s);
+}
+
+static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->read_poll != enable) {
+        s->read_poll = enable;
+        l2tpv3_update_fd_handler(s);
+    }
+}
+
+static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        l2tpv3_update_fd_handler(s);
+    }
+}
+
+static void l2tpv3_writable(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    l2tpv3_write_poll(s, false);
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    l2tpv3_read_poll(s, true);
+}
+
+static void l2tpv3_poll(NetClientState *nc, bool enable)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    l2tpv3_write_poll(s, enable);
+    l2tpv3_read_poll(s, enable);
+}
+
+static void l2tpv3_form_header(NetL2TPV3State *s)
+{
+    uint32_t *counter;
+
+    if (s->udp) {
+        stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
+    }
+    stl_be_p(
+            (uint32_t *) (s->header_buf + s->session_offset),
+            s->tx_session
+        );
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            stq_be_p(
+                (uint64_t *)(s->header_buf + s->cookie_offset),
+                s->tx_cookie
+            );
+        } else {
+            stl_be_p(
+                (uint32_t *) (s->header_buf + s->cookie_offset),
+                s->tx_cookie
+            );
+        }
+    }
+    if (s->has_counter) {
+        counter = (uint32_t *)(s->header_buf + s->counter_offset);
+        if (s->pin_counter) {
+            *counter = 0;
+        } else {
+            stl_be_p(counter, ++s->counter);
+        }
+    }
+}
+
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+                    const struct iovec *iov,
+                    int iovcnt)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct msghdr message;
+    int ret;
+
+    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+        error_report(
+            "iovec too long %d > %d, change l2tpv3.h",
+            iovcnt, MAX_L2TPV3_IOVCNT
+        );
+        return -1;
+    }
+    l2tpv3_form_header(s);
+    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+    s->vec->iov_base = s->header_buf;
+    s->vec->iov_len = s->offset;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = iovcnt + 1;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = iov_size(iov, iovcnt);
+    } else {
+        /* signal upper layer that socket buffer is full */
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+                    const uint8_t *buf,
+                    size_t size)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct iovec *vec;
+    struct msghdr message;
+    ssize_t ret = 0;
+
+    l2tpv3_form_header(s);
+    vec = s->vec;
+    vec->iov_base = s->header_buf;
+    vec->iov_len = s->offset;
+    vec++;
+    vec->iov_base = (void *) buf;
+    vec->iov_len = size;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = 2;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+        * we should get an error and never a 0 send
+        */
+        ret = size;
+    } else {
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            /* signal upper layer that socket buffer is full */
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+
+    uint32_t *session;
+    uint64_t cookie;
+
+    if ((!s->udp) && (!s->ipv6)) {
+        buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+    }
+
+    /* we do not do a strict check for "data" packets as per
+    * the RFC spec because the pure IP spec does not have
+    * that anyway.
+    */
+
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            cookie = ldq_be_p(buf + s->cookie_offset);
+        } else {
+            cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
+        }
+        if (cookie != s->rx_cookie) {
+            if (!s->header_mismatch) {
+                error_report("unknown cookie id");
+            }
+            return -1;
+        }
+    }
+    session = (uint32_t *) (buf + s->session_offset);
+    if (ldl_be_p(session) != s->rx_session) {
+        if (!s->header_mismatch) {
+            error_report("session mismatch");
+        }
+        return -1;
+    }
+    return 0;
+}
+
+static void net_l2tpv3_process_queue(NetL2TPV3State *s)
+{
+    int size = 0;
+    struct iovec *vec;
+    bool bad_read;
+    int data_size;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+    if (s->queue_depth > 0) {
+        do {
+            msgvec = s->msgvec + s->queue_tail;
+            if (msgvec->msg_len > 0) {
+                data_size = msgvec->msg_len - s->header_size;
+                vec = msgvec->msg_hdr.msg_iov;
+                if ((data_size > 0) &&
+                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
+                    vec++;
+                    /* Use the legacy delivery for now, we will
+                     * switch to using our own ring as a queueing mechanism
+                     * at a later date
+                     */
+                    size = qemu_send_packet_async(
+                            &s->nc,
+                            vec->iov_base,
+                            data_size,
+                            l2tpv3_send_completed
+                        );
+                    if (size == 0) {
+                        l2tpv3_read_poll(s, false);
+                    }
+                    bad_read = false;
+                } else {
+                    bad_read = true;
+                    if (!s->header_mismatch) {
+                        /* report error only once */
+                        error_report("l2tpv3 header verification failed");
+                        s->header_mismatch = true;
+                    }
+                }
+            } else {
+                bad_read = true;
+            }
+            s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
+            s->queue_depth--;
+        } while (
+                (s->queue_depth > 0) &&
+                 qemu_can_send_packet(&s->nc) &&
+                ((size > 0) || bad_read)
+            );
+    }
+}
+
+static void net_l2tpv3_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    int target_count, count;
+    struct mmsghdr *msgvec;
+
+    /* go into ring mode only if there is a "pending" tail */
+
+    if (s->queue_depth) {
+
+        /* The ring buffer we use has variable intake
+         * count of how much we can read varies - adjust accordingly
+         */
+
+        target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
+
+        /* Ensure we do not overrun the ring when we have
+         * a lot of enqueued packets
+         */
+
+        if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
+            target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
+        }
+    } else {
+
+        /* we do not have any pending packets - we can use
+        * the whole message vector linearly instead of using
+        * it as a ring
+        */
+
+        s->queue_head = 0;
+        s->queue_tail = 0;
+        target_count = MAX_L2TPV3_MSGCNT;
+    }
+
+    msgvec = s->msgvec + s->queue_head;
+    if (target_count > 0) {
+        do {
+            count = recvmmsg(
+                s->fd,
+                msgvec,
+                target_count, MSG_DONTWAIT, NULL);
+        } while ((count == -1) && (errno == EINTR));
+        if (count < 0) {
+            /* Recv error - we still need to flush packets here,
+             * (re)set queue head to current position
+             */
+            count = 0;
+        }
+        s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
+        s->queue_depth += count;
+    }
+    net_l2tpv3_process_queue(s);
+}
+
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+    int i, j;
+    struct iovec *iov;
+    struct mmsghdr *cleanup = msgvec;
+    if (cleanup) {
+        for (i = 0; i < count; i++) {
+            if (cleanup->msg_hdr.msg_iov) {
+                iov = cleanup->msg_hdr.msg_iov;
+                for (j = 0; j < iovcount; j++) {
+                    g_free(iov->iov_base);
+                    iov++;
+                }
+                g_free(cleanup->msg_hdr.msg_iov);
+            }
+            cleanup++;
+        }
+        g_free(msgvec);
+    }
+}
+
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+    int i;
+    struct iovec *iov;
+    struct mmsghdr *msgvec, *result;
+
+    msgvec = g_new(struct mmsghdr, count);
+    result = msgvec;
+    for (i = 0; i < count ; i++) {
+        msgvec->msg_hdr.msg_name = NULL;
+        msgvec->msg_hdr.msg_namelen = 0;
+        iov =  g_new(struct iovec, IOVSIZE);
+        msgvec->msg_hdr.msg_iov = iov;
+        iov->iov_base = g_malloc(s->header_size);
+        iov->iov_len = s->header_size;
+        iov++ ;
+        iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+        iov->iov_len = BUFFER_SIZE;
+        msgvec->msg_hdr.msg_iovlen = 2;
+        msgvec->msg_hdr.msg_control = NULL;
+        msgvec->msg_hdr.msg_controllen = 0;
+        msgvec->msg_hdr.msg_flags = 0;
+        msgvec++;
+    }
+    return result;
+}
+
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    qemu_purge_queued_packets(nc);
+    l2tpv3_read_poll(s, false);
+    l2tpv3_write_poll(s, false);
+    if (s->fd >= 0) {
+        close(s->fd);
+    }
+    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+    g_free(s->vec);
+    g_free(s->header_buf);
+    g_free(s->dgram_dst);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+    .type = NET_CLIENT_DRIVER_L2TPV3,
+    .size = sizeof(NetL2TPV3State),
+    .receive = net_l2tpv3_receive_dgram,
+    .receive_iov = net_l2tpv3_receive_dgram_iov,
+    .poll = l2tpv3_poll,
+    .cleanup = net_l2tpv3_cleanup,
+};
+
+int net_init_l2tpv3(const Netdev *netdev,
+                    const char *name,
+                    NetClientState *peer, Error **errp)
+{
+    const NetdevL2TPv3Options *l2tpv3;
+    NetL2TPV3State *s;
+    NetClientState *nc;
+    int fd = -1, gairet;
+    struct addrinfo hints;
+    struct addrinfo *result = NULL;
+    char *srcport, *dstport;
+
+    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+    s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    s->queue_head = 0;
+    s->queue_tail = 0;
+    s->header_mismatch = false;
+
+    assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3);
+    l2tpv3 = &netdev->u.l2tpv3;
+
+    if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
+        s->ipv6 = l2tpv3->ipv6;
+    } else {
+        s->ipv6 = false;
+    }
+
+    if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
+        error_setg(errp, "offset must be less than 256 bytes");
+        goto outerr;
+    }
+
+    if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
+        if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
+            s->cookie = true;
+        } else {
+            error_setg(errp,
+                       "require both 'rxcookie' and 'txcookie' or neither");
+            goto outerr;
+        }
+    } else {
+        s->cookie = false;
+    }
+
+    if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
+        s->cookie_is_64  = true;
+    } else {
+        s->cookie_is_64  = false;
+    }
+
+    if (l2tpv3->has_udp && l2tpv3->udp) {
+        s->udp = true;
+        if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+            error_setg(errp, "need both src and dst port for udp");
+            goto outerr;
+        } else {
+            srcport = l2tpv3->srcport;
+            dstport = l2tpv3->dstport;
+        }
+    } else {
+        s->udp = false;
+        srcport = NULL;
+        dstport = NULL;
+    }
+
+
+    s->offset = 4;
+    s->session_offset = 0;
+    s->cookie_offset = 4;
+    s->counter_offset = 4;
+
+    s->tx_session = l2tpv3->txsession;
+    if (l2tpv3->has_rxsession) {
+        s->rx_session = l2tpv3->rxsession;
+    } else {
+        s->rx_session = s->tx_session;
+    }
+
+    if (s->cookie) {
+        s->rx_cookie = l2tpv3->rxcookie;
+        s->tx_cookie = l2tpv3->txcookie;
+        if (s->cookie_is_64 == true) {
+            /* 64 bit cookie */
+            s->offset += 8;
+            s->counter_offset += 8;
+        } else {
+            /* 32 bit cookie */
+            s->offset += 4;
+            s->counter_offset += 4;
+        }
+    }
+
+    memset(&hints, 0, sizeof(hints));
+
+    if (s->ipv6) {
+        hints.ai_family = AF_INET6;
+    } else {
+        hints.ai_family = AF_INET;
+    }
+    if (s->udp) {
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_protocol = 0;
+        s->offset += 4;
+        s->counter_offset += 4;
+        s->session_offset += 4;
+        s->cookie_offset += 4;
+    } else {
+        hints.ai_socktype = SOCK_RAW;
+        hints.ai_protocol = IPPROTO_L2TP;
+    }
+
+    gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+
+    if ((gairet != 0) || (result == NULL)) {
+        error_setg(errp, "could not resolve src, errno = %s",
+                   gai_strerror(gairet));
+        goto outerr;
+    }
+    fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+    if (fd == -1) {
+        fd = -errno;
+        error_setg(errp, "socket creation failed, errno = %d",
+                   -fd);
+        goto outerr;
+    }
+    if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
+        error_setg(errp, "could not bind socket err=%i", errno);
+        goto outerr;
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+
+    memset(&hints, 0, sizeof(hints));
+
+    if (s->ipv6) {
+        hints.ai_family = AF_INET6;
+    } else {
+        hints.ai_family = AF_INET;
+    }
+    if (s->udp) {
+        hints.ai_socktype = SOCK_DGRAM;
+        hints.ai_protocol = 0;
+    } else {
+        hints.ai_socktype = SOCK_RAW;
+        hints.ai_protocol = IPPROTO_L2TP;
+    }
+
+    result = NULL;
+    gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+    if ((gairet != 0) || (result == NULL)) {
+        error_setg(errp, "could not resolve dst, error = %s",
+                   gai_strerror(gairet));
+        goto outerr;
+    }
+
+    s->dgram_dst = g_new0(struct sockaddr_storage, 1);
+    memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+    s->dst_size = result->ai_addrlen;
+
+    if (result) {
+        freeaddrinfo(result);
+    }
+
+    if (l2tpv3->has_counter && l2tpv3->counter) {
+        s->has_counter = true;
+        s->offset += 4;
+    } else {
+        s->has_counter = false;
+    }
+
+    if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
+        s->has_counter = true;  /* pin counter implies that there is counter */
+        s->pin_counter = true;
+    } else {
+        s->pin_counter = false;
+    }
+
+    if (l2tpv3->has_offset) {
+        /* extra offset */
+        s->offset += l2tpv3->offset;
+    }
+
+    if ((s->ipv6) || (s->udp)) {
+        s->header_size = s->offset;
+    } else {
+        s->header_size = s->offset + sizeof(struct iphdr);
+    }
+
+    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+    s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
+    s->header_buf = g_malloc(s->header_size);
+
+    qemu_set_nonblock(fd);
+
+    s->fd = fd;
+    s->counter = 0;
+
+    l2tpv3_read_poll(s, true);
+
+    snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+             "l2tpv3: connected");
+    return 0;
+outerr:
+    qemu_del_net_client(nc);
+    if (fd >= 0) {
+        close(fd);
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+    return -1;
+}
+
diff --git a/net/net.c b/net/net.c
index c0d61bf78..95fb9e143 100644
--- a/net/net.c
+++ b/net/net.c
@@ -21,113 +21,109 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "config-host.h"
+
+#include "qemu/osdep.h"
 
 #include "net/net.h"
 #include "clients.h"
 #include "hub.h"
+#include "hw/qdev-properties.h"
 #include "net/slirp.h"
+#include "net/eth.h"
 #include "util.h"
 
 #include "monitor/monitor.h"
-#include "qemu-common.h"
+#include "qemu/help_option.h"
+#include "qapi/qapi-commands-net.h"
+#include "qapi/qapi-visit-net.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
 #include "qemu/sockets.h"
+#include "qemu/cutils.h"
 #include "qemu/config-file.h"
-#include "qmp-commands.h"
-#include "hw/qdev.h"
+#include "qemu/ctype.h"
 #include "qemu/iov.h"
-#include "qapi-visit.h"
+#include "qemu/main-loop.h"
+#include "qemu/option.h"
+#include "qapi/error.h"
 #include "qapi/opts-visitor.h"
-#include "qapi/dealloc-visitor.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/qtest.h"
+#include "sysemu/runstate.h"
+#include "sysemu/sysemu.h"
+#include "net/filter.h"
+#include "qapi/string-output-visitor.h"
 
 /* Net bridge is currently not supported for W32. */
 #if !defined(_WIN32)
 # define CONFIG_NET_BRIDGE
 #endif
 
+static VMChangeStateEntry *net_change_state_entry;
 static QTAILQ_HEAD(, NetClientState) net_clients;
 
-int default_net = 1;
-
 /***********************************************************/
 /* network device redirectors */
 
-#if defined(DEBUG_NET)
-static void hex_dump(FILE *f, const uint8_t *buf, int size)
-{
-    int len, i, j, c;
-
-    for(i=0;i<size;i+=16) {
-        len = size - i;
-        if (len > 16)
-            len = 16;
-        fprintf(f, "%08x ", i);
-        for(j=0;j<16;j++) {
-            if (j < len)
-                fprintf(f, " %02x", buf[i+j]);
-            else
-                fprintf(f, "   ");
-        }
-        fprintf(f, " ");
-        for(j=0;j<len;j++) {
-            c = buf[i+j];
-            if (c < ' ' || c > '~')
-                c = '.';
-            fprintf(f, "%c", c);
-        }
-        fprintf(f, "\n");
-    }
-}
-#endif
-
-static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
+int parse_host_port(struct sockaddr_in *saddr, const char *str,
+                    Error **errp)
 {
-    const char *p, *p1;
-    int len;
-    p = *pp;
-    p1 = strchr(p, sep);
-    if (!p1)
-        return -1;
-    len = p1 - p;
-    p1++;
-    if (buf_size > 0) {
-        if (len > buf_size - 1)
-            len = buf_size - 1;
-        memcpy(buf, p, len);
-        buf[len] = '\0';
-    }
-    *pp = p1;
-    return 0;
-}
-
-int parse_host_port(struct sockaddr_in *saddr, const char *str)
-{
-    char buf[512];
+    gchar **substrings;
     struct hostent *he;
-    const char *p, *r;
-    int port;
+    const char *addr, *p, *r;
+    int port, ret = 0;
+
+    substrings = g_strsplit(str, ":", 2);
+    if (!substrings || !substrings[0] || !substrings[1]) {
+        error_setg(errp, "host address '%s' doesn't contain ':' "
+                   "separating host from port", str);
+        ret = -1;
+        goto out;
+    }
+
+    addr = substrings[0];
+    p = substrings[1];
 
-    p = str;
-    if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
-        return -1;
     saddr->sin_family = AF_INET;
-    if (buf[0] == '\0') {
+    if (addr[0] == '\0') {
         saddr->sin_addr.s_addr = 0;
     } else {
-        if (qemu_isdigit(buf[0])) {
-            if (!inet_aton(buf, &saddr->sin_addr))
-                return -1;
+        if (qemu_isdigit(addr[0])) {
+            if (!inet_aton(addr, &saddr->sin_addr)) {
+                error_setg(errp, "host address '%s' is not a valid "
+                           "IPv4 address", addr);
+                ret = -1;
+                goto out;
+            }
         } else {
-            if ((he = gethostbyname(buf)) == NULL)
-                return - 1;
+            he = gethostbyname(addr);
+            if (he == NULL) {
+                error_setg(errp, "can't resolve host address '%s'", addr);
+                ret = -1;
+                goto out;
+            }
             saddr->sin_addr = *(struct in_addr *)he->h_addr;
         }
     }
     port = strtol(p, (char **)&r, 0);
-    if (r == p)
-        return -1;
+    if (r == p) {
+        error_setg(errp, "port number '%s' is invalid", p);
+        ret = -1;
+        goto out;
+    }
     saddr->sin_port = htons(port);
-    return 0;
+
+out:
+    g_strfreev(substrings);
+    return ret;
+}
+
+char *qemu_mac_strdup_printf(const uint8_t *macaddr)
+{
+    return g_strdup_printf("%.2x:%.2x:%.2x:%.2x:%.2x:%.2x",
+                           macaddr[0], macaddr[1], macaddr[2],
+                           macaddr[3], macaddr[4], macaddr[5]);
 }
 
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6])
@@ -139,19 +135,68 @@ void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6])
              macaddr[3], macaddr[4], macaddr[5]);
 }
 
+static int mac_table[256] = {0};
+
+static void qemu_macaddr_set_used(MACAddr *macaddr)
+{
+    int index;
+
+    for (index = 0x56; index < 0xFF; index++) {
+        if (macaddr->a[5] == index) {
+            mac_table[index]++;
+        }
+    }
+}
+
+static void qemu_macaddr_set_free(MACAddr *macaddr)
+{
+    int index;
+    static const MACAddr base = { .a = { 0x52, 0x54, 0x00, 0x12, 0x34, 0 } };
+
+    if (memcmp(macaddr->a, &base.a, (sizeof(base.a) - 1)) != 0) {
+        return;
+    }
+    for (index = 0x56; index < 0xFF; index++) {
+        if (macaddr->a[5] == index) {
+            mac_table[index]--;
+        }
+    }
+}
+
+static int qemu_macaddr_get_free(void)
+{
+    int index;
+
+    for (index = 0x56; index < 0xFF; index++) {
+        if (mac_table[index] == 0) {
+            return index;
+        }
+    }
+
+    return -1;
+}
+
 void qemu_macaddr_default_if_unset(MACAddr *macaddr)
 {
-    static int index = 0;
     static const MACAddr zero = { .a = { 0,0,0,0,0,0 } };
+    static const MACAddr base = { .a = { 0x52, 0x54, 0x00, 0x12, 0x34, 0 } };
+
+    if (memcmp(macaddr, &zero, sizeof(zero)) != 0) {
+        if (memcmp(macaddr->a, &base.a, (sizeof(base.a) - 1)) != 0) {
+            return;
+        } else {
+            qemu_macaddr_set_used(macaddr);
+            return;
+        }
+    }
 
-    if (memcmp(macaddr, &zero, sizeof(zero)) != 0)
-        return;
     macaddr->a[0] = 0x52;
     macaddr->a[1] = 0x54;
     macaddr->a[2] = 0x00;
     macaddr->a[3] = 0x12;
     macaddr->a[4] = 0x34;
-    macaddr->a[5] = 0x56 + index++;
+    macaddr->a[5] = qemu_macaddr_get_free();
+    qemu_macaddr_set_used(macaddr);
 }
 
 /**
@@ -162,7 +207,6 @@ void qemu_macaddr_default_if_unset(MACAddr *macaddr)
 static char *assign_name(NetClientState *nc1, const char *model)
 {
     NetClientState *nc;
-    char buf[256];
     int id = 0;
 
     QTAILQ_FOREACH(nc, &net_clients, next) {
@@ -174,15 +218,18 @@ static char *assign_name(NetClientState *nc1, const char *model)
         }
     }
 
-    snprintf(buf, sizeof(buf), "%s.%d", model, id);
-
-    return g_strdup(buf);
+    return g_strdup_printf("%s.%d", model, id);
 }
 
 static void qemu_net_client_destructor(NetClientState *nc)
 {
     g_free(nc);
 }
+static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
+                                       unsigned flags,
+                                       const struct iovec *iov,
+                                       int iovcnt,
+                                       void *opaque);
 
 static void qemu_net_client_setup(NetClientState *nc,
                                   NetClientInfo *info,
@@ -206,8 +253,9 @@ static void qemu_net_client_setup(NetClientState *nc,
     }
     QTAILQ_INSERT_TAIL(&net_clients, nc, next);
 
-    nc->send_queue = qemu_new_net_queue(nc);
+    nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc);
     nc->destructor = destructor;
+    QTAILQ_INIT(&nc->filters);
 }
 
 NetClientState *qemu_new_net_client(NetClientInfo *info,
@@ -234,9 +282,9 @@ NICState *qemu_new_nic(NetClientInfo *info,
 {
     NetClientState **peers = conf->peers.ncs;
     NICState *nic;
-    int i, queues = MAX(1, conf->queues);
+    int i, queues = MAX(1, conf->peers.queues);
 
-    assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC);
+    assert(info->type == NET_CLIENT_DRIVER_NIC);
     assert(info->size >= sizeof(NICState));
 
     nic = g_malloc0(info->size + sizeof(NetClientState) * queues);
@@ -288,8 +336,8 @@ static void qemu_cleanup_net_client(NetClientState *nc)
 
 static void qemu_free_net_client(NetClientState *nc)
 {
-    if (nc->send_queue) {
-        qemu_del_net_queue(nc->send_queue);
+    if (nc->incoming_queue) {
+        qemu_del_net_queue(nc->incoming_queue);
     }
     if (nc->peer) {
         nc->peer->peer = NULL;
@@ -305,17 +353,24 @@ void qemu_del_net_client(NetClientState *nc)
 {
     NetClientState *ncs[MAX_QUEUE_NUM];
     int queues, i;
+    NetFilterState *nf, *next;
+
+    assert(nc->info->type != NET_CLIENT_DRIVER_NIC);
 
     /* If the NetClientState belongs to a multiqueue backend, we will change all
      * other NetClientStates also.
      */
     queues = qemu_find_net_clients_except(nc->name, ncs,
-                                          NET_CLIENT_OPTIONS_KIND_NIC,
+                                          NET_CLIENT_DRIVER_NIC,
                                           MAX_QUEUE_NUM);
     assert(queues != 0);
 
+    QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) {
+        object_unparent(OBJECT(nf));
+    }
+
     /* If there is a peer NIC, delete and cleanup client, but do not free. */
-    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) {
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
         NICState *nic = qemu_get_nic(nc->peer);
         if (nic->peer_deleted) {
             return;
@@ -337,8 +392,6 @@ void qemu_del_net_client(NetClientState *nc)
         return;
     }
 
-    assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC);
-
     for (i = 0; i < queues; i++) {
         qemu_cleanup_net_client(ncs[i]);
         qemu_free_net_client(ncs[i]);
@@ -347,7 +400,9 @@ void qemu_del_net_client(NetClientState *nc)
 
 void qemu_del_nic(NICState *nic)
 {
-    int i, queues = MAX(nic->conf->queues, 1);
+    int i, queues = MAX(nic->conf->peers.queues, 1);
+
+    qemu_macaddr_set_free(&nic->conf->macaddr);
 
     /* If this is a peer NIC and peer has already been deleted, free it now. */
     if (nic->peer_deleted) {
@@ -371,7 +426,7 @@ void qemu_foreach_nic(qemu_nic_foreach func, void *opaque)
     NetClientState *nc;
 
     QTAILQ_FOREACH(nc, &net_clients, next) {
-        if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) {
+        if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
             if (nc->queue_index == 0) {
                 func(qemu_get_nic(nc), opaque);
             }
@@ -379,49 +434,160 @@ void qemu_foreach_nic(qemu_nic_foreach func, void *opaque)
     }
 }
 
-int qemu_can_send_packet(NetClientState *sender)
+bool qemu_has_ufo(NetClientState *nc)
 {
-    if (!sender->peer) {
-        return 1;
+    if (!nc || !nc->info->has_ufo) {
+        return false;
+    }
+
+    return nc->info->has_ufo(nc);
+}
+
+bool qemu_has_vnet_hdr(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_vnet_hdr) {
+        return false;
+    }
+
+    return nc->info->has_vnet_hdr(nc);
+}
+
+bool qemu_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    if (!nc || !nc->info->has_vnet_hdr_len) {
+        return false;
+    }
+
+    return nc->info->has_vnet_hdr_len(nc, len);
+}
+
+void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
+{
+    if (!nc || !nc->info->using_vnet_hdr) {
+        return;
+    }
+
+    nc->info->using_vnet_hdr(nc, enable);
+}
+
+void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                          int ecn, int ufo)
+{
+    if (!nc || !nc->info->set_offload) {
+        return;
     }
 
-    if (sender->peer->receive_disabled) {
+    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+}
+
+void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    if (!nc || !nc->info->set_vnet_hdr_len) {
+        return;
+    }
+
+    nc->vnet_hdr_len = len;
+    nc->info->set_vnet_hdr_len(nc, len);
+}
+
+int qemu_set_vnet_le(NetClientState *nc, bool is_le)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    if (!nc || !nc->info->set_vnet_le) {
+        return -ENOSYS;
+    }
+
+    return nc->info->set_vnet_le(nc, is_le);
+#else
+    return 0;
+#endif
+}
+
+int qemu_set_vnet_be(NetClientState *nc, bool is_be)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    return 0;
+#else
+    if (!nc || !nc->info->set_vnet_be) {
+        return -ENOSYS;
+    }
+
+    return nc->info->set_vnet_be(nc, is_be);
+#endif
+}
+
+int qemu_can_receive_packet(NetClientState *nc)
+{
+    if (nc->receive_disabled) {
         return 0;
-    } else if (sender->peer->info->can_receive &&
-               !sender->peer->info->can_receive(sender->peer)) {
+    } else if (nc->info->can_receive &&
+               !nc->info->can_receive(nc)) {
         return 0;
     }
     return 1;
 }
 
-ssize_t qemu_deliver_packet(NetClientState *sender,
-                            unsigned flags,
-                            const uint8_t *data,
-                            size_t size,
-                            void *opaque)
+int qemu_can_send_packet(NetClientState *sender)
 {
-    NetClientState *nc = opaque;
-    ssize_t ret;
+    int vm_running = runstate_is_running();
 
-    if (nc->link_down) {
-        return size;
+    if (!vm_running) {
+        return 0;
     }
 
-    if (nc->receive_disabled) {
-        return 0;
+    if (!sender->peer) {
+        return 1;
     }
 
-    if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
-        ret = nc->info->receive_raw(nc, data, size);
+    return qemu_can_receive_packet(sender->peer);
+}
+
+static ssize_t filter_receive_iov(NetClientState *nc,
+                                  NetFilterDirection direction,
+                                  NetClientState *sender,
+                                  unsigned flags,
+                                  const struct iovec *iov,
+                                  int iovcnt,
+                                  NetPacketSent *sent_cb)
+{
+    ssize_t ret = 0;
+    NetFilterState *nf = NULL;
+
+    if (direction == NET_FILTER_DIRECTION_TX) {
+        QTAILQ_FOREACH(nf, &nc->filters, next) {
+            ret = qemu_netfilter_receive(nf, direction, sender, flags, iov,
+                                         iovcnt, sent_cb);
+            if (ret) {
+                return ret;
+            }
+        }
     } else {
-        ret = nc->info->receive(nc, data, size);
+        QTAILQ_FOREACH_REVERSE(nf, &nc->filters, next) {
+            ret = qemu_netfilter_receive(nf, direction, sender, flags, iov,
+                                         iovcnt, sent_cb);
+            if (ret) {
+                return ret;
+            }
+        }
     }
 
-    if (ret == 0) {
-        nc->receive_disabled = 1;
+    return ret;
+}
+
+static ssize_t filter_receive(NetClientState *nc,
+                              NetFilterDirection direction,
+                              NetClientState *sender,
+                              unsigned flags,
+                              const uint8_t *data,
+                              size_t size,
+                              NetPacketSent *sent_cb)
+{
+    struct iovec iov = {
+        .iov_base = (void *)data,
+        .iov_len = size
     };
 
-    return ret;
+    return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb);
 }
 
 void qemu_purge_queued_packets(NetClientState *nc)
@@ -430,44 +596,65 @@ void qemu_purge_queued_packets(NetClientState *nc)
         return;
     }
 
-    qemu_net_queue_purge(nc->peer->send_queue, nc);
+    qemu_net_queue_purge(nc->peer->incoming_queue, nc);
 }
 
-void qemu_flush_queued_packets(NetClientState *nc)
+void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge)
 {
     nc->receive_disabled = 0;
 
-    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) {
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_HUBPORT) {
         if (net_hub_flush(nc->peer)) {
             qemu_notify_event();
         }
-        return;
     }
-    if (qemu_net_queue_flush(nc->send_queue)) {
+    if (qemu_net_queue_flush(nc->incoming_queue)) {
         /* We emptied the queue successfully, signal to the IO thread to repoll
          * the file descriptor (for tap, for example).
          */
         qemu_notify_event();
+    } else if (purge) {
+        /* Unable to empty the queue, purge remaining packets */
+        qemu_net_queue_purge(nc->incoming_queue, nc);
     }
 }
 
+void qemu_flush_queued_packets(NetClientState *nc)
+{
+    qemu_flush_or_purge_queued_packets(nc, false);
+}
+
 static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
                                                  unsigned flags,
                                                  const uint8_t *buf, int size,
                                                  NetPacketSent *sent_cb)
 {
     NetQueue *queue;
+    int ret;
 
 #ifdef DEBUG_NET
     printf("qemu_send_packet_async:\n");
-    hex_dump(stdout, buf, size);
+    qemu_hexdump((const char *)buf, stdout, "net", size);
 #endif
 
     if (sender->link_down || !sender->peer) {
         return size;
     }
 
-    queue = sender->peer->send_queue;
+    /* Let filters handle the packet first */
+    ret = filter_receive(sender, NET_FILTER_DIRECTION_TX,
+                         sender, flags, buf, size, sent_cb);
+    if (ret) {
+        return ret;
+    }
+
+    ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX,
+                         sender, flags, buf, size, sent_cb);
+    if (ret) {
+        return ret;
+    }
+
+    queue = sender->peer->incoming_queue;
 
     return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb);
 }
@@ -480,9 +667,28 @@ ssize_t qemu_send_packet_async(NetClientState *sender,
                                              buf, size, sent_cb);
 }
 
-void qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size)
+ssize_t qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size)
+{
+    return qemu_send_packet_async(nc, buf, size, NULL);
+}
+
+ssize_t qemu_receive_packet(NetClientState *nc, const uint8_t *buf, int size)
+{
+    if (!qemu_can_receive_packet(nc)) {
+        return 0;
+    }
+
+    return qemu_net_queue_receive(nc->incoming_queue, buf, size);
+}
+
+ssize_t qemu_receive_packet_iov(NetClientState *nc, const struct iovec *iov,
+                                int iovcnt)
 {
-    qemu_send_packet_async(nc, buf, size, NULL);
+    if (!qemu_can_receive_packet(nc)) {
+        return 0;
+    }
+
+    return qemu_net_queue_receive_iov(nc->incoming_queue, iov, iovcnt);
 }
 
 ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size)
@@ -492,25 +698,46 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size)
 }
 
 static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
-                               int iovcnt)
+                               int iovcnt, unsigned flags)
 {
-    uint8_t buffer[NET_BUFSIZE];
+    uint8_t *buf = NULL;
+    uint8_t *buffer;
     size_t offset;
+    ssize_t ret;
+
+    if (iovcnt == 1) {
+        buffer = iov[0].iov_base;
+        offset = iov[0].iov_len;
+    } else {
+        offset = iov_size(iov, iovcnt);
+        if (offset > NET_BUFSIZE) {
+            return -1;
+        }
+        buf = g_malloc(offset);
+        buffer = buf;
+        offset = iov_to_buf(iov, iovcnt, 0, buf, offset);
+    }
 
-    offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer));
+    if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
+        ret = nc->info->receive_raw(nc, buffer, offset);
+    } else {
+        ret = nc->info->receive(nc, buffer, offset);
+    }
 
-    return nc->info->receive(nc, buffer, offset);
+    g_free(buf);
+    return ret;
 }
 
-ssize_t qemu_deliver_packet_iov(NetClientState *sender,
-                                unsigned flags,
-                                const struct iovec *iov,
-                                int iovcnt,
-                                void *opaque)
+static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
+                                       unsigned flags,
+                                       const struct iovec *iov,
+                                       int iovcnt,
+                                       void *opaque)
 {
     NetClientState *nc = opaque;
     int ret;
 
+
     if (nc->link_down) {
         return iov_size(iov, iovcnt);
     }
@@ -519,10 +746,10 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
         return 0;
     }
 
-    if (nc->info->receive_iov) {
+    if (nc->info->receive_iov && !(flags & QEMU_NET_PACKET_FLAG_RAW)) {
         ret = nc->info->receive_iov(nc, iov, iovcnt);
     } else {
-        ret = nc_sendv_compat(nc, iov, iovcnt);
+        ret = nc_sendv_compat(nc, iov, iovcnt, flags);
     }
 
     if (ret == 0) {
@@ -537,12 +764,31 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender,
                                 NetPacketSent *sent_cb)
 {
     NetQueue *queue;
+    size_t size = iov_size(iov, iovcnt);
+    int ret;
+
+    if (size > NET_BUFSIZE) {
+        return size;
+    }
 
     if (sender->link_down || !sender->peer) {
-        return iov_size(iov, iovcnt);
+        return size;
+    }
+
+    /* Let filters handle the packet first */
+    ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender,
+                             QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb);
+    if (ret) {
+        return ret;
+    }
+
+    ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender,
+                             QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb);
+    if (ret) {
+        return ret;
     }
 
-    queue = sender->peer->send_queue;
+    queue = sender->peer->incoming_queue;
 
     return qemu_net_queue_send_iov(queue, sender,
                                    QEMU_NET_PACKET_FLAG_NONE,
@@ -560,7 +806,7 @@ NetClientState *qemu_find_netdev(const char *id)
     NetClientState *nc;
 
     QTAILQ_FOREACH(nc, &net_clients, next) {
-        if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC)
+        if (nc->info->type == NET_CLIENT_DRIVER_NIC)
             continue;
         if (!strcmp(nc->name, id)) {
             return nc;
@@ -571,7 +817,7 @@ NetClientState *qemu_find_netdev(const char *id)
 }
 
 int qemu_find_net_clients_except(const char *id, NetClientState **ncs,
-                                 NetClientOptionsKind type, int max)
+                                 NetClientDriver type, int max)
 {
     NetClientState *nc;
     int ret = 0;
@@ -580,7 +826,7 @@ int qemu_find_net_clients_except(const char *id, NetClientState **ncs,
         if (nc->info->type == type) {
             continue;
         }
-        if (!strcmp(nc->name, id)) {
+        if (!id || !strcmp(nc->name, id)) {
             if (ret < max) {
                 ncs[ret] = nc;
             }
@@ -609,9 +855,10 @@ int qemu_show_nic_models(const char *arg, const char *const *models)
         return 0;
     }
 
-    fprintf(stderr, "qemu: Supported NIC models: ");
-    for (i = 0 ; models[i]; i++)
-        fprintf(stderr, "%s%c", models[i], models[i+1] ? ',' : '\n');
+    printf("Supported NIC models:\n");
+    for (i = 0 ; models[i]; i++) {
+        printf("%s\n", models[i]);
+    }
     return 1;
 }
 
@@ -645,19 +892,19 @@ int qemu_find_nic_model(NICInfo *nd, const char * const *models,
     return -1;
 }
 
-static int net_init_nic(const NetClientOptions *opts, const char *name,
-                        NetClientState *peer)
+static int net_init_nic(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
 {
     int idx;
     NICInfo *nd;
     const NetLegacyNicOptions *nic;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_NIC);
-    nic = opts->nic;
+    assert(netdev->type == NET_CLIENT_DRIVER_NIC);
+    nic = &netdev->u.nic;
 
     idx = nic_get_free_idx();
     if (idx == -1 || nb_nics >= MAX_NICS) {
-        error_report("Too Many NICs");
+        error_setg(errp, "too many NICs");
         return -1;
     }
 
@@ -668,7 +915,7 @@ static int net_init_nic(const NetClientOptions *opts, const char *name,
     if (nic->has_netdev) {
         nd->netdev = qemu_find_netdev(nic->netdev);
         if (!nd->netdev) {
-            error_report("netdev '%s' not found", nic->netdev);
+            error_setg(errp, "netdev '%s' not found", nic->netdev);
             return -1;
         }
     } else {
@@ -685,14 +932,20 @@ static int net_init_nic(const NetClientOptions *opts, const char *name,
 
     if (nic->has_macaddr &&
         net_parse_macaddr(nd->macaddr.a, nic->macaddr) < 0) {
-        error_report("invalid syntax for ethernet address");
+        error_setg(errp, "invalid syntax for ethernet address");
+        return -1;
+    }
+    if (nic->has_macaddr &&
+        is_multicast_ether_addr(nd->macaddr.a)) {
+        error_setg(errp,
+                   "NIC cannot have multicast MAC address (odd 1st byte)");
         return -1;
     }
     qemu_macaddr_default_if_unset(&nd->macaddr);
 
     if (nic->has_vectors) {
         if (nic->vectors > 0x7ffffff) {
-            error_report("invalid # of vectors: %"PRIu32, nic->vectors);
+            error_setg(errp, "invalid # of vectors: %"PRIu32, nic->vectors);
             return -1;
         }
         nd->nvectors = nic->vectors;
@@ -707,229 +960,267 @@ static int net_init_nic(const NetClientOptions *opts, const char *name,
 }
 
 
-static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
-    const NetClientOptions *opts,
+static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
+    const Netdev *netdev,
     const char *name,
-    NetClientState *peer) = {
-        [NET_CLIENT_OPTIONS_KIND_NIC]       = net_init_nic,
+    NetClientState *peer, Error **errp) = {
+        [NET_CLIENT_DRIVER_NIC]       = net_init_nic,
 #ifdef CONFIG_SLIRP
-        [NET_CLIENT_OPTIONS_KIND_USER]      = net_init_slirp,
+        [NET_CLIENT_DRIVER_USER]      = net_init_slirp,
 #endif
-        [NET_CLIENT_OPTIONS_KIND_TAP]       = net_init_tap,
-        [NET_CLIENT_OPTIONS_KIND_SOCKET]    = net_init_socket,
+        [NET_CLIENT_DRIVER_TAP]       = net_init_tap,
+        [NET_CLIENT_DRIVER_SOCKET]    = net_init_socket,
 #ifdef CONFIG_VDE
-        [NET_CLIENT_OPTIONS_KIND_VDE]       = net_init_vde,
+        [NET_CLIENT_DRIVER_VDE]       = net_init_vde,
+#endif
+#ifdef CONFIG_NETMAP
+        [NET_CLIENT_DRIVER_NETMAP]    = net_init_netmap,
 #endif
-        [NET_CLIENT_OPTIONS_KIND_DUMP]      = net_init_dump,
 #ifdef CONFIG_NET_BRIDGE
-        [NET_CLIENT_OPTIONS_KIND_BRIDGE]    = net_init_bridge,
+        [NET_CLIENT_DRIVER_BRIDGE]    = net_init_bridge,
+#endif
+        [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
+#ifdef CONFIG_VHOST_NET_USER
+        [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
+#endif
+#ifdef CONFIG_L2TPV3
+        [NET_CLIENT_DRIVER_L2TPV3]    = net_init_l2tpv3,
 #endif
-        [NET_CLIENT_OPTIONS_KIND_HUBPORT]   = net_init_hubport,
 };
 
 
-static int net_client_init1(const void *object, int is_netdev, Error **errp)
+static int net_client_init1(const void *object, bool is_netdev, Error **errp)
 {
-    union {
-        const Netdev    *netdev;
-        const NetLegacy *net;
-    } u;
-    const NetClientOptions *opts;
+    Netdev legacy = {0};
+    const Netdev *netdev;
     const char *name;
+    NetClientState *peer = NULL;
 
     if (is_netdev) {
-        u.netdev = object;
-        opts = u.netdev->opts;
-        name = u.netdev->id;
+        netdev = object;
+        name = netdev->id;
 
-        switch (opts->kind) {
-#ifdef CONFIG_SLIRP
-        case NET_CLIENT_OPTIONS_KIND_USER:
-#endif
-        case NET_CLIENT_OPTIONS_KIND_TAP:
-        case NET_CLIENT_OPTIONS_KIND_SOCKET:
-#ifdef CONFIG_VDE
-        case NET_CLIENT_OPTIONS_KIND_VDE:
-#endif
-#ifdef CONFIG_NET_BRIDGE
-        case NET_CLIENT_OPTIONS_KIND_BRIDGE:
-#endif
-        case NET_CLIENT_OPTIONS_KIND_HUBPORT:
-            break;
-
-        default:
-            error_set(errp, QERR_INVALID_PARAMETER_VALUE, "type",
-                      "a netdev backend type");
+        if (netdev->type == NET_CLIENT_DRIVER_NIC ||
+            !net_client_init_fun[netdev->type]) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
+                       "a netdev backend type");
             return -1;
         }
     } else {
-        u.net = object;
-        opts = u.net->opts;
+        const NetLegacy *net = object;
+        const NetLegacyOptions *opts = net->opts;
+        legacy.id = net->id;
+        netdev = &legacy;
         /* missing optional values have been initialized to "all bits zero" */
-        name = u.net->has_id ? u.net->id : u.net->name;
-    }
+        name = net->has_id ? net->id : net->name;
 
-    if (net_client_init_fun[opts->kind]) {
-        NetClientState *peer = NULL;
+        if (net->has_name) {
+            warn_report("The 'name' parameter is deprecated, use 'id' instead");
+        }
 
-        /* Do not add to a vlan if it's a -netdev or a nic with a netdev=
-         * parameter. */
-        if (!is_netdev &&
-            (opts->kind != NET_CLIENT_OPTIONS_KIND_NIC ||
-             !opts->nic->has_netdev)) {
-            peer = net_hub_add_port(u.net->has_vlan ? u.net->vlan : 0, NULL);
+        /* Map the old options to the new flat type */
+        switch (opts->type) {
+        case NET_LEGACY_OPTIONS_TYPE_NONE:
+            return 0; /* nothing to do */
+        case NET_LEGACY_OPTIONS_TYPE_NIC:
+            legacy.type = NET_CLIENT_DRIVER_NIC;
+            legacy.u.nic = opts->u.nic;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_USER:
+            legacy.type = NET_CLIENT_DRIVER_USER;
+            legacy.u.user = opts->u.user;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_TAP:
+            legacy.type = NET_CLIENT_DRIVER_TAP;
+            legacy.u.tap = opts->u.tap;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_L2TPV3:
+            legacy.type = NET_CLIENT_DRIVER_L2TPV3;
+            legacy.u.l2tpv3 = opts->u.l2tpv3;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_SOCKET:
+            legacy.type = NET_CLIENT_DRIVER_SOCKET;
+            legacy.u.socket = opts->u.socket;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_VDE:
+            legacy.type = NET_CLIENT_DRIVER_VDE;
+            legacy.u.vde = opts->u.vde;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_BRIDGE:
+            legacy.type = NET_CLIENT_DRIVER_BRIDGE;
+            legacy.u.bridge = opts->u.bridge;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_NETMAP:
+            legacy.type = NET_CLIENT_DRIVER_NETMAP;
+            legacy.u.netmap = opts->u.netmap;
+            break;
+        case NET_LEGACY_OPTIONS_TYPE_VHOST_USER:
+            legacy.type = NET_CLIENT_DRIVER_VHOST_USER;
+            legacy.u.vhost_user = opts->u.vhost_user;
+            break;
+        default:
+            abort();
         }
 
-        if (net_client_init_fun[opts->kind](opts, name, peer) < 0) {
-            /* TODO push error reporting into init() methods */
-            error_set(errp, QERR_DEVICE_INIT_FAILED,
-                      NetClientOptionsKind_lookup[opts->kind]);
+        if (!net_client_init_fun[netdev->type]) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
+                       "a net backend type (maybe it is not compiled "
+                       "into this binary)");
             return -1;
         }
+
+        /* Do not add to a hub if it's a nic with a netdev= parameter. */
+        if (netdev->type != NET_CLIENT_DRIVER_NIC ||
+            !opts->u.nic.has_netdev) {
+            peer = net_hub_add_port(0, NULL, NULL);
+        }
+    }
+
+    if (net_client_init_fun[netdev->type](netdev, name, peer, errp) < 0) {
+        /* FIXME drop when all init functions store an Error */
+        if (errp && !*errp) {
+            error_setg(errp, QERR_DEVICE_INIT_FAILED,
+                       NetClientDriver_str(netdev->type));
+        }
+        return -1;
     }
     return 0;
 }
 
-
-static void net_visit(Visitor *v, int is_netdev, void **object, Error **errp)
+static void show_netdevs(void)
 {
-    if (is_netdev) {
-        visit_type_Netdev(v, (Netdev **)object, NULL, errp);
-    } else {
-        visit_type_NetLegacy(v, (NetLegacy **)object, NULL, errp);
+    int idx;
+    const char *available_netdevs[] = {
+        "socket",
+        "hubport",
+        "tap",
+#ifdef CONFIG_SLIRP
+        "user",
+#endif
+#ifdef CONFIG_L2TPV3
+        "l2tpv3",
+#endif
+#ifdef CONFIG_VDE
+        "vde",
+#endif
+#ifdef CONFIG_NET_BRIDGE
+        "bridge",
+#endif
+#ifdef CONFIG_NETMAP
+        "netmap",
+#endif
+#ifdef CONFIG_POSIX
+        "vhost-user",
+#endif
+    };
+
+    printf("Available netdev backend types:\n");
+    for (idx = 0; idx < ARRAY_SIZE(available_netdevs); idx++) {
+        puts(available_netdevs[idx]);
     }
 }
 
-
-int net_client_init(QemuOpts *opts, int is_netdev, Error **errp)
+static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp)
 {
+    gchar **substrings = NULL;
     void *object = NULL;
     Error *err = NULL;
     int ret = -1;
+    Visitor *v = opts_visitor_new(opts);
 
-    {
-        OptsVisitor *ov = opts_visitor_new(opts);
+    const char *type = qemu_opt_get(opts, "type");
 
-        net_visit(opts_get_visitor(ov), is_netdev, &object, &err);
-        opts_visitor_cleanup(ov);
-    }
-
-    if (!err) {
-        ret = net_client_init1(object, is_netdev, &err);
-    }
-
-    if (object) {
-        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
+    if (is_netdev && type && is_help_option(type)) {
+        show_netdevs();
+        exit(0);
+    } else {
+        /* Parse convenience option format ip6-net=fec0::0[/64] */
+        const char *ip6_net = qemu_opt_get(opts, "ipv6-net");
+
+        if (ip6_net) {
+            char *prefix_addr;
+            unsigned long prefix_len = 64; /* Default 64bit prefix length. */
+
+            substrings = g_strsplit(ip6_net, "/", 2);
+            if (!substrings || !substrings[0]) {
+                error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "ipv6-net",
+                           "a valid IPv6 prefix");
+                goto out;
+            }
 
-        net_visit(qapi_dealloc_get_visitor(dv), is_netdev, &object, NULL);
-        qapi_dealloc_visitor_cleanup(dv);
-    }
+            prefix_addr = substrings[0];
 
-    error_propagate(errp, err);
-    return ret;
-}
+            if (substrings[1]) {
+                /* User-specified prefix length.  */
+                int err;
 
+                err = qemu_strtoul(substrings[1], NULL, 10, &prefix_len);
+                if (err) {
+                    error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                               "ipv6-prefixlen", "a number");
+                    goto out;
+                }
+            }
 
-static int net_host_check_device(const char *device)
-{
-    int i;
-    const char *valid_param_list[] = { "tap", "socket", "dump"
-#ifdef CONFIG_NET_BRIDGE
-                                       , "bridge"
-#endif
-#ifdef CONFIG_SLIRP
-                                       ,"user"
-#endif
-#ifdef CONFIG_VDE
-                                       ,"vde"
-#endif
-    };
-    for (i = 0; i < sizeof(valid_param_list) / sizeof(char *); i++) {
-        if (!strncmp(valid_param_list[i], device,
-                     strlen(valid_param_list[i])))
-            return 1;
+            qemu_opt_set(opts, "ipv6-prefix", prefix_addr, &error_abort);
+            qemu_opt_set_number(opts, "ipv6-prefixlen", prefix_len,
+                                &error_abort);
+            qemu_opt_unset(opts, "ipv6-net");
+        }
     }
 
-    return 0;
-}
-
-void net_host_device_add(Monitor *mon, const QDict *qdict)
-{
-    const char *device = qdict_get_str(qdict, "device");
-    const char *opts_str = qdict_get_try_str(qdict, "opts");
-    Error *local_err = NULL;
-    QemuOpts *opts;
-
-    if (!net_host_check_device(device)) {
-        monitor_printf(mon, "invalid host network device %s\n", device);
-        return;
+    if (is_netdev) {
+        visit_type_Netdev(v, NULL, (Netdev **)&object, &err);
+    } else {
+        visit_type_NetLegacy(v, NULL, (NetLegacy **)&object, &err);
     }
 
-    opts = qemu_opts_parse(qemu_find_opts("net"), opts_str ? opts_str : "", 0);
-    if (!opts) {
-        return;
+    if (!err) {
+        ret = net_client_init1(object, is_netdev, &err);
     }
 
-    qemu_opt_set(opts, "type", device);
-
-    net_client_init(opts, 0, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
-        monitor_printf(mon, "adding host network device %s failed\n", device);
+    if (is_netdev) {
+        qapi_free_Netdev(object);
+    } else {
+        qapi_free_NetLegacy(object);
     }
-}
 
-void net_host_device_remove(Monitor *mon, const QDict *qdict)
-{
-    NetClientState *nc;
-    int vlan_id = qdict_get_int(qdict, "vlan_id");
-    const char *device = qdict_get_str(qdict, "device");
-
-    nc = net_hub_find_client_by_name(vlan_id, device);
-    if (!nc) {
-        return;
-    }
-    if (!net_host_check_device(nc->model)) {
-        monitor_printf(mon, "invalid host network device %s\n", device);
-        return;
-    }
-    qemu_del_net_client(nc);
+out:
+    error_propagate(errp, err);
+    g_strfreev(substrings);
+    visit_free(v);
+    return ret;
 }
 
 void netdev_add(QemuOpts *opts, Error **errp)
 {
-    net_client_init(opts, 1, errp);
+    net_client_init(opts, true, errp);
 }
 
-int qmp_netdev_add(Monitor *mon, const QDict *qdict, QObject **ret)
+void qmp_netdev_add(QDict *qdict, QObject **ret, Error **errp)
 {
     Error *local_err = NULL;
     QemuOptsList *opts_list;
     QemuOpts *opts;
 
     opts_list = qemu_find_opts_err("netdev", &local_err);
-    if (error_is_set(&local_err)) {
-        goto exit_err;
+    if (local_err) {
+        goto out;
     }
 
     opts = qemu_opts_from_qdict(opts_list, qdict, &local_err);
-    if (error_is_set(&local_err)) {
-        goto exit_err;
+    if (local_err) {
+        goto out;
     }
 
     netdev_add(opts, &local_err);
-    if (error_is_set(&local_err)) {
+    if (local_err) {
         qemu_opts_del(opts);
-        goto exit_err;
+        goto out;
     }
 
-    return 0;
-
-exit_err:
-    qerror_report_err(local_err);
-    error_free(local_err);
-    return -1;
+out:
+    error_propagate(errp, local_err);
 }
 
 void qmp_netdev_del(const char *id, Error **errp)
@@ -939,7 +1230,8 @@ void qmp_netdev_del(const char *id, Error **errp)
 
     nc = qemu_find_netdev(id);
     if (!nc) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, id);
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", id);
         return;
     }
 
@@ -953,12 +1245,48 @@ void qmp_netdev_del(const char *id, Error **errp)
     qemu_opts_del(opts);
 }
 
+static void netfilter_print_info(Monitor *mon, NetFilterState *nf)
+{
+    char *str;
+    ObjectProperty *prop;
+    ObjectPropertyIterator iter;
+    Visitor *v;
+
+    /* generate info str */
+    object_property_iter_init(&iter, OBJECT(nf));
+    while ((prop = object_property_iter_next(&iter))) {
+        if (!strcmp(prop->name, "type")) {
+            continue;
+        }
+        v = string_output_visitor_new(false, &str);
+        object_property_get(OBJECT(nf), v, prop->name, NULL);
+        visit_complete(v, &str);
+        visit_free(v);
+        monitor_printf(mon, ",%s=%s", prop->name, str);
+        g_free(str);
+    }
+    monitor_printf(mon, "\n");
+}
+
 void print_net_client(Monitor *mon, NetClientState *nc)
 {
+    NetFilterState *nf;
+
     monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name,
                    nc->queue_index,
-                   NetClientOptionsKind_lookup[nc->info->type],
+                   NetClientDriver_str(nc->info->type),
                    nc->info_str);
+    if (!QTAILQ_EMPTY(&nc->filters)) {
+        monitor_printf(mon, "filters:\n");
+    }
+    QTAILQ_FOREACH(nf, &nc->filters, next) {
+        char *path = object_get_canonical_path_component(OBJECT(nf));
+
+        monitor_printf(mon, "  - %s: type=%s", path,
+                       object_get_typename(OBJECT(nf)));
+        netfilter_print_info(mon, nf);
+        g_free(path);
+    }
 }
 
 RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
@@ -976,14 +1304,20 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
         }
 
         /* only query rx-filter information of NIC */
-        if (nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC) {
+        if (nc->info->type != NET_CLIENT_DRIVER_NIC) {
             if (has_name) {
                 error_setg(errp, "net client(%s) isn't a NIC", name);
-                break;
+                return NULL;
             }
             continue;
         }
 
+        /* only query information on queue 0 since the info is per nic,
+         * not per queue
+         */
+        if (nc->queue_index != 0)
+            continue;
+
         if (nc->info->query_rx_filter) {
             info = nc->info->query_rx_filter(nc);
             entry = g_malloc0(sizeof(*entry));
@@ -998,21 +1332,25 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name,
         } else if (has_name) {
             error_setg(errp, "net client(%s) doesn't support"
                        " rx-filter querying", name);
+            return NULL;
+        }
+
+        if (has_name) {
             break;
         }
     }
 
-    if (filter_list == NULL && !error_is_set(errp) && has_name) {
+    if (filter_list == NULL && has_name) {
         error_setg(errp, "invalid net client name: %s", name);
     }
 
     return filter_list;
 }
 
-void do_info_network(Monitor *mon, const QDict *qdict)
+void hmp_info_network(Monitor *mon, const QDict *qdict)
 {
     NetClientState *nc, *peer;
-    NetClientOptionsKind type;
+    NetClientDriver type;
 
     net_hub_info(mon);
 
@@ -1025,16 +1363,35 @@ void do_info_network(Monitor *mon, const QDict *qdict)
             continue;
         }
 
-        if (!peer || type == NET_CLIENT_OPTIONS_KIND_NIC) {
+        if (!peer || type == NET_CLIENT_DRIVER_NIC) {
             print_net_client(mon, nc);
         } /* else it's a netdev connected to a NIC, printed with the NIC */
-        if (peer && type == NET_CLIENT_OPTIONS_KIND_NIC) {
+        if (peer && type == NET_CLIENT_DRIVER_NIC) {
             monitor_printf(mon, " \\ ");
             print_net_client(mon, peer);
         }
     }
 }
 
+void colo_notify_filters_event(int event, Error **errp)
+{
+    NetClientState *nc;
+    NetFilterState *nf;
+    NetFilterClass *nfc = NULL;
+    Error *local_err = NULL;
+
+    QTAILQ_FOREACH(nc, &net_clients, next) {
+        QTAILQ_FOREACH(nf, &nc->filters, next) {
+            nfc = NETFILTER_GET_CLASS(OBJECT(nf));
+            nfc->handle_event(nf, event, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+        }
+    }
+}
+
 void qmp_set_link(const char *name, bool up, Error **errp)
 {
     NetClientState *ncs[MAX_QUEUE_NUM];
@@ -1042,11 +1399,12 @@ void qmp_set_link(const char *name, bool up, Error **errp)
     int queues, i;
 
     queues = qemu_find_net_clients_except(name, ncs,
-                                          NET_CLIENT_OPTIONS_KIND_MAX,
+                                          NET_CLIENT_DRIVER__MAX,
                                           MAX_QUEUE_NUM);
 
     if (queues == 0) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, name);
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "Device '%s' not found", name);
         return;
     }
     nc = ncs[0];
@@ -1059,15 +1417,44 @@ void qmp_set_link(const char *name, bool up, Error **errp)
         nc->info->link_status_changed(nc);
     }
 
-    /* Notify peer. Don't update peer link status: this makes it possible to
-     * disconnect from host network without notifying the guest.
-     * FIXME: is disconnected link status change operation useful?
-     *
-     * Current behaviour is compatible with qemu vlans where there could be
-     * multiple clients that can still communicate with each other in
-     * disconnected mode. For now maintain this compatibility. */
-    if (nc->peer && nc->peer->info->link_status_changed) {
-        nc->peer->info->link_status_changed(nc->peer);
+    if (nc->peer) {
+        /* Change peer link only if the peer is NIC and then notify peer.
+         * If the peer is a HUBPORT or a backend, we do not change the
+         * link status.
+         *
+         * This behavior is compatible with qemu hubs where there could be
+         * multiple clients that can still communicate with each other in
+         * disconnected mode. For now maintain this compatibility.
+         */
+        if (nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+            for (i = 0; i < queues; i++) {
+                ncs[i]->peer->link_down = !up;
+            }
+        }
+        if (nc->peer->info->link_status_changed) {
+            nc->peer->info->link_status_changed(nc->peer);
+        }
+    }
+}
+
+static void net_vm_change_state_handler(void *opaque, int running,
+                                        RunState state)
+{
+    NetClientState *nc;
+    NetClientState *tmp;
+
+    QTAILQ_FOREACH_SAFE(nc, &net_clients, next, tmp) {
+        if (running) {
+            /* Flush queued packets and wake up backends. */
+            if (nc->peer && qemu_can_send_packet(nc)) {
+                qemu_flush_queued_packets(nc->peer);
+            }
+        } else {
+            /* Complete all queued packets, to guarantee we don't modify
+             * state later when VM is not running.
+             */
+            qemu_flush_or_purge_queued_packets(nc, true);
+        }
     }
 }
 
@@ -1080,12 +1467,14 @@ void net_cleanup(void)
      */
     while (!QTAILQ_EMPTY(&net_clients)) {
         nc = QTAILQ_FIRST(&net_clients);
-        if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) {
+        if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
             qemu_del_nic(qemu_get_nic(nc));
         } else {
             qemu_del_net_client(nc);
         }
     }
+
+    qemu_del_vm_change_state_handler(net_change_state_entry);
 }
 
 void net_check_clients(void)
@@ -1093,25 +1482,14 @@ void net_check_clients(void)
     NetClientState *nc;
     int i;
 
-    /* Don't warn about the default network setup that you get if
-     * no command line -net or -netdev options are specified. There
-     * are two cases that we would otherwise complain about:
-     * (1) board doesn't support a NIC but the implicit "-net nic"
-     * requested one
-     * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic"
-     * sets up a nic that isn't connected to anything.
-     */
-    if (default_net) {
-        return;
-    }
-
     net_hub_check_clients();
 
     QTAILQ_FOREACH(nc, &net_clients, next) {
         if (!nc->peer) {
-            fprintf(stderr, "Warning: %s %s has no peer\n",
-                    nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC ?
-                    "nic" : "netdev", nc->name);
+            warn_report("%s %s has no peer",
+                        nc->info->type == NET_CLIENT_DRIVER_NIC
+                        ? "nic" : "netdev",
+                        nc->name);
         }
     }
 
@@ -1122,61 +1500,104 @@ void net_check_clients(void)
     for (i = 0; i < MAX_NICS; i++) {
         NICInfo *nd = &nd_table[i];
         if (nd->used && !nd->instantiated) {
-            fprintf(stderr, "Warning: requested NIC (%s, model %s) "
-                    "was not created (not supported by this machine?)\n",
-                    nd->name ? nd->name : "anonymous",
-                    nd->model ? nd->model : "unspecified");
+            warn_report("requested NIC (%s, model %s) "
+                        "was not created (not supported by this machine?)",
+                        nd->name ? nd->name : "anonymous",
+                        nd->model ? nd->model : "unspecified");
         }
     }
 }
 
-static int net_init_client(QemuOpts *opts, void *dummy)
+static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
 {
-    Error *local_err = NULL;
-
-    net_client_init(opts, 0, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
-        return -1;
-    }
+    return net_client_init(opts, false, errp);
+}
 
-    return 0;
+static int net_init_netdev(void *dummy, QemuOpts *opts, Error **errp)
+{
+    return net_client_init(opts, true, errp);
 }
 
-static int net_init_netdev(QemuOpts *opts, void *dummy)
+/* For the convenience "--nic" parameter */
+static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
 {
-    Error *local_err = NULL;
-    int ret;
+    char *mac, *nd_id;
+    int idx, ret;
+    NICInfo *ni;
+    const char *type;
+
+    type = qemu_opt_get(opts, "type");
+    if (type && g_str_equal(type, "none")) {
+        return 0;    /* Nothing to do, default_net is cleared in vl.c */
+    }
 
-    ret = net_client_init(opts, 1, &local_err);
-    if (error_is_set(&local_err)) {
-        qerror_report_err(local_err);
-        error_free(local_err);
+    idx = nic_get_free_idx();
+    if (idx == -1 || nb_nics >= MAX_NICS) {
+        error_setg(errp, "no more on-board/default NIC slots available");
         return -1;
     }
 
+    if (!type) {
+        qemu_opt_set(opts, "type", "user", &error_abort);
+    }
+
+    ni = &nd_table[idx];
+    memset(ni, 0, sizeof(*ni));
+    ni->model = qemu_opt_get_del(opts, "model");
+
+    /* Create an ID if the user did not specify one */
+    nd_id = g_strdup(qemu_opts_id(opts));
+    if (!nd_id) {
+        nd_id = g_strdup_printf("__org.qemu.nic%i", idx);
+        qemu_opts_set_id(opts, nd_id);
+    }
+
+    /* Handle MAC address */
+    mac = qemu_opt_get_del(opts, "mac");
+    if (mac) {
+        ret = net_parse_macaddr(ni->macaddr.a, mac);
+        g_free(mac);
+        if (ret) {
+            error_setg(errp, "invalid syntax for ethernet address");
+            goto out;
+        }
+        if (is_multicast_ether_addr(ni->macaddr.a)) {
+            error_setg(errp, "NIC cannot have multicast MAC address");
+            ret = -1;
+            goto out;
+        }
+    }
+    qemu_macaddr_default_if_unset(&ni->macaddr);
+
+    ret = net_client_init(opts, true, errp);
+    if (ret == 0) {
+        ni->netdev = qemu_find_netdev(nd_id);
+        ni->used = true;
+        nb_nics++;
+    }
+
+out:
+    g_free(nd_id);
     return ret;
 }
 
-int net_init_clients(void)
+int net_init_clients(Error **errp)
 {
-    QemuOptsList *net = qemu_find_opts("net");
-
-    if (default_net) {
-        /* if no clients, we use a default config */
-        qemu_opts_set(net, NULL, "type", "nic");
-#ifdef CONFIG_SLIRP
-        qemu_opts_set(net, NULL, "type", "user");
-#endif
-    }
+    net_change_state_entry =
+        qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL);
 
     QTAILQ_INIT(&net_clients);
 
-    if (qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, 1) == -1)
+    if (qemu_opts_foreach(qemu_find_opts("netdev"),
+                          net_init_netdev, NULL, errp)) {
         return -1;
+    }
 
-    if (qemu_opts_foreach(net, net_init_client, NULL, 1) == -1) {
+    if (qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, errp)) {
+        return -1;
+    }
+
+    if (qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, errp)) {
         return -1;
     }
 
@@ -1185,42 +1606,57 @@ int net_init_clients(void)
 
 int net_client_parse(QemuOptsList *opts_list, const char *optarg)
 {
-#if defined(CONFIG_SLIRP)
-    int ret;
-    if (net_slirp_parse_legacy(opts_list, optarg, &ret)) {
-        return ret;
-    }
-#endif
-
-    if (!qemu_opts_parse(opts_list, optarg, 1)) {
+    if (!qemu_opts_parse_noisily(opts_list, optarg, true)) {
         return -1;
     }
 
-    default_net = 0;
     return 0;
 }
 
 /* From FreeBSD */
 /* XXX: optimize */
-unsigned compute_mcast_idx(const uint8_t *ep)
+uint32_t net_crc32(const uint8_t *p, int len)
 {
     uint32_t crc;
     int carry, i, j;
     uint8_t b;
 
     crc = 0xffffffff;
-    for (i = 0; i < 6; i++) {
-        b = *ep++;
+    for (i = 0; i < len; i++) {
+        b = *p++;
         for (j = 0; j < 8; j++) {
             carry = ((crc & 0x80000000L) ? 1 : 0) ^ (b & 0x01);
             crc <<= 1;
             b >>= 1;
             if (carry) {
-                crc = ((crc ^ POLYNOMIAL) | carry);
+                crc = ((crc ^ POLYNOMIAL_BE) | carry);
             }
         }
     }
-    return crc >> 26;
+
+    return crc;
+}
+
+uint32_t net_crc32_le(const uint8_t *p, int len)
+{
+    uint32_t crc;
+    int carry, i, j;
+    uint8_t b;
+
+    crc = 0xffffffff;
+    for (i = 0; i < len; i++) {
+        b = *p++;
+        for (j = 0; j < 8; j++) {
+            carry = (crc & 0x1) ^ (b & 0x01);
+            crc >>= 1;
+            b >>= 1;
+            if (carry) {
+                crc ^= POLYNOMIAL_LE;
+            }
+        }
+    }
+
+    return crc;
 }
 
 QemuOptsList qemu_netdev_opts = {
@@ -1236,6 +1672,19 @@ QemuOptsList qemu_netdev_opts = {
     },
 };
 
+QemuOptsList qemu_nic_opts = {
+    .name = "nic",
+    .implied_opt_name = "type",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_nic_opts.head),
+    .desc = {
+        /*
+         * no elements => accept any params
+         * validation will happen later
+         */
+        { /* end of list */ }
+    },
+};
+
 QemuOptsList qemu_net_opts = {
     .name = "net",
     .implied_opt_name = "type",
@@ -1248,3 +1697,100 @@ QemuOptsList qemu_net_opts = {
         { /* end of list */ }
     },
 };
+
+void net_socket_rs_init(SocketReadState *rs,
+                        SocketReadStateFinalize *finalize,
+                        bool vnet_hdr)
+{
+    rs->state = 0;
+    rs->vnet_hdr = vnet_hdr;
+    rs->index = 0;
+    rs->packet_len = 0;
+    rs->vnet_hdr_len = 0;
+    memset(rs->buf, 0, sizeof(rs->buf));
+    rs->finalize = finalize;
+}
+
+/*
+ * Returns
+ * 0: success
+ * -1: error occurs
+ */
+int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
+{
+    unsigned int l;
+
+    while (size > 0) {
+        /* Reassemble a packet from the network.
+         * 0 = getting length.
+         * 1 = getting vnet header length.
+         * 2 = getting data.
+         */
+        switch (rs->state) {
+        case 0:
+            l = 4 - rs->index;
+            if (l > size) {
+                l = size;
+            }
+            memcpy(rs->buf + rs->index, buf, l);
+            buf += l;
+            size -= l;
+            rs->index += l;
+            if (rs->index == 4) {
+                /* got length */
+                rs->packet_len = ntohl(*(uint32_t *)rs->buf);
+                rs->index = 0;
+                if (rs->vnet_hdr) {
+                    rs->state = 1;
+                } else {
+                    rs->state = 2;
+                    rs->vnet_hdr_len = 0;
+                }
+            }
+            break;
+        case 1:
+            l = 4 - rs->index;
+            if (l > size) {
+                l = size;
+            }
+            memcpy(rs->buf + rs->index, buf, l);
+            buf += l;
+            size -= l;
+            rs->index += l;
+            if (rs->index == 4) {
+                /* got vnet header length */
+                rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
+                rs->index = 0;
+                rs->state = 2;
+            }
+            break;
+        case 2:
+            l = rs->packet_len - rs->index;
+            if (l > size) {
+                l = size;
+            }
+            if (rs->index + l <= sizeof(rs->buf)) {
+                memcpy(rs->buf + rs->index, buf, l);
+            } else {
+                fprintf(stderr, "serious error: oversized packet received,"
+                    "connection terminated.\n");
+                rs->index = rs->state = 0;
+                return -1;
+            }
+
+            rs->index += l;
+            buf += l;
+            size -= l;
+            if (rs->index >= rs->packet_len) {
+                rs->index = 0;
+                rs->state = 0;
+                assert(rs->finalize);
+                rs->finalize(rs);
+            }
+            break;
+        }
+    }
+
+    assert(size == 0);
+    return 0;
+}
diff --git a/net/netmap.c b/net/netmap.c
new file mode 100644
index 000000000..350f097f9
--- /dev/null
+++ b/net/netmap.c
@@ -0,0 +1,432 @@
+/*
+ * netmap access for qemu
+ *
+ * Copyright (c) 2012-2013 Luigi Rizzo
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include <net/if.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap.h>
+#include <net/netmap_user.h>
+
+#include "net/net.h"
+#include "net/tap.h"
+#include "clients.h"
+#include "sysemu/sysemu.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
+
+typedef struct NetmapState {
+    NetClientState      nc;
+    struct nm_desc      *nmd;
+    char                ifname[IFNAMSIZ];
+    struct netmap_ring  *tx;
+    struct netmap_ring  *rx;
+    bool                read_poll;
+    bool                write_poll;
+    struct iovec        iov[IOV_MAX];
+    int                 vnet_hdr_len;  /* Current virtio-net header length. */
+} NetmapState;
+
+#ifndef __FreeBSD__
+#define pkt_copy bcopy
+#else
+/* A fast copy routine only for multiples of 64 bytes, non overlapped. */
+static inline void
+pkt_copy(const void *_src, void *_dst, int l)
+{
+    const uint64_t *src = _src;
+    uint64_t *dst = _dst;
+    if (unlikely(l >= 1024)) {
+        bcopy(src, dst, l);
+        return;
+    }
+    for (; l > 0; l -= 64) {
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+        *dst++ = *src++;
+    }
+}
+#endif /* __FreeBSD__ */
+
+/*
+ * Open a netmap device. We assume there is only one queue
+ * (which is the case for the VALE bridge).
+ */
+static struct nm_desc *netmap_open(const NetdevNetmapOptions *nm_opts,
+                                   Error **errp)
+{
+    struct nm_desc *nmd;
+    struct nmreq req;
+
+    memset(&req, 0, sizeof(req));
+
+    nmd = nm_open(nm_opts->ifname, &req, NETMAP_NO_TX_POLL,
+                  NULL);
+    if (nmd == NULL) {
+        error_setg_errno(errp, errno, "Failed to nm_open() %s",
+                         nm_opts->ifname);
+        return NULL;
+    }
+
+    return nmd;
+}
+
+static void netmap_send(void *opaque);
+static void netmap_writable(void *opaque);
+
+/* Set the event-loop handlers for the netmap backend. */
+static void netmap_update_fd_handler(NetmapState *s)
+{
+    qemu_set_fd_handler(s->nmd->fd,
+                        s->read_poll ? netmap_send : NULL,
+                        s->write_poll ? netmap_writable : NULL,
+                        s);
+}
+
+/* Update the read handler. */
+static void netmap_read_poll(NetmapState *s, bool enable)
+{
+    if (s->read_poll != enable) { /* Do nothing if not changed. */
+        s->read_poll = enable;
+        netmap_update_fd_handler(s);
+    }
+}
+
+/* Update the write handler. */
+static void netmap_write_poll(NetmapState *s, bool enable)
+{
+    if (s->write_poll != enable) {
+        s->write_poll = enable;
+        netmap_update_fd_handler(s);
+    }
+}
+
+static void netmap_poll(NetClientState *nc, bool enable)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+
+    if (s->read_poll != enable || s->write_poll != enable) {
+        s->write_poll = enable;
+        s->read_poll  = enable;
+        netmap_update_fd_handler(s);
+    }
+}
+
+/*
+ * The fd_write() callback, invoked if the fd is marked as
+ * writable after a poll. Unregister the handler and flush any
+ * buffered packets.
+ */
+static void netmap_writable(void *opaque)
+{
+    NetmapState *s = opaque;
+
+    netmap_write_poll(s, false);
+    qemu_flush_queued_packets(&s->nc);
+}
+
+static ssize_t netmap_receive_iov(NetClientState *nc,
+                    const struct iovec *iov, int iovcnt)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+    struct netmap_ring *ring = s->tx;
+    unsigned int tail = ring->tail;
+    ssize_t totlen = 0;
+    uint32_t last;
+    uint32_t idx;
+    uint8_t *dst;
+    int j;
+    uint32_t i;
+
+    last = i = ring->head;
+
+    if (nm_ring_space(ring) < iovcnt) {
+        /* Not enough netmap slots. Tell the kernel that we have seen the new
+         * available slots (so that it notifies us again when it has more
+         * ones), but without publishing any new slots to be processed
+         * (e.g., we don't advance ring->head). */
+        ring->cur = tail;
+        netmap_write_poll(s, true);
+        return 0;
+    }
+
+    for (j = 0; j < iovcnt; j++) {
+        int iov_frag_size = iov[j].iov_len;
+        int offset = 0;
+        int nm_frag_size;
+
+        totlen += iov_frag_size;
+
+        /* Split each iovec fragment over more netmap slots, if
+           necessary. */
+        while (iov_frag_size) {
+            nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);
+
+            if (unlikely(i == tail)) {
+                /* We ran out of netmap slots while splitting the
+                   iovec fragments. */
+                ring->cur = tail;
+                netmap_write_poll(s, true);
+                return 0;
+            }
+
+            idx = ring->slot[i].buf_idx;
+            dst = (uint8_t *)NETMAP_BUF(ring, idx);
+
+            ring->slot[i].len = nm_frag_size;
+            ring->slot[i].flags = NS_MOREFRAG;
+            pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);
+
+            last = i;
+            i = nm_ring_next(ring, i);
+
+            offset += nm_frag_size;
+            iov_frag_size -= nm_frag_size;
+        }
+    }
+    /* The last slot must not have NS_MOREFRAG set. */
+    ring->slot[last].flags &= ~NS_MOREFRAG;
+
+    /* Now update ring->head and ring->cur to publish the new slots and
+     * the new wakeup point. */
+    ring->head = ring->cur = i;
+
+    ioctl(s->nmd->fd, NIOCTXSYNC, NULL);
+
+    return totlen;
+}
+
+static ssize_t netmap_receive(NetClientState *nc,
+      const uint8_t *buf, size_t size)
+{
+    struct iovec iov;
+
+    iov.iov_base = (void *)buf;
+    iov.iov_len = size;
+
+    return netmap_receive_iov(nc, &iov, 1);
+}
+
+/* Complete a previous send (backend --> guest) and enable the
+   fd_read callback. */
+static void netmap_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+
+    netmap_read_poll(s, true);
+}
+
+static void netmap_send(void *opaque)
+{
+    NetmapState *s = opaque;
+    struct netmap_ring *ring = s->rx;
+    unsigned int tail = ring->tail;
+
+    /* Keep sending while there are available slots in the netmap
+       RX ring and the forwarding path towards the peer is open. */
+    while (ring->head != tail) {
+        uint32_t i = ring->head;
+        uint32_t idx;
+        bool morefrag;
+        int iovcnt = 0;
+        int iovsize;
+
+        /* Get a (possibly multi-slot) packet. */
+        do {
+            idx = ring->slot[i].buf_idx;
+            morefrag = (ring->slot[i].flags & NS_MOREFRAG);
+            s->iov[iovcnt].iov_base = (void *)NETMAP_BUF(ring, idx);
+            s->iov[iovcnt].iov_len = ring->slot[i].len;
+            iovcnt++;
+            i = nm_ring_next(ring, i);
+        } while (i != tail && morefrag);
+
+        /* Advance ring->cur to tell the kernel that we have seen the slots. */
+        ring->cur = i;
+
+        if (unlikely(morefrag)) {
+            /* This is a truncated packet, so we can stop without releasing the
+             * incomplete slots by updating ring->head. We will hopefully
+             * re-read the complete packet the next time we are called. */
+            break;
+        }
+
+        iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt,
+                                            netmap_send_completed);
+
+        /* Release the slots to the kernel. */
+        ring->head = i;
+
+        if (iovsize == 0) {
+            /* The peer does not receive anymore. Packet is queued, stop
+             * reading from the backend until netmap_send_completed(). */
+            netmap_read_poll(s, false);
+            break;
+        }
+    }
+}
+
+/* Flush and close. */
+static void netmap_cleanup(NetClientState *nc)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+
+    qemu_purge_queued_packets(nc);
+
+    netmap_poll(nc, false);
+    nm_close(s->nmd);
+    s->nmd = NULL;
+}
+
+/* Offloading manipulation support callbacks. */
+static int netmap_fd_set_vnet_hdr_len(NetmapState *s, int len)
+{
+    struct nmreq req;
+
+    /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
+     * length for the netmap adapter associated to 's->ifname'.
+     */
+    memset(&req, 0, sizeof(req));
+    pstrcpy(req.nr_name, sizeof(req.nr_name), s->ifname);
+    req.nr_version = NETMAP_API;
+    req.nr_cmd = NETMAP_BDG_VNET_HDR;
+    req.nr_arg1 = len;
+
+    return ioctl(s->nmd->fd, NIOCREGIF, &req);
+}
+
+static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+    int prev_len = s->vnet_hdr_len;
+
+    /* Check that we can set the new length. */
+    if (netmap_fd_set_vnet_hdr_len(s, len)) {
+        return false;
+    }
+
+    /* Restore the previous length. */
+    if (netmap_fd_set_vnet_hdr_len(s, prev_len)) {
+        error_report("Failed to restore vnet-hdr length %d on %s: %s",
+                     prev_len, s->ifname, strerror(errno));
+        abort();
+    }
+
+    return true;
+}
+
+/* A netmap interface that supports virtio-net headers always
+ * supports UFO, so we use this callback also for the has_ufo hook. */
+static bool netmap_has_vnet_hdr(NetClientState *nc)
+{
+    return netmap_has_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
+}
+
+static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
+{
+}
+
+static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+    int err;
+
+    err = netmap_fd_set_vnet_hdr_len(s, len);
+    if (err) {
+        error_report("Unable to set vnet-hdr length %d on %s: %s",
+                     len, s->ifname, strerror(errno));
+    } else {
+        /* Keep track of the current length. */
+        s->vnet_hdr_len = len;
+    }
+}
+
+static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                               int ecn, int ufo)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+
+    /* Setting a virtio-net header length greater than zero automatically
+     * enables the offloadings. */
+    if (!s->vnet_hdr_len) {
+        netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
+    }
+}
+
+/* NetClientInfo methods */
+static NetClientInfo net_netmap_info = {
+    .type = NET_CLIENT_DRIVER_NETMAP,
+    .size = sizeof(NetmapState),
+    .receive = netmap_receive,
+    .receive_iov = netmap_receive_iov,
+    .poll = netmap_poll,
+    .cleanup = netmap_cleanup,
+    .has_ufo = netmap_has_vnet_hdr,
+    .has_vnet_hdr = netmap_has_vnet_hdr,
+    .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
+    .using_vnet_hdr = netmap_using_vnet_hdr,
+    .set_offload = netmap_set_offload,
+    .set_vnet_hdr_len = netmap_set_vnet_hdr_len,
+};
+
+/* The exported init function
+ *
+ * ... -net netmap,ifname="..."
+ */
+int net_init_netmap(const Netdev *netdev,
+                    const char *name, NetClientState *peer, Error **errp)
+{
+    const NetdevNetmapOptions *netmap_opts = &netdev->u.netmap;
+    struct nm_desc *nmd;
+    NetClientState *nc;
+    Error *err = NULL;
+    NetmapState *s;
+
+    nmd = netmap_open(netmap_opts, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return -1;
+    }
+    /* Create the object. */
+    nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name);
+    s = DO_UPCAST(NetmapState, nc, nc);
+    s->nmd = nmd;
+    s->tx = NETMAP_TXRING(nmd->nifp, 0);
+    s->rx = NETMAP_RXRING(nmd->nifp, 0);
+    s->vnet_hdr_len = 0;
+    pstrcpy(s->ifname, sizeof(s->ifname), netmap_opts->ifname);
+    netmap_read_poll(s, true); /* Initially only poll for reads. */
+
+    return 0;
+}
+
diff --git a/net/queue.c b/net/queue.c
index 859d02a13..7c0b72c8e 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -21,6 +21,7 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
 #include "net/queue.h"
 #include "qemu/queue.h"
 #include "net/net.h"
@@ -52,21 +53,23 @@ struct NetQueue {
     void *opaque;
     uint32_t nq_maxlen;
     uint32_t nq_count;
+    NetQueueDeliverFunc *deliver;
 
-    QTAILQ_HEAD(packets, NetPacket) packets;
+    QTAILQ_HEAD(, NetPacket) packets;
 
     unsigned delivering : 1;
 };
 
-NetQueue *qemu_new_net_queue(void *opaque)
+NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque)
 {
     NetQueue *queue;
 
-    queue = g_malloc0(sizeof(NetQueue));
+    queue = g_new0(NetQueue, 1);
 
     queue->opaque = opaque;
     queue->nq_maxlen = 10000;
     queue->nq_count = 0;
+    queue->deliver = deliver;
 
     QTAILQ_INIT(&queue->packets);
 
@@ -110,12 +113,12 @@ static void qemu_net_queue_append(NetQueue *queue,
     QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
 }
 
-static void qemu_net_queue_append_iov(NetQueue *queue,
-                                      NetClientState *sender,
-                                      unsigned flags,
-                                      const struct iovec *iov,
-                                      int iovcnt,
-                                      NetPacketSent *sent_cb)
+void qemu_net_queue_append_iov(NetQueue *queue,
+                               NetClientState *sender,
+                               unsigned flags,
+                               const struct iovec *iov,
+                               int iovcnt,
+                               NetPacketSent *sent_cb)
 {
     NetPacket *packet;
     size_t max_len = 0;
@@ -152,9 +155,13 @@ static ssize_t qemu_net_queue_deliver(NetQueue *queue,
                                       size_t size)
 {
     ssize_t ret = -1;
+    struct iovec iov = {
+        .iov_base = (void *)data,
+        .iov_len = size
+    };
 
     queue->delivering = 1;
-    ret = qemu_deliver_packet(sender, flags, data, size, queue->opaque);
+    ret = queue->deliver(sender, flags, &iov, 1, queue->opaque);
     queue->delivering = 0;
 
     return ret;
@@ -169,12 +176,34 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue,
     ssize_t ret = -1;
 
     queue->delivering = 1;
-    ret = qemu_deliver_packet_iov(sender, flags, iov, iovcnt, queue->opaque);
+    ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque);
     queue->delivering = 0;
 
     return ret;
 }
 
+ssize_t qemu_net_queue_receive(NetQueue *queue,
+                               const uint8_t *data,
+                               size_t size)
+{
+    if (queue->delivering) {
+        return 0;
+    }
+
+    return qemu_net_queue_deliver(queue, NULL, 0, data, size);
+}
+
+ssize_t qemu_net_queue_receive_iov(NetQueue *queue,
+                                   const struct iovec *iov,
+                                   int iovcnt)
+{
+    if (queue->delivering) {
+        return 0;
+    }
+
+    return qemu_net_queue_deliver_iov(queue, NULL, 0, iov, iovcnt);
+}
+
 ssize_t qemu_net_queue_send(NetQueue *queue,
                             NetClientState *sender,
                             unsigned flags,
@@ -233,6 +262,9 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from)
         if (packet->sender == from) {
             QTAILQ_REMOVE(&queue->packets, packet, entry);
             queue->nq_count--;
+            if (packet->sent_cb) {
+                packet->sent_cb(packet->sender, 0);
+            }
             g_free(packet);
         }
     }
diff --git a/net/slirp.c b/net/slirp.c
index 124e953d9..c4334ee87 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -21,9 +21,11 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "net/slirp.h"
 
-#include "config-host.h"
 
 #ifndef _WIN32
 #include <pwd.h>
@@ -33,9 +35,17 @@
 #include "clients.h"
 #include "hub.h"
 #include "monitor/monitor.h"
+#include "qemu/error-report.h"
 #include "qemu/sockets.h"
-#include "slirp/libslirp.h"
-#include "sysemu/char.h"
+#include <libslirp.h>
+#include "chardev/char-fe.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "util.h"
+#include "migration/register.h"
+#include "migration/qemu-file-types.h"
 
 static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
 {
@@ -60,50 +70,53 @@ static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
 /* slirp network adapter */
 
 #define SLIRP_CFG_HOSTFWD 1
-#define SLIRP_CFG_LEGACY  2
 
 struct slirp_config_str {
     struct slirp_config_str *next;
     int flags;
     char str[1024];
-    int legacy_format;
+};
+
+struct GuestFwd {
+    CharBackend hd;
+    struct in_addr server;
+    int port;
+    Slirp *slirp;
 };
 
 typedef struct SlirpState {
     NetClientState nc;
     QTAILQ_ENTRY(SlirpState) entry;
     Slirp *slirp;
+    Notifier poll_notifier;
+    Notifier exit_notifier;
 #ifndef _WIN32
-    char smb_dir[128];
+    gchar *smb_dir;
 #endif
+    GSList *fwd;
 } SlirpState;
 
 static struct slirp_config_str *slirp_configs;
-const char *legacy_tftp_prefix;
-const char *legacy_bootp_filename;
-static QTAILQ_HEAD(slirp_stacks, SlirpState) slirp_stacks =
+static QTAILQ_HEAD(, SlirpState) slirp_stacks =
     QTAILQ_HEAD_INITIALIZER(slirp_stacks);
 
-static int slirp_hostfwd(SlirpState *s, const char *redir_str,
-                         int legacy_format);
-static int slirp_guestfwd(SlirpState *s, const char *config_str,
-                          int legacy_format);
+static int slirp_hostfwd(SlirpState *s, const char *redir_str, Error **errp);
+static int slirp_guestfwd(SlirpState *s, const char *config_str, Error **errp);
 
 #ifndef _WIN32
-static const char *legacy_smb_export;
-
 static int slirp_smb(SlirpState *s, const char *exported_dir,
-                     struct in_addr vserver_addr);
+                     struct in_addr vserver_addr, Error **errp);
 static void slirp_smb_cleanup(SlirpState *s);
 #else
 static inline void slirp_smb_cleanup(SlirpState *s) { }
 #endif
 
-void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len)
+static ssize_t net_slirp_send_packet(const void *pkt, size_t pkt_len,
+                                     void *opaque)
 {
     SlirpState *s = opaque;
 
-    qemu_send_packet(&s->nc, pkt, pkt_len);
+    return qemu_send_packet(&s->nc, pkt, pkt_len);
 }
 
 static ssize_t net_slirp_receive(NetClientState *nc, const uint8_t *buf, size_t size)
@@ -115,29 +128,236 @@ static ssize_t net_slirp_receive(NetClientState *nc, const uint8_t *buf, size_t
     return size;
 }
 
+static void slirp_smb_exit(Notifier *n, void *data)
+{
+    SlirpState *s = container_of(n, SlirpState, exit_notifier);
+    slirp_smb_cleanup(s);
+}
+
+static void slirp_free_fwd(gpointer data)
+{
+    struct GuestFwd *fwd = data;
+
+    qemu_chr_fe_deinit(&fwd->hd, true);
+    g_free(data);
+}
+
 static void net_slirp_cleanup(NetClientState *nc)
 {
     SlirpState *s = DO_UPCAST(SlirpState, nc, nc);
 
+    g_slist_free_full(s->fwd, slirp_free_fwd);
+    main_loop_poll_remove_notifier(&s->poll_notifier);
+    unregister_savevm(NULL, "slirp", s->slirp);
     slirp_cleanup(s->slirp);
+    if (s->exit_notifier.notify) {
+        qemu_remove_exit_notifier(&s->exit_notifier);
+    }
     slirp_smb_cleanup(s);
     QTAILQ_REMOVE(&slirp_stacks, s, entry);
 }
 
 static NetClientInfo net_slirp_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_USER,
+    .type = NET_CLIENT_DRIVER_USER,
     .size = sizeof(SlirpState),
     .receive = net_slirp_receive,
     .cleanup = net_slirp_cleanup,
 };
 
+static void net_slirp_guest_error(const char *msg, void *opaque)
+{
+    qemu_log_mask(LOG_GUEST_ERROR, "%s", msg);
+}
+
+static int64_t net_slirp_clock_get_ns(void *opaque)
+{
+    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+}
+
+static void *net_slirp_timer_new(SlirpTimerCb cb,
+                                 void *cb_opaque, void *opaque)
+{
+    return timer_new_full(NULL, QEMU_CLOCK_VIRTUAL,
+                          SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
+                          cb, cb_opaque);
+}
+
+static void net_slirp_timer_free(void *timer, void *opaque)
+{
+    timer_del(timer);
+    timer_free(timer);
+}
+
+static void net_slirp_timer_mod(void *timer, int64_t expire_timer,
+                                void *opaque)
+{
+    timer_mod(timer, expire_timer);
+}
+
+static void net_slirp_register_poll_fd(int fd, void *opaque)
+{
+    qemu_fd_register(fd);
+}
+
+static void net_slirp_unregister_poll_fd(int fd, void *opaque)
+{
+    /* no qemu_fd_unregister */
+}
+
+static void net_slirp_notify(void *opaque)
+{
+    qemu_notify_event();
+}
+
+static const SlirpCb slirp_cb = {
+    .send_packet = net_slirp_send_packet,
+    .guest_error = net_slirp_guest_error,
+    .clock_get_ns = net_slirp_clock_get_ns,
+    .timer_new = net_slirp_timer_new,
+    .timer_free = net_slirp_timer_free,
+    .timer_mod = net_slirp_timer_mod,
+    .register_poll_fd = net_slirp_register_poll_fd,
+    .unregister_poll_fd = net_slirp_unregister_poll_fd,
+    .notify = net_slirp_notify,
+};
+
+static int slirp_poll_to_gio(int events)
+{
+    int ret = 0;
+
+    if (events & SLIRP_POLL_IN) {
+        ret |= G_IO_IN;
+    }
+    if (events & SLIRP_POLL_OUT) {
+        ret |= G_IO_OUT;
+    }
+    if (events & SLIRP_POLL_PRI) {
+        ret |= G_IO_PRI;
+    }
+    if (events & SLIRP_POLL_ERR) {
+        ret |= G_IO_ERR;
+    }
+    if (events & SLIRP_POLL_HUP) {
+        ret |= G_IO_HUP;
+    }
+
+    return ret;
+}
+
+static int net_slirp_add_poll(int fd, int events, void *opaque)
+{
+    GArray *pollfds = opaque;
+    GPollFD pfd = {
+        .fd = fd,
+        .events = slirp_poll_to_gio(events),
+    };
+    int idx = pollfds->len;
+    g_array_append_val(pollfds, pfd);
+    return idx;
+}
+
+static int slirp_gio_to_poll(int events)
+{
+    int ret = 0;
+
+    if (events & G_IO_IN) {
+        ret |= SLIRP_POLL_IN;
+    }
+    if (events & G_IO_OUT) {
+        ret |= SLIRP_POLL_OUT;
+    }
+    if (events & G_IO_PRI) {
+        ret |= SLIRP_POLL_PRI;
+    }
+    if (events & G_IO_ERR) {
+        ret |= SLIRP_POLL_ERR;
+    }
+    if (events & G_IO_HUP) {
+        ret |= SLIRP_POLL_HUP;
+    }
+
+    return ret;
+}
+
+static int net_slirp_get_revents(int idx, void *opaque)
+{
+    GArray *pollfds = opaque;
+
+    return slirp_gio_to_poll(g_array_index(pollfds, GPollFD, idx).revents);
+}
+
+static void net_slirp_poll_notify(Notifier *notifier, void *data)
+{
+    MainLoopPoll *poll = data;
+    SlirpState *s = container_of(notifier, SlirpState, poll_notifier);
+
+    switch (poll->state) {
+    case MAIN_LOOP_POLL_FILL:
+        slirp_pollfds_fill(s->slirp, &poll->timeout,
+                           net_slirp_add_poll, poll->pollfds);
+        break;
+    case MAIN_LOOP_POLL_OK:
+    case MAIN_LOOP_POLL_ERR:
+        slirp_pollfds_poll(s->slirp, poll->state == MAIN_LOOP_POLL_ERR,
+                           net_slirp_get_revents, poll->pollfds);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static ssize_t
+net_slirp_stream_read(void *buf, size_t size, void *opaque)
+{
+    QEMUFile *f = opaque;
+
+    return qemu_get_buffer(f, buf, size);
+}
+
+static ssize_t
+net_slirp_stream_write(const void *buf, size_t size, void *opaque)
+{
+    QEMUFile *f = opaque;
+
+    qemu_put_buffer(f, buf, size);
+    if (qemu_file_get_error(f)) {
+        return -1;
+    }
+
+    return size;
+}
+
+static int net_slirp_state_load(QEMUFile *f, void *opaque, int version_id)
+{
+    Slirp *slirp = opaque;
+
+    return slirp_state_load(slirp, version_id, net_slirp_stream_read, f);
+}
+
+static void net_slirp_state_save(QEMUFile *f, void *opaque)
+{
+    Slirp *slirp = opaque;
+
+    slirp_state_save(slirp, net_slirp_stream_write, f);
+}
+
+static SaveVMHandlers savevm_slirp_state = {
+    .save_state = net_slirp_state_save,
+    .load_state = net_slirp_state_load,
+};
+
 static int net_slirp_init(NetClientState *peer, const char *model,
                           const char *name, int restricted,
-                          const char *vnetwork, const char *vhost,
+                          bool ipv4, const char *vnetwork, const char *vhost,
+                          bool ipv6, const char *vprefix6, int vprefix6_len,
+                          const char *vhost6,
                           const char *vhostname, const char *tftp_export,
                           const char *bootfile, const char *vdhcp_start,
-                          const char *vnameserver, const char *smb_export,
-                          const char *vsmbserver, const char **dnssearch)
+                          const char *vnameserver, const char *vnameserver6,
+                          const char *smb_export, const char *vsmbserver,
+                          const char **dnssearch, const char *vdomainname,
+                          const char *tftp_server_name,
+                          Error **errp)
 {
     /* default settings according to historic slirp */
     struct in_addr net  = { .s_addr = htonl(0x0a000200) }; /* 10.0.2.0 */
@@ -145,6 +365,9 @@ static int net_slirp_init(NetClientState *peer, const char *model,
     struct in_addr host = { .s_addr = htonl(0x0a000202) }; /* 10.0.2.2 */
     struct in_addr dhcp = { .s_addr = htonl(0x0a00020f) }; /* 10.0.2.15 */
     struct in_addr dns  = { .s_addr = htonl(0x0a000203) }; /* 10.0.2.3 */
+    struct in6_addr ip6_prefix;
+    struct in6_addr ip6_host;
+    struct in6_addr ip6_dns;
 #ifndef _WIN32
     struct in_addr smbsrv = { .s_addr = 0 };
 #endif
@@ -156,16 +379,26 @@ static int net_slirp_init(NetClientState *peer, const char *model,
     char *end;
     struct slirp_config_str *config;
 
-    if (!tftp_export) {
-        tftp_export = legacy_tftp_prefix;
+    if (!ipv4 && (vnetwork || vhost || vnameserver)) {
+        error_setg(errp, "IPv4 disabled but netmask/host/dns provided");
+        return -1;
+    }
+
+    if (!ipv6 && (vprefix6 || vhost6 || vnameserver6)) {
+        error_setg(errp, "IPv6 disabled but prefix/host6/dns6 provided");
+        return -1;
     }
-    if (!bootfile) {
-        bootfile = legacy_bootp_filename;
+
+    if (!ipv4 && !ipv6) {
+        /* It doesn't make sense to disable both */
+        error_setg(errp, "IPv4 and IPv6 disabled");
+        return -1;
     }
 
     if (vnetwork) {
         if (get_str_sep(buf, sizeof(buf), &vnetwork, '/') < 0) {
             if (!inet_aton(vnetwork, &net)) {
+                error_setg(errp, "Failed to parse netmask");
                 return -1;
             }
             addr = ntohl(net.s_addr);
@@ -186,14 +419,19 @@ static int net_slirp_init(NetClientState *peer, const char *model,
             }
         } else {
             if (!inet_aton(buf, &net)) {
+                error_setg(errp, "Failed to parse netmask");
                 return -1;
             }
             shift = strtol(vnetwork, &end, 10);
             if (*end != '\0') {
                 if (!inet_aton(vnetwork, &mask)) {
+                    error_setg(errp,
+                               "Failed to parse netmask (trailing chars)");
                     return -1;
                 }
             } else if (shift < 4 || shift > 32) {
+                error_setg(errp,
+                           "Invalid netmask provided (must be in range 4-32)");
                 return -1;
             } else {
                 mask.s_addr = htonl(0xffffffff << (32 - shift));
@@ -206,34 +444,113 @@ static int net_slirp_init(NetClientState *peer, const char *model,
     }
 
     if (vhost && !inet_aton(vhost, &host)) {
+        error_setg(errp, "Failed to parse host");
         return -1;
     }
     if ((host.s_addr & mask.s_addr) != net.s_addr) {
+        error_setg(errp, "Host doesn't belong to network");
         return -1;
     }
 
     if (vnameserver && !inet_aton(vnameserver, &dns)) {
+        error_setg(errp, "Failed to parse DNS");
         return -1;
     }
-    if ((dns.s_addr & mask.s_addr) != net.s_addr ||
-        dns.s_addr == host.s_addr) {
+    if (restricted && (dns.s_addr & mask.s_addr) != net.s_addr) {
+        error_setg(errp, "DNS doesn't belong to network");
+        return -1;
+    }
+    if (dns.s_addr == host.s_addr) {
+        error_setg(errp, "DNS must be different from host");
         return -1;
     }
 
     if (vdhcp_start && !inet_aton(vdhcp_start, &dhcp)) {
+        error_setg(errp, "Failed to parse DHCP start address");
+        return -1;
+    }
+    if ((dhcp.s_addr & mask.s_addr) != net.s_addr) {
+        error_setg(errp, "DHCP doesn't belong to network");
         return -1;
     }
-    if ((dhcp.s_addr & mask.s_addr) != net.s_addr ||
-        dhcp.s_addr == host.s_addr || dhcp.s_addr == dns.s_addr) {
+    if (dhcp.s_addr == host.s_addr || dhcp.s_addr == dns.s_addr) {
+        error_setg(errp, "DNS must be different from host and DNS");
         return -1;
     }
 
 #ifndef _WIN32
     if (vsmbserver && !inet_aton(vsmbserver, &smbsrv)) {
+        error_setg(errp, "Failed to parse SMB address");
         return -1;
     }
 #endif
 
+    if (!vprefix6) {
+        vprefix6 = "fec0::";
+    }
+    if (!inet_pton(AF_INET6, vprefix6, &ip6_prefix)) {
+        error_setg(errp, "Failed to parse IPv6 prefix");
+        return -1;
+    }
+
+    if (!vprefix6_len) {
+        vprefix6_len = 64;
+    }
+    if (vprefix6_len < 0 || vprefix6_len > 126) {
+        error_setg(errp,
+                   "Invalid IPv6 prefix provided "
+                   "(IPv6 prefix length must be between 0 and 126)");
+        return -1;
+    }
+
+    if (vhost6) {
+        if (!inet_pton(AF_INET6, vhost6, &ip6_host)) {
+            error_setg(errp, "Failed to parse IPv6 host");
+            return -1;
+        }
+        if (!in6_equal_net(&ip6_prefix, &ip6_host, vprefix6_len)) {
+            error_setg(errp, "IPv6 Host doesn't belong to network");
+            return -1;
+        }
+    } else {
+        ip6_host = ip6_prefix;
+        ip6_host.s6_addr[15] |= 2;
+    }
+
+    if (vnameserver6) {
+        if (!inet_pton(AF_INET6, vnameserver6, &ip6_dns)) {
+            error_setg(errp, "Failed to parse IPv6 DNS");
+            return -1;
+        }
+        if (restricted && !in6_equal_net(&ip6_prefix, &ip6_dns, vprefix6_len)) {
+            error_setg(errp, "IPv6 DNS doesn't belong to network");
+            return -1;
+        }
+    } else {
+        ip6_dns = ip6_prefix;
+        ip6_dns.s6_addr[15] |= 3;
+    }
+
+    if (vdomainname && !*vdomainname) {
+        error_setg(errp, "'domainname' parameter cannot be empty");
+        return -1;
+    }
+
+    if (vdomainname && strlen(vdomainname) > 255) {
+        error_setg(errp, "'domainname' parameter cannot exceed 255 bytes");
+        return -1;
+    }
+
+    if (vhostname && strlen(vhostname) > 255) {
+        error_setg(errp, "'vhostname' parameter cannot exceed 255 bytes");
+        return -1;
+    }
+
+    if (tftp_server_name && strlen(tftp_server_name) > 255) {
+        error_setg(errp, "'tftp-server-name' parameter cannot exceed 255 bytes");
+        return -1;
+    }
+
     nc = qemu_new_net_client(&net_slirp_info, peer, model, name);
 
     snprintf(nc->info_str, sizeof(nc->info_str),
@@ -242,31 +559,50 @@ static int net_slirp_init(NetClientState *peer, const char *model,
 
     s = DO_UPCAST(SlirpState, nc, nc);
 
-    s->slirp = slirp_init(restricted, net, mask, host, vhostname,
-                          tftp_export, bootfile, dhcp, dns, dnssearch, s);
+    s->slirp = slirp_init(restricted, ipv4, net, mask, host,
+                          ipv6, ip6_prefix, vprefix6_len, ip6_host,
+                          vhostname, tftp_server_name,
+                          tftp_export, bootfile, dhcp,
+                          dns, ip6_dns, dnssearch, vdomainname,
+                          &slirp_cb, s);
     QTAILQ_INSERT_TAIL(&slirp_stacks, s, entry);
 
+    /*
+     * Make sure the current bitstream version of slirp is 4, to avoid
+     * QEMU migration incompatibilities, if upstream slirp bumped the
+     * version.
+     *
+     * FIXME: use bitfields of features? teach libslirp to save with
+     * specific version?
+     */
+    g_assert(slirp_state_version() == 4);
+    register_savevm_live("slirp", 0, slirp_state_version(),
+                         &savevm_slirp_state, s->slirp);
+
+    s->poll_notifier.notify = net_slirp_poll_notify;
+    main_loop_poll_add_notifier(&s->poll_notifier);
+
     for (config = slirp_configs; config; config = config->next) {
         if (config->flags & SLIRP_CFG_HOSTFWD) {
-            if (slirp_hostfwd(s, config->str,
-                              config->flags & SLIRP_CFG_LEGACY) < 0)
+            if (slirp_hostfwd(s, config->str, errp) < 0) {
                 goto error;
+            }
         } else {
-            if (slirp_guestfwd(s, config->str,
-                               config->flags & SLIRP_CFG_LEGACY) < 0)
+            if (slirp_guestfwd(s, config->str, errp) < 0) {
                 goto error;
+            }
         }
     }
 #ifndef _WIN32
-    if (!smb_export) {
-        smb_export = legacy_smb_export;
-    }
     if (smb_export) {
-        if (slirp_smb(s, smb_export, smbsrv) < 0)
+        if (slirp_smb(s, smb_export, smbsrv, errp) < 0) {
             goto error;
+        }
     }
 #endif
 
+    s->exit_notifier.notify = slirp_smb_exit;
+    qemu_add_exit_notifier(&s->exit_notifier);
     return 0;
 
 error:
@@ -274,15 +610,25 @@ error:
     return -1;
 }
 
-static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
-                                const char *stack)
+static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id,
+                                const char *name)
 {
-
-    if (vlan) {
+    if (name) {
         NetClientState *nc;
-        nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack);
-        if (!nc) {
-            return NULL;
+        if (hub_id) {
+            nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name);
+            if (!nc) {
+                monitor_printf(mon, "unrecognized (hub-id, stackname) pair\n");
+                return NULL;
+            }
+            warn_report("Using 'hub-id' is deprecated, specify the netdev id "
+                        "directly instead");
+        } else {
+            nc = qemu_find_netdev(name);
+            if (!nc) {
+                monitor_printf(mon, "unrecognized netdev id '%s'\n", name);
+                return NULL;
+            }
         }
         if (strcmp(nc->model, "user")) {
             monitor_printf(mon, "invalid device specified\n");
@@ -298,7 +644,7 @@ static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
     }
 }
 
-void net_slirp_hostfwd_remove(Monitor *mon, const QDict *qdict)
+void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict)
 {
     struct in_addr host_addr = { .s_addr = INADDR_ANY };
     int host_port;
@@ -311,9 +657,12 @@ void net_slirp_hostfwd_remove(Monitor *mon, const QDict *qdict)
     const char *arg2 = qdict_get_try_str(qdict, "arg2");
     const char *arg3 = qdict_get_try_str(qdict, "arg3");
 
-    if (arg2) {
+    if (arg3) {
         s = slirp_lookup(mon, arg1, arg2);
         src_str = arg3;
+    } else if (arg2) {
+        s = slirp_lookup(mon, NULL, arg1);
+        src_str = arg2;
     } else {
         s = slirp_lookup(mon, NULL, NULL);
         src_str = arg1;
@@ -342,10 +691,11 @@ void net_slirp_hostfwd_remove(Monitor *mon, const QDict *qdict)
         goto fail_syntax;
     }
 
-    host_port = atoi(p);
+    if (qemu_strtoi(p, NULL, 10, &host_port)) {
+        goto fail_syntax;
+    }
 
-    err = slirp_remove_hostfwd(QTAILQ_FIRST(&slirp_stacks)->slirp, is_udp,
-                               host_addr, host_port);
+    err = slirp_remove_hostfwd(s->slirp, is_udp, host_addr, host_port);
 
     monitor_printf(mon, "host forwarding rule for %s %s\n", src_str,
                    err ? "not found" : "removed");
@@ -355,8 +705,7 @@ void net_slirp_hostfwd_remove(Monitor *mon, const QDict *qdict)
     monitor_printf(mon, "invalid format\n");
 }
 
-static int slirp_hostfwd(SlirpState *s, const char *redir_str,
-                         int legacy_format)
+static int slirp_hostfwd(SlirpState *s, const char *redir_str, Error **errp)
 {
     struct in_addr host_addr = { .s_addr = INADDR_ANY };
     struct in_addr guest_addr = { .s_addr = 0 };
@@ -365,9 +714,11 @@ static int slirp_hostfwd(SlirpState *s, const char *redir_str,
     char buf[256];
     int is_udp;
     char *end;
+    const char *fail_reason = "Unknown reason";
 
     p = redir_str;
     if (!p || get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
+        fail_reason = "No : separators";
         goto fail_syntax;
     }
     if (!strcmp(buf, "tcp") || buf[0] == '\0') {
@@ -375,52 +726,59 @@ static int slirp_hostfwd(SlirpState *s, const char *redir_str,
     } else if (!strcmp(buf, "udp")) {
         is_udp = 1;
     } else {
+        fail_reason = "Bad protocol name";
         goto fail_syntax;
     }
 
-    if (!legacy_format) {
-        if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
-            goto fail_syntax;
-        }
-        if (buf[0] != '\0' && !inet_aton(buf, &host_addr)) {
-            goto fail_syntax;
-        }
+    if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
+        fail_reason = "Missing : separator";
+        goto fail_syntax;
+    }
+    if (buf[0] != '\0' && !inet_aton(buf, &host_addr)) {
+        fail_reason = "Bad host address";
+        goto fail_syntax;
     }
 
-    if (get_str_sep(buf, sizeof(buf), &p, legacy_format ? ':' : '-') < 0) {
+    if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) {
+        fail_reason = "Bad host port separator";
         goto fail_syntax;
     }
     host_port = strtol(buf, &end, 0);
-    if (*end != '\0' || host_port < 1 || host_port > 65535) {
+    if (*end != '\0' || host_port < 0 || host_port > 65535) {
+        fail_reason = "Bad host port";
         goto fail_syntax;
     }
 
     if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
+        fail_reason = "Missing guest address";
         goto fail_syntax;
     }
     if (buf[0] != '\0' && !inet_aton(buf, &guest_addr)) {
+        fail_reason = "Bad guest address";
         goto fail_syntax;
     }
 
     guest_port = strtol(p, &end, 0);
     if (*end != '\0' || guest_port < 1 || guest_port > 65535) {
+        fail_reason = "Bad guest port";
         goto fail_syntax;
     }
 
     if (slirp_add_hostfwd(s->slirp, is_udp, host_addr, host_port, guest_addr,
                           guest_port) < 0) {
-        error_report("could not set up host forwarding rule '%s'",
-                     redir_str);
+        error_setg(errp, "Could not set up host forwarding rule '%s'",
+                   redir_str);
         return -1;
     }
     return 0;
 
  fail_syntax:
-    error_report("invalid host forwarding rule '%s'", redir_str);
+    error_setg(errp, "Invalid host forwarding rule '%s' (%s)", redir_str,
+               fail_reason);
     return -1;
 }
 
-void net_slirp_hostfwd_add(Monitor *mon, const QDict *qdict)
+void hmp_hostfwd_add(Monitor *mon, const QDict *qdict)
 {
     const char *redir_str;
     SlirpState *s;
@@ -428,33 +786,23 @@ void net_slirp_hostfwd_add(Monitor *mon, const QDict *qdict)
     const char *arg2 = qdict_get_try_str(qdict, "arg2");
     const char *arg3 = qdict_get_try_str(qdict, "arg3");
 
-    if (arg2) {
+    if (arg3) {
         s = slirp_lookup(mon, arg1, arg2);
         redir_str = arg3;
+    } else if (arg2) {
+        s = slirp_lookup(mon, NULL, arg1);
+        redir_str = arg2;
     } else {
         s = slirp_lookup(mon, NULL, NULL);
         redir_str = arg1;
     }
     if (s) {
-        slirp_hostfwd(s, redir_str, 0);
-    }
-
-}
-
-int net_slirp_redir(const char *redir_str)
-{
-    struct slirp_config_str *config;
-
-    if (QTAILQ_EMPTY(&slirp_stacks)) {
-        config = g_malloc(sizeof(*config));
-        pstrcpy(config->str, sizeof(config->str), redir_str);
-        config->flags = SLIRP_CFG_HOSTFWD | SLIRP_CFG_LEGACY;
-        config->next = slirp_configs;
-        slirp_configs = config;
-        return 0;
+        Error *err = NULL;
+        if (slirp_hostfwd(s, redir_str, &err) < 0) {
+            error_report_err(err);
+        }
     }
 
-    return slirp_hostfwd(QTAILQ_FIRST(&slirp_stacks), redir_str, 1);
 }
 
 #ifndef _WIN32
@@ -462,11 +810,10 @@ int net_slirp_redir(const char *redir_str)
 /* automatic user mode samba server configuration */
 static void slirp_smb_cleanup(SlirpState *s)
 {
-    char cmd[128];
     int ret;
 
-    if (s->smb_dir[0] != '\0') {
-        snprintf(cmd, sizeof(cmd), "rm -rf %s", s->smb_dir);
+    if (s->smb_dir) {
+        gchar *cmd = g_strdup_printf("rm -rf %s", s->smb_dir);
         ret = system(cmd);
         if (ret == -1 || !WIFEXITED(ret)) {
             error_report("'%s' failed.", cmd);
@@ -474,62 +821,72 @@ static void slirp_smb_cleanup(SlirpState *s)
             error_report("'%s' failed. Error code: %d",
                          cmd, WEXITSTATUS(ret));
         }
-        s->smb_dir[0] = '\0';
+        g_free(cmd);
+        g_free(s->smb_dir);
+        s->smb_dir = NULL;
     }
 }
 
 static int slirp_smb(SlirpState* s, const char *exported_dir,
-                     struct in_addr vserver_addr)
+                     struct in_addr vserver_addr, Error **errp)
 {
-    static int instance;
-    char smb_conf[128];
-    char smb_cmdline[128];
+    char *smb_conf;
+    char *smb_cmdline;
     struct passwd *passwd;
     FILE *f;
 
     passwd = getpwuid(geteuid());
     if (!passwd) {
-        error_report("failed to retrieve user name");
+        error_setg(errp, "Failed to retrieve user name");
         return -1;
     }
 
     if (access(CONFIG_SMBD_COMMAND, F_OK)) {
-        error_report("could not find '%s', please install it",
-                     CONFIG_SMBD_COMMAND);
+        error_setg(errp, "Could not find '%s', please install it",
+                   CONFIG_SMBD_COMMAND);
         return -1;
     }
 
     if (access(exported_dir, R_OK | X_OK)) {
-        error_report("error accessing shared directory '%s': %s",
-                     exported_dir, strerror(errno));
+        error_setg(errp, "Error accessing shared directory '%s': %s",
+                   exported_dir, strerror(errno));
         return -1;
     }
 
-    snprintf(s->smb_dir, sizeof(s->smb_dir), "/tmp/qemu-smb.%ld-%d",
-             (long)getpid(), instance++);
-    if (mkdir(s->smb_dir, 0700) < 0) {
-        error_report("could not create samba server dir '%s'", s->smb_dir);
+    s->smb_dir = g_dir_make_tmp("qemu-smb.XXXXXX", NULL);
+    if (!s->smb_dir) {
+        error_setg(errp, "Could not create samba server dir");
         return -1;
     }
-    snprintf(smb_conf, sizeof(smb_conf), "%s/%s", s->smb_dir, "smb.conf");
+    smb_conf = g_strdup_printf("%s/%s", s->smb_dir, "smb.conf");
 
     f = fopen(smb_conf, "w");
     if (!f) {
         slirp_smb_cleanup(s);
-        error_report("could not create samba server configuration file '%s'",
-                     smb_conf);
+        error_setg(errp,
+                   "Could not create samba server configuration file '%s'",
+                    smb_conf);
+        g_free(smb_conf);
         return -1;
     }
     fprintf(f,
             "[global]\n"
             "private dir=%s\n"
-            "socket address=127.0.0.1\n"
+            "interfaces=127.0.0.1\n"
+            "bind interfaces only=yes\n"
             "pid directory=%s\n"
             "lock directory=%s\n"
             "state directory=%s\n"
+            "cache directory=%s\n"
+            "ncalrpc dir=%s/ncalrpc\n"
             "log file=%s/log.smbd\n"
             "smb passwd file=%s/smbpasswd\n"
-            "security = share\n"
+            "security = user\n"
+            "map to guest = Bad User\n"
+            "load printers = no\n"
+            "printing = bsd\n"
+            "disable spoolss = yes\n"
+            "usershare max shares = 0\n"
             "[qemu]\n"
             "path=%s\n"
             "read only=no\n"
@@ -541,48 +898,30 @@ static int slirp_smb(SlirpState* s, const char *exported_dir,
             s->smb_dir,
             s->smb_dir,
             s->smb_dir,
+            s->smb_dir,
+            s->smb_dir,
             exported_dir,
             passwd->pw_name
             );
     fclose(f);
 
-    snprintf(smb_cmdline, sizeof(smb_cmdline), "%s -s %s",
-             CONFIG_SMBD_COMMAND, smb_conf);
+    smb_cmdline = g_strdup_printf("%s -l %s -s %s",
+             CONFIG_SMBD_COMMAND, s->smb_dir, smb_conf);
+    g_free(smb_conf);
 
-    if (slirp_add_exec(s->slirp, 0, smb_cmdline, &vserver_addr, 139) < 0) {
+    if (slirp_add_exec(s->slirp, smb_cmdline, &vserver_addr, 139) < 0 ||
+        slirp_add_exec(s->slirp, smb_cmdline, &vserver_addr, 445) < 0) {
         slirp_smb_cleanup(s);
-        error_report("conflicting/invalid smbserver address");
+        g_free(smb_cmdline);
+        error_setg(errp, "Conflicting/invalid smbserver address");
         return -1;
     }
-    return 0;
-}
-
-/* automatic user mode samba server configuration (legacy interface) */
-int net_slirp_smb(const char *exported_dir)
-{
-    struct in_addr vserver_addr = { .s_addr = 0 };
-
-    if (legacy_smb_export) {
-        fprintf(stderr, "-smb given twice\n");
-        return -1;
-    }
-    legacy_smb_export = exported_dir;
-    if (!QTAILQ_EMPTY(&slirp_stacks)) {
-        return slirp_smb(QTAILQ_FIRST(&slirp_stacks), exported_dir,
-                         vserver_addr);
-    }
+    g_free(smb_cmdline);
     return 0;
 }
 
 #endif /* !defined(_WIN32) */
 
-struct GuestFwd {
-    CharDriverState *hd;
-    struct in_addr server;
-    int port;
-    Slirp *slirp;
-};
-
 static int guestfwd_can_read(void *opaque)
 {
     struct GuestFwd *fwd = opaque;
@@ -595,9 +934,14 @@ static void guestfwd_read(void *opaque, const uint8_t *buf, int size)
     slirp_socket_recv(fwd->slirp, fwd->server, fwd->port, buf, size);
 }
 
-static int slirp_guestfwd(SlirpState *s, const char *config_str,
-                          int legacy_format)
+static ssize_t guestfwd_write(const void *buf, size_t len, void *chr)
 {
+    return qemu_chr_fe_write_all(chr, buf, len);
+}
+
+static int slirp_guestfwd(SlirpState *s, const char *config_str, Error **errp)
+{
+    /* TODO: IPv6 */
     struct in_addr server = { .s_addr = 0 };
     struct GuestFwd *fwd;
     const char *p;
@@ -606,53 +950,62 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str,
     int port;
 
     p = config_str;
-    if (legacy_format) {
-        if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
-            goto fail_syntax;
-        }
-    } else {
-        if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
-            goto fail_syntax;
-        }
-        if (strcmp(buf, "tcp") && buf[0] != '\0') {
-            goto fail_syntax;
-        }
-        if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
-            goto fail_syntax;
-        }
-        if (buf[0] != '\0' && !inet_aton(buf, &server)) {
-            goto fail_syntax;
-        }
-        if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) {
-            goto fail_syntax;
-        }
+    if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
+        goto fail_syntax;
+    }
+    if (strcmp(buf, "tcp") && buf[0] != '\0') {
+        goto fail_syntax;
+    }
+    if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) {
+        goto fail_syntax;
+    }
+    if (buf[0] != '\0' && !inet_aton(buf, &server)) {
+        goto fail_syntax;
+    }
+    if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) {
+        goto fail_syntax;
     }
     port = strtol(buf, &end, 10);
     if (*end != '\0' || port < 1 || port > 65535) {
         goto fail_syntax;
     }
 
-    fwd = g_malloc(sizeof(struct GuestFwd));
     snprintf(buf, sizeof(buf), "guestfwd.tcp.%d", port);
 
-    if ((strlen(p) > 4) && !strncmp(p, "cmd:", 4)) {
-        if (slirp_add_exec(s->slirp, 0, &p[4], &server, port) < 0) {
-            error_report("conflicting/invalid host:port in guest forwarding "
-                         "rule '%s'", config_str);
-            g_free(fwd);
+    if (g_str_has_prefix(p, "cmd:")) {
+        if (slirp_add_exec(s->slirp, &p[4], &server, port) < 0) {
+            error_setg(errp, "Conflicting/invalid host:port in guest "
+                       "forwarding rule '%s'", config_str);
             return -1;
         }
     } else {
-        fwd->hd = qemu_chr_new(buf, p, NULL);
-        if (!fwd->hd) {
-            error_report("could not open guest forwarding device '%s'", buf);
+        Error *err = NULL;
+        /*
+         * FIXME: sure we want to support implicit
+         * muxed monitors here?
+         */
+        Chardev *chr = qemu_chr_new_mux_mon(buf, p, NULL);
+
+        if (!chr) {
+            error_setg(errp, "Could not open guest forwarding device '%s'",
+                       buf);
+            return -1;
+        }
+
+        fwd = g_new(struct GuestFwd, 1);
+        qemu_chr_fe_init(&fwd->hd, chr, &err);
+        if (err) {
+            error_propagate(errp, err);
+            object_unparent(OBJECT(chr));
             g_free(fwd);
             return -1;
         }
 
-        if (slirp_add_exec(s->slirp, 3, fwd->hd, &server, port) < 0) {
-            error_report("conflicting/invalid host:port in guest forwarding "
-                         "rule '%s'", config_str);
+        if (slirp_add_guestfwd(s->slirp, guestfwd_write, &fwd->hd,
+                               &server, port) < 0) {
+            error_setg(errp, "Conflicting/invalid host:port in guest "
+                       "forwarding rule '%s'", config_str);
+            qemu_chr_fe_deinit(&fwd->hd, true);
             g_free(fwd);
             return -1;
         }
@@ -660,28 +1013,29 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str,
         fwd->port = port;
         fwd->slirp = s->slirp;
 
-        qemu_chr_fe_claim_no_fail(fwd->hd);
-        qemu_chr_add_handlers(fwd->hd, guestfwd_can_read, guestfwd_read,
-                              NULL, fwd);
+        qemu_chr_fe_set_handlers(&fwd->hd, guestfwd_can_read, guestfwd_read,
+                                 NULL, NULL, fwd, NULL, true);
+        s->fwd = g_slist_append(s->fwd, fwd);
     }
     return 0;
 
  fail_syntax:
-    error_report("invalid guest forwarding rule '%s'", config_str);
+    error_setg(errp, "Invalid guest forwarding rule '%s'", config_str);
     return -1;
 }
 
-void do_info_usernet(Monitor *mon, const QDict *qdict)
+void hmp_info_usernet(Monitor *mon, const QDict *qdict)
 {
     SlirpState *s;
 
     QTAILQ_FOREACH(s, &slirp_stacks, entry) {
         int id;
-        bool got_vlan_id = net_hub_id_for_client(&s->nc, &id) == 0;
-        monitor_printf(mon, "VLAN %d (%s):\n",
-                       got_vlan_id ? id : -1,
-                       s->nc.name);
-        slirp_connection_info(s->slirp, mon);
+        bool got_hub_id = net_hub_id_for_client(&s->nc, &id) == 0;
+        char *info = slirp_connection_info(s->slirp);
+        monitor_printf(mon, "Hub %d (%s):\n%s",
+                       got_hub_id ? id : -1,
+                       s->nc.name, info);
+        g_free(info);
     }
 }
 
@@ -726,17 +1080,27 @@ static const char **slirp_dnssearch(const StringList *dnsname)
     return ret;
 }
 
-int net_init_slirp(const NetClientOptions *opts, const char *name,
-                   NetClientState *peer)
+int net_init_slirp(const Netdev *netdev, const char *name,
+                   NetClientState *peer, Error **errp)
 {
     struct slirp_config_str *config;
     char *vnet;
     int ret;
     const NetdevUserOptions *user;
     const char **dnssearch;
+    bool ipv4 = true, ipv6 = true;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_USER);
-    user = opts->user;
+    assert(netdev->type == NET_CLIENT_DRIVER_USER);
+    user = &netdev->u.user;
+
+    if ((user->has_ipv6 && user->ipv6 && !user->has_ipv4) ||
+        (user->has_ipv4 && !user->ipv4)) {
+        ipv4 = 0;
+    }
+    if ((user->has_ipv4 && user->ipv4 && !user->has_ipv6) ||
+        (user->has_ipv6 && !user->ipv6)) {
+        ipv6 = 0;
+    }
 
     vnet = user->has_net ? g_strdup(user->net) :
            user->has_ip  ? g_strdup_printf("%s/24", user->ip) :
@@ -749,10 +1113,14 @@ int net_init_slirp(const NetClientOptions *opts, const char *name,
     net_init_slirp_configs(user->hostfwd, SLIRP_CFG_HOSTFWD);
     net_init_slirp_configs(user->guestfwd, 0);
 
-    ret = net_slirp_init(peer, "user", name, user->q_restrict, vnet,
-                         user->host, user->hostname, user->tftp,
-                         user->bootfile, user->dhcpstart, user->dns, user->smb,
-                         user->smbserver, dnssearch);
+    ret = net_slirp_init(peer, "user", name, user->q_restrict,
+                         ipv4, vnet, user->host,
+                         ipv6, user->ipv6_prefix, user->ipv6_prefixlen,
+                         user->ipv6_host, user->hostname, user->tftp,
+                         user->bootfile, user->dhcpstart,
+                         user->dns, user->ipv6_dns, user->smb,
+                         user->smbserver, dnssearch, user->domainname,
+                         user->tftp_server_name, errp);
 
     while (slirp_configs) {
         config = slirp_configs;
@@ -765,30 +1133,3 @@ int net_init_slirp(const NetClientOptions *opts, const char *name,
 
     return ret;
 }
-
-int net_slirp_parse_legacy(QemuOptsList *opts_list, const char *optarg, int *ret)
-{
-    if (strcmp(opts_list->name, "net") != 0 ||
-        strncmp(optarg, "channel,", strlen("channel,")) != 0) {
-        return 0;
-    }
-
-    /* handle legacy -net channel,port:chr */
-    optarg += strlen("channel,");
-
-    if (QTAILQ_EMPTY(&slirp_stacks)) {
-        struct slirp_config_str *config;
-
-        config = g_malloc(sizeof(*config));
-        pstrcpy(config->str, sizeof(config->str), optarg);
-        config->flags = SLIRP_CFG_LEGACY;
-        config->next = slirp_configs;
-        slirp_configs = config;
-        *ret = 0;
-    } else {
-        *ret = slirp_guestfwd(QTAILQ_FIRST(&slirp_stacks), optarg, 1);
-    }
-
-    return 1;
-}
-
diff --git a/net/socket.c b/net/socket.c
index 87af1d3d3..c92354049 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -21,26 +21,25 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "config-host.h"
+#include "qemu/osdep.h"
 
 #include "net/net.h"
 #include "clients.h"
 #include "monitor/monitor.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "qemu/sockets.h"
 #include "qemu/iov.h"
+#include "qemu/main-loop.h"
 
 typedef struct NetSocketState {
     NetClientState nc;
     int listen_fd;
     int fd;
-    int state; /* 0 = getting length, 1 = getting data */
-    unsigned int index;
-    unsigned int packet_len;
+    SocketReadState rs;
     unsigned int send_index;      /* number of bytes sent (only SOCK_STREAM) */
-    uint8_t buf[NET_BUFSIZE];
     struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */
     IOHandler *send_fn;           /* differs between SOCK_STREAM/SOCK_DGRAM */
     bool read_poll;               /* waiting to receive data? */
@@ -50,21 +49,12 @@ typedef struct NetSocketState {
 static void net_socket_accept(void *opaque);
 static void net_socket_writable(void *opaque);
 
-/* Only read packets from socket when peer can receive them */
-static int net_socket_can_send(void *opaque)
-{
-    NetSocketState *s = opaque;
-
-    return qemu_can_send_packet(&s->nc);
-}
-
 static void net_socket_update_fd_handler(NetSocketState *s)
 {
-    qemu_set_fd_handler2(s->fd,
-                         s->read_poll  ? net_socket_can_send : NULL,
-                         s->read_poll  ? s->send_fn : NULL,
-                         s->write_poll ? net_socket_writable : NULL,
-                         s);
+    qemu_set_fd_handler(s->fd,
+                        s->read_poll ? s->send_fn : NULL,
+                        s->write_poll ? net_socket_writable : NULL,
+                        s);
 }
 
 static void net_socket_read_poll(NetSocketState *s, bool enable)
@@ -129,9 +119,13 @@ static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf,
     ssize_t ret;
 
     do {
-        ret = qemu_sendto(s->fd, buf, size, 0,
-                          (struct sockaddr *)&s->dgram_dst,
-                          sizeof(s->dgram_dst));
+        if (s->dgram_dst.sin_family != AF_UNIX) {
+            ret = qemu_sendto(s->fd, buf, size, 0,
+                              (struct sockaddr *)&s->dgram_dst,
+                              sizeof(s->dgram_dst));
+        } else {
+            ret = send(s->fd, buf, size, 0);
+        }
     } while (ret == -1 && errno == EINTR);
 
     if (ret == -1 && errno == EAGAIN) {
@@ -141,18 +135,37 @@ static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf,
     return ret;
 }
 
+static void net_socket_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
+
+    if (!s->read_poll) {
+        net_socket_read_poll(s, true);
+    }
+}
+
+static void net_socket_rs_finalize(SocketReadState *rs)
+{
+    NetSocketState *s = container_of(rs, NetSocketState, rs);
+
+    if (qemu_send_packet_async(&s->nc, rs->buf,
+                               rs->packet_len,
+                               net_socket_send_completed) == 0) {
+        net_socket_read_poll(s, false);
+    }
+}
+
 static void net_socket_send(void *opaque)
 {
     NetSocketState *s = opaque;
-    int size, err;
-    unsigned l;
+    int size;
+    int ret;
     uint8_t buf1[NET_BUFSIZE];
     const uint8_t *buf;
 
     size = qemu_recv(s->fd, buf1, sizeof(buf1), 0);
     if (size < 0) {
-        err = socket_error();
-        if (err != EWOULDBLOCK)
+        if (errno != EWOULDBLOCK)
             goto eoc;
     } else if (size == 0) {
         /* end of connection */
@@ -165,57 +178,18 @@ static void net_socket_send(void *opaque)
         closesocket(s->fd);
 
         s->fd = -1;
-        s->state = 0;
-        s->index = 0;
-        s->packet_len = 0;
+        net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
         s->nc.link_down = true;
-        memset(s->buf, 0, sizeof(s->buf));
         memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
 
         return;
     }
     buf = buf1;
-    while (size > 0) {
-        /* reassemble a packet from the network */
-        switch(s->state) {
-        case 0:
-            l = 4 - s->index;
-            if (l > size)
-                l = size;
-            memcpy(s->buf + s->index, buf, l);
-            buf += l;
-            size -= l;
-            s->index += l;
-            if (s->index == 4) {
-                /* got length */
-                s->packet_len = ntohl(*(uint32_t *)s->buf);
-                s->index = 0;
-                s->state = 1;
-            }
-            break;
-        case 1:
-            l = s->packet_len - s->index;
-            if (l > size)
-                l = size;
-            if (s->index + l <= sizeof(s->buf)) {
-                memcpy(s->buf + s->index, buf, l);
-            } else {
-                fprintf(stderr, "serious error: oversized packet received,"
-                    "connection terminated.\n");
-                s->state = 0;
-                goto eoc;
-            }
 
-            s->index += l;
-            buf += l;
-            size -= l;
-            if (s->index >= s->packet_len) {
-                qemu_send_packet(&s->nc, s->buf, s->packet_len);
-                s->index = 0;
-                s->state = 0;
-            }
-            break;
-        }
+    ret = net_fill_rstate(&s->rs, buf, size);
+
+    if (ret == -1) {
+        goto eoc;
     }
 }
 
@@ -224,7 +198,7 @@ static void net_socket_send_dgram(void *opaque)
     NetSocketState *s = opaque;
     int size;
 
-    size = qemu_recv(s->fd, s->buf, sizeof(s->buf), 0);
+    size = qemu_recv(s->fd, s->rs.buf, sizeof(s->rs.buf), 0);
     if (size < 0)
         return;
     if (size == 0) {
@@ -233,10 +207,15 @@ static void net_socket_send_dgram(void *opaque)
         net_socket_write_poll(s, false);
         return;
     }
-    qemu_send_packet(&s->nc, s->buf, size);
+    if (qemu_send_packet_async(&s->nc, s->rs.buf, size,
+                               net_socket_send_completed) == 0) {
+        net_socket_read_poll(s, false);
+    }
 }
 
-static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr *localaddr)
+static int net_socket_mcast_create(struct sockaddr_in *mcastaddr,
+                                   struct in_addr *localaddr,
+                                   Error **errp)
 {
     struct ip_mreq imr;
     int fd;
@@ -248,29 +227,36 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr
 #endif
 
     if (!IN_MULTICAST(ntohl(mcastaddr->sin_addr.s_addr))) {
-        fprintf(stderr, "qemu: error: specified mcastaddr \"%s\" (0x%08x) "
-                "does not contain a multicast address\n",
-                inet_ntoa(mcastaddr->sin_addr),
-                (int)ntohl(mcastaddr->sin_addr.s_addr));
+        error_setg(errp, "specified mcastaddr %s (0x%08x) "
+                   "does not contain a multicast address",
+                   inet_ntoa(mcastaddr->sin_addr),
+                   (int)ntohl(mcastaddr->sin_addr.s_addr));
         return -1;
-
     }
+
     fd = qemu_socket(PF_INET, SOCK_DGRAM, 0);
     if (fd < 0) {
-        perror("socket(PF_INET, SOCK_DGRAM)");
+        error_setg_errno(errp, errno, "can't create datagram socket");
         return -1;
     }
 
+    /* Allow multiple sockets to bind the same multicast ip and port by setting
+     * SO_REUSEADDR. This is the only situation where SO_REUSEADDR should be set
+     * on windows. Use socket_set_fast_reuse otherwise as it sets SO_REUSEADDR
+     * only on posix systems.
+     */
     val = 1;
     ret = qemu_setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
     if (ret < 0) {
-        perror("setsockopt(SOL_SOCKET, SO_REUSEADDR)");
+        error_setg_errno(errp, errno,
+                         "can't set socket option SO_REUSEADDR");
         goto fail;
     }
 
     ret = bind(fd, (struct sockaddr *)mcastaddr, sizeof(*mcastaddr));
     if (ret < 0) {
-        perror("bind");
+        error_setg_errno(errp, errno, "can't bind ip=%s to socket",
+                         inet_ntoa(mcastaddr->sin_addr));
         goto fail;
     }
 
@@ -285,7 +271,9 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr
     ret = qemu_setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
                           &imr, sizeof(struct ip_mreq));
     if (ret < 0) {
-        perror("setsockopt(IP_ADD_MEMBERSHIP)");
+        error_setg_errno(errp, errno,
+                         "can't add socket to multicast group %s",
+                         inet_ntoa(imr.imr_multiaddr));
         goto fail;
     }
 
@@ -294,7 +282,8 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr
     ret = qemu_setsockopt(fd, IPPROTO_IP, IP_MULTICAST_LOOP,
                           &loop, sizeof(loop));
     if (ret < 0) {
-        perror("setsockopt(SOL_IP, IP_MULTICAST_LOOP)");
+        error_setg_errno(errp, errno,
+                         "can't force multicast message to loopback");
         goto fail;
     }
 
@@ -303,7 +292,8 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, struct in_addr
         ret = qemu_setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
                               localaddr, sizeof(*localaddr));
         if (ret < 0) {
-            perror("setsockopt(IP_MULTICAST_IF)");
+            error_setg_errno(errp, errno,
+                             "can't set the default network send interface");
             goto fail;
         }
     }
@@ -333,7 +323,7 @@ static void net_socket_cleanup(NetClientState *nc)
 }
 
 static NetClientInfo net_dgram_socket_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_SOCKET,
+    .type = NET_CLIENT_DRIVER_SOCKET,
     .size = sizeof(NetSocketState),
     .receive = net_socket_receive_dgram,
     .cleanup = net_socket_cleanup,
@@ -342,62 +332,72 @@ static NetClientInfo net_dgram_socket_info = {
 static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
                                                 const char *model,
                                                 const char *name,
-                                                int fd, int is_connected)
+                                                int fd, int is_connected,
+                                                const char *mcast,
+                                                Error **errp)
 {
     struct sockaddr_in saddr;
     int newfd;
-    socklen_t saddr_len;
     NetClientState *nc;
     NetSocketState *s;
+    SocketAddress *sa;
+    SocketAddressType sa_type;
+
+    sa = socket_local_address(fd, errp);
+    if (!sa) {
+        return NULL;
+    }
+    sa_type = sa->type;
+    qapi_free_SocketAddress(sa);
 
     /* fd passed: multicast: "learn" dgram_dst address from bound address and save it
      * Because this may be "shared" socket from a "master" process, datagrams would be recv()
      * by ONLY ONE process: we must "clone" this dgram socket --jjo
      */
 
-    if (is_connected) {
-        if (getsockname(fd, (struct sockaddr *) &saddr, &saddr_len) == 0) {
+    if (is_connected && mcast != NULL) {
+            if (parse_host_port(&saddr, mcast, errp) < 0) {
+                goto err;
+            }
             /* must be bound */
             if (saddr.sin_addr.s_addr == 0) {
-                fprintf(stderr, "qemu: error: init_dgram: fd=%d unbound, "
-                        "cannot setup multicast dst addr\n", fd);
+                error_setg(errp, "can't setup multicast destination address");
                 goto err;
             }
             /* clone dgram socket */
-            newfd = net_socket_mcast_create(&saddr, NULL);
+            newfd = net_socket_mcast_create(&saddr, NULL, errp);
             if (newfd < 0) {
-                /* error already reported by net_socket_mcast_create() */
                 goto err;
             }
             /* clone newfd to fd, close newfd */
             dup2(newfd, fd);
             close(newfd);
 
-        } else {
-            fprintf(stderr,
-                    "qemu: error: init_dgram: fd=%d failed getsockname(): %s\n",
-                    fd, strerror(errno));
-            goto err;
-        }
     }
 
     nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name);
 
-    snprintf(nc->info_str, sizeof(nc->info_str),
-            "socket: fd=%d (%s mcast=%s:%d)",
-            fd, is_connected ? "cloned" : "",
-            inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
-
     s = DO_UPCAST(NetSocketState, nc, nc);
 
     s->fd = fd;
     s->listen_fd = -1;
     s->send_fn = net_socket_send_dgram;
+    net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
     net_socket_read_poll(s, true);
 
     /* mcast: save bound address as dst */
-    if (is_connected) {
+    if (is_connected && mcast != NULL) {
         s->dgram_dst = saddr;
+        snprintf(nc->info_str, sizeof(nc->info_str),
+                 "socket: fd=%d (cloned mcast=%s:%d)",
+                 fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
+    } else {
+        if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) {
+            s->dgram_dst.sin_family = AF_UNIX;
+        }
+
+        snprintf(nc->info_str, sizeof(nc->info_str),
+                 "socket: fd=%d %s", fd, SocketAddressType_str(sa_type));
     }
 
     return s;
@@ -415,7 +415,7 @@ static void net_socket_connect(void *opaque)
 }
 
 static NetClientInfo net_socket_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_SOCKET,
+    .type = NET_CLIENT_DRIVER_SOCKET,
     .size = sizeof(NetSocketState),
     .receive = net_socket_receive,
     .cleanup = net_socket_cleanup,
@@ -437,6 +437,7 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
 
     s->fd = fd;
     s->listen_fd = -1;
+    net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
 
     /* Disable Nagle algorithm on TCP sockets to reduce latency */
     socket_set_nodelay(fd);
@@ -451,26 +452,27 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
 
 static NetSocketState *net_socket_fd_init(NetClientState *peer,
                                           const char *model, const char *name,
-                                          int fd, int is_connected)
+                                          int fd, int is_connected,
+                                          const char *mc, Error **errp)
 {
     int so_type = -1, optlen=sizeof(so_type);
 
     if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type,
         (socklen_t *)&optlen)< 0) {
-        fprintf(stderr, "qemu: error: getsockopt(SO_TYPE) for fd=%d failed\n",
-                fd);
+        error_setg(errp, "can't get socket option SO_TYPE");
         closesocket(fd);
         return NULL;
     }
     switch(so_type) {
     case SOCK_DGRAM:
-        return net_socket_fd_init_dgram(peer, model, name, fd, is_connected);
+        return net_socket_fd_init_dgram(peer, model, name, fd, is_connected,
+                                        mc, errp);
     case SOCK_STREAM:
         return net_socket_fd_init_stream(peer, model, name, fd, is_connected);
     default:
-        /* who knows ... this could be a eg. a pty, do warn and continue as stream */
-        fprintf(stderr, "qemu: warning: socket type=%d for fd=%d is not SOCK_DGRAM or SOCK_STREAM\n", so_type, fd);
-        return net_socket_fd_init_stream(peer, model, name, fd, is_connected);
+        error_setg(errp, "socket type=%d for fd=%d must be either"
+                   " SOCK_DGRAM or SOCK_STREAM", so_type, fd);
+        closesocket(fd);
     }
     return NULL;
 }
@@ -504,36 +506,37 @@ static void net_socket_accept(void *opaque)
 static int net_socket_listen_init(NetClientState *peer,
                                   const char *model,
                                   const char *name,
-                                  const char *host_str)
+                                  const char *host_str,
+                                  Error **errp)
 {
     NetClientState *nc;
     NetSocketState *s;
     struct sockaddr_in saddr;
-    int fd, val, ret;
+    int fd, ret;
 
-    if (parse_host_port(&saddr, host_str) < 0)
+    if (parse_host_port(&saddr, host_str, errp) < 0) {
         return -1;
+    }
 
     fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
     if (fd < 0) {
-        perror("socket");
+        error_setg_errno(errp, errno, "can't create stream socket");
         return -1;
     }
     qemu_set_nonblock(fd);
 
-    /* allow fast reuse */
-    val = 1;
-    qemu_setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+    socket_set_fast_reuse(fd);
 
     ret = bind(fd, (struct sockaddr *)&saddr, sizeof(saddr));
     if (ret < 0) {
-        perror("bind");
+        error_setg_errno(errp, errno, "can't bind ip=%s to socket",
+                         inet_ntoa(saddr.sin_addr));
         closesocket(fd);
         return -1;
     }
     ret = listen(fd, 0);
     if (ret < 0) {
-        perror("listen");
+        error_setg_errno(errp, errno, "can't listen on socket");
         closesocket(fd);
         return -1;
     }
@@ -543,6 +546,7 @@ static int net_socket_listen_init(NetClientState *peer,
     s->fd = -1;
     s->listen_fd = fd;
     s->nc.link_down = true;
+    net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
 
     qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
     return 0;
@@ -551,18 +555,20 @@ static int net_socket_listen_init(NetClientState *peer,
 static int net_socket_connect_init(NetClientState *peer,
                                    const char *model,
                                    const char *name,
-                                   const char *host_str)
+                                   const char *host_str,
+                                   Error **errp)
 {
     NetSocketState *s;
-    int fd, connected, ret, err;
+    int fd, connected, ret;
     struct sockaddr_in saddr;
 
-    if (parse_host_port(&saddr, host_str) < 0)
+    if (parse_host_port(&saddr, host_str, errp) < 0) {
         return -1;
+    }
 
     fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
     if (fd < 0) {
-        perror("socket");
+        error_setg_errno(errp, errno, "can't create stream socket");
         return -1;
     }
     qemu_set_nonblock(fd);
@@ -571,16 +577,14 @@ static int net_socket_connect_init(NetClientState *peer,
     for(;;) {
         ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr));
         if (ret < 0) {
-            err = socket_error();
-            if (err == EINTR || err == EWOULDBLOCK) {
-            } else if (err == EINPROGRESS) {
-                break;
-#ifdef _WIN32
-            } else if (err == WSAEALREADY || err == WSAEINVAL) {
+            if (errno == EINTR || errno == EWOULDBLOCK) {
+                /* continue */
+            } else if (errno == EINPROGRESS ||
+                       errno == EALREADY ||
+                       errno == EINVAL) {
                 break;
-#endif
             } else {
-                perror("connect");
+                error_setg_errno(errp, errno, "can't connect socket");
                 closesocket(fd);
                 return -1;
             }
@@ -589,9 +593,11 @@ static int net_socket_connect_init(NetClientState *peer,
             break;
         }
     }
-    s = net_socket_fd_init(peer, model, name, fd, connected);
-    if (!s)
+    s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp);
+    if (!s) {
         return -1;
+    }
+
     snprintf(s->nc.info_str, sizeof(s->nc.info_str),
              "socket: connect to %s:%d",
              inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
@@ -602,31 +608,38 @@ static int net_socket_mcast_init(NetClientState *peer,
                                  const char *model,
                                  const char *name,
                                  const char *host_str,
-                                 const char *localaddr_str)
+                                 const char *localaddr_str,
+                                 Error **errp)
 {
     NetSocketState *s;
     int fd;
     struct sockaddr_in saddr;
     struct in_addr localaddr, *param_localaddr;
 
-    if (parse_host_port(&saddr, host_str) < 0)
+    if (parse_host_port(&saddr, host_str, errp) < 0) {
         return -1;
+    }
 
     if (localaddr_str != NULL) {
-        if (inet_aton(localaddr_str, &localaddr) == 0)
+        if (inet_aton(localaddr_str, &localaddr) == 0) {
+            error_setg(errp, "localaddr '%s' is not a valid IPv4 address",
+                       localaddr_str);
             return -1;
+        }
         param_localaddr = &localaddr;
     } else {
         param_localaddr = NULL;
     }
 
-    fd = net_socket_mcast_create(&saddr, param_localaddr);
-    if (fd < 0)
+    fd = net_socket_mcast_create(&saddr, param_localaddr, errp);
+    if (fd < 0) {
         return -1;
+    }
 
-    s = net_socket_fd_init(peer, model, name, fd, 0);
-    if (!s)
+    s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp);
+    if (!s) {
         return -1;
+    }
 
     s->dgram_dst = saddr;
 
@@ -641,42 +654,44 @@ static int net_socket_udp_init(NetClientState *peer,
                                  const char *model,
                                  const char *name,
                                  const char *rhost,
-                                 const char *lhost)
+                                 const char *lhost,
+                                 Error **errp)
 {
     NetSocketState *s;
-    int fd, val, ret;
+    int fd, ret;
     struct sockaddr_in laddr, raddr;
 
-    if (parse_host_port(&laddr, lhost) < 0) {
+    if (parse_host_port(&laddr, lhost, errp) < 0) {
         return -1;
     }
 
-    if (parse_host_port(&raddr, rhost) < 0) {
+    if (parse_host_port(&raddr, rhost, errp) < 0) {
         return -1;
     }
 
     fd = qemu_socket(PF_INET, SOCK_DGRAM, 0);
     if (fd < 0) {
-        perror("socket(PF_INET, SOCK_DGRAM)");
+        error_setg_errno(errp, errno, "can't create datagram socket");
         return -1;
     }
-    val = 1;
-    ret = qemu_setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
-                          &val, sizeof(val));
+
+    ret = socket_set_fast_reuse(fd);
     if (ret < 0) {
-        perror("setsockopt(SOL_SOCKET, SO_REUSEADDR)");
+        error_setg_errno(errp, errno,
+                         "can't set socket option SO_REUSEADDR");
         closesocket(fd);
         return -1;
     }
     ret = bind(fd, (struct sockaddr *)&laddr, sizeof(laddr));
     if (ret < 0) {
-        perror("bind");
+        error_setg_errno(errp, errno, "can't bind ip=%s to socket",
+                         inet_ntoa(laddr.sin_addr));
         closesocket(fd);
         return -1;
     }
     qemu_set_nonblock(fd);
 
-    s = net_socket_fd_init(peer, model, name, fd, 0);
+    s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp);
     if (!s) {
         return -1;
     }
@@ -689,50 +704,52 @@ static int net_socket_udp_init(NetClientState *peer,
     return 0;
 }
 
-int net_init_socket(const NetClientOptions *opts, const char *name,
-                    NetClientState *peer)
+int net_init_socket(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp)
 {
     const NetdevSocketOptions *sock;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_SOCKET);
-    sock = opts->socket;
+    assert(netdev->type == NET_CLIENT_DRIVER_SOCKET);
+    sock = &netdev->u.socket;
 
     if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast +
         sock->has_udp != 1) {
-        error_report("exactly one of fd=, listen=, connect=, mcast= or udp="
-                     " is required");
+        error_setg(errp, "exactly one of listen=, connect=, mcast= or udp="
+                   " is required");
         return -1;
     }
 
     if (sock->has_localaddr && !sock->has_mcast && !sock->has_udp) {
-        error_report("localaddr= is only valid with mcast= or udp=");
+        error_setg(errp, "localaddr= is only valid with mcast= or udp=");
         return -1;
     }
 
     if (sock->has_fd) {
         int fd;
 
-        fd = monitor_handle_fd_param(cur_mon, sock->fd);
+        fd = monitor_fd_param(cur_mon, sock->fd, errp);
         if (fd == -1) {
             return -1;
         }
         qemu_set_nonblock(fd);
-        if (!net_socket_fd_init(peer, "socket", name, fd, 1)) {
+        if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast,
+                                errp)) {
             return -1;
         }
         return 0;
     }
 
     if (sock->has_listen) {
-        if (net_socket_listen_init(peer, "socket", name, sock->listen) == -1) {
+        if (net_socket_listen_init(peer, "socket", name, sock->listen, errp)
+            < 0) {
             return -1;
         }
         return 0;
     }
 
     if (sock->has_connect) {
-        if (net_socket_connect_init(peer, "socket", name, sock->connect) ==
-            -1) {
+        if (net_socket_connect_init(peer, "socket", name, sock->connect, errp)
+            < 0) {
             return -1;
         }
         return 0;
@@ -742,7 +759,7 @@ int net_init_socket(const NetClientOptions *opts, const char *name,
         /* if sock->localaddr is missing, it has been initialized to "all bits
          * zero" */
         if (net_socket_mcast_init(peer, "socket", name, sock->mcast,
-            sock->localaddr) == -1) {
+                                  sock->localaddr, errp) < 0) {
             return -1;
         }
         return 0;
@@ -750,11 +767,11 @@ int net_init_socket(const NetClientOptions *opts, const char *name,
 
     assert(sock->has_udp);
     if (!sock->has_localaddr) {
-        error_report("localaddr= is mandatory with udp=");
+        error_setg(errp, "localaddr= is mandatory with udp=");
         return -1;
     }
-    if (net_socket_udp_init(peer, "socket", name, sock->udp, sock->localaddr) ==
-        -1) {
+    if (net_socket_udp_init(peer, "socket", name, sock->udp, sock->localaddr,
+                            errp) < 0) {
         return -1;
     }
     return 0;
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index f61d58096..a5c3707f8 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -22,19 +22,26 @@
  * THE SOFTWARE.
  */
 
-#include "tap_int.h"
+#include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "sysemu/sysemu.h"
+#include "qapi/error.h"
+#include "tap_int.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
 
-#ifdef __NetBSD__
+#if defined(__NetBSD__) || defined(__FreeBSD__)
 #include <sys/ioctl.h>
 #include <net/if.h>
 #include <net/if_tap.h>
 #endif
 
+#if defined(__OpenBSD__)
+#include <sys/param.h>
+#endif
+
+#ifndef __FreeBSD__
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
-             int vnet_hdr_required, int mq_required)
+             int vnet_hdr_required, int mq_required, Error **errp)
 {
     int fd;
 #ifdef TAPGIFNAME
@@ -44,8 +51,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     struct stat s;
 #endif
 
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
-    defined(__OpenBSD__) || defined(__APPLE__)
     /* if no ifname is given, always start the search from tap0/tun0. */
     int i;
     char dname[100];
@@ -54,7 +59,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
         if (*ifname) {
             snprintf(dname, sizeof dname, "/dev/%s", ifname);
         } else {
-#if defined(__OpenBSD__)
+#if defined(__OpenBSD__) && OpenBSD < 201605
             snprintf(dname, sizeof dname, "/dev/tun%d", i);
 #else
             snprintf(dname, sizeof dname, "/dev/tap%d", i);
@@ -72,32 +77,19 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
         }
     }
     if (fd < 0) {
-        error_report("warning: could not open %s (%s): no virtual network emulation",
-                   dname, strerror(errno));
-        return -1;
-    }
-#else
-    TFR(fd = open("/dev/tap", O_RDWR));
-    if (fd < 0) {
-        fprintf(stderr,
-            "warning: could not open /dev/tap: no virtual network emulation: %s\n",
-            strerror(errno));
+        error_setg_errno(errp, errno, "could not open %s", dname);
         return -1;
     }
-#endif
 
 #ifdef TAPGIFNAME
     if (ioctl(fd, TAPGIFNAME, (void *)&ifr) < 0) {
-        fprintf(stderr, "warning: could not get tap name: %s\n",
-            strerror(errno));
+        error_setg_errno(errp, errno, "could not get tap name");
         return -1;
     }
     pstrcpy(ifname, ifname_size, ifr.ifr_name);
 #else
     if (fstat(fd, &s) < 0) {
-        fprintf(stderr,
-            "warning: could not stat /dev/tap: no virtual network emulation: %s\n",
-            strerror(errno));
+        error_setg_errno(errp, errno, "could not stat %s", dname);
         return -1;
     }
     dev = devname(s.st_rdev, S_IFCHR);
@@ -109,8 +101,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
         *vnet_hdr = 0;
 
         if (vnet_hdr_required && !*vnet_hdr) {
-            error_report("vnet_hdr=1 requested, but no kernel "
-                         "support for IFF_VNET_HDR available");
+            error_setg(errp, "vnet_hdr=1 requested, but no kernel "
+                       "support for IFF_VNET_HDR available");
             close(fd);
             return -1;
         }
@@ -119,9 +111,104 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     return fd;
 }
 
-int tap_set_sndbuf(int fd, const NetdevTapOptions *tap)
+#else /* __FreeBSD__ */
+
+#define PATH_NET_TAP "/dev/tap"
+
+static int tap_open_clone(char *ifname, int ifname_size, Error **errp)
+{
+    int fd, s, ret;
+    struct ifreq ifr;
+
+    TFR(fd = open(PATH_NET_TAP, O_RDWR));
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "could not open %s", PATH_NET_TAP);
+        return -1;
+    }
+
+    memset(&ifr, 0, sizeof(ifr));
+
+    ret = ioctl(fd, TAPGIFNAME, (void *)&ifr);
+    if (ret < 0) {
+        error_setg_errno(errp, errno, "could not get tap interface name");
+        close(fd);
+        return -1;
+    }
+
+    if (ifname[0] != '\0') {
+        /* User requested the interface to have a specific name */
+        s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+        if (s < 0) {
+            error_setg_errno(errp, errno,
+                             "could not open socket to set interface name");
+            close(fd);
+            return -1;
+        }
+        ifr.ifr_data = ifname;
+        ret = ioctl(s, SIOCSIFNAME, (void *)&ifr);
+        close(s);
+        if (ret < 0) {
+            error_setg(errp, "could not set tap interface name");
+            close(fd);
+            return -1;
+        }
+    } else {
+        pstrcpy(ifname, ifname_size, ifr.ifr_name);
+    }
+
+    return fd;
+}
+
+int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
+             int vnet_hdr_required, int mq_required, Error **errp)
+{
+    int fd = -1;
+
+    /* If the specified tap device already exists just use it. */
+    if (ifname[0] != '\0') {
+        char dname[100];
+        snprintf(dname, sizeof dname, "/dev/%s", ifname);
+        TFR(fd = open(dname, O_RDWR));
+        if (fd < 0 && errno != ENOENT) {
+            error_setg_errno(errp, errno, "could not open %s", dname);
+            return -1;
+        }
+    }
+
+    if (fd < 0) {
+        /* Tap device not specified or does not exist. */
+        if ((fd = tap_open_clone(ifname, ifname_size, errp)) < 0) {
+            return -1;
+        }
+    }
+
+    if (*vnet_hdr) {
+        /* BSD doesn't have IFF_VNET_HDR */
+        *vnet_hdr = 0;
+
+        if (vnet_hdr_required && !*vnet_hdr) {
+            error_setg(errp, "vnet_hdr=1 requested, but no kernel "
+                       "support for IFF_VNET_HDR available");
+            goto error;
+        }
+    }
+    if (mq_required) {
+        error_setg(errp, "mq_required requested, but no kernel support"
+                   " for IFF_MULTI_QUEUE available");
+        goto error;
+    }
+
+    fcntl(fd, F_SETFL, O_NONBLOCK);
+    return fd;
+
+error:
+    close(fd);
+    return -1;
+}
+#endif /* __FreeBSD__ */
+
+void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
-    return 0;
 }
 
 int tap_probe_vnet_hdr(int fd)
@@ -143,6 +230,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
deleted file mode 100644
index e5ce436d2..000000000
--- a/net/tap-haiku.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "tap_int.h"
-#include <stdio.h>
-
-int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
-             int vnet_hdr_required, int mq_required)
-{
-    fprintf(stderr, "no tap on Haiku\n");
-    return -1;
-}
-
-int tap_set_sndbuf(int fd, const NetdevTapOptions *tap)
-{
-    return 0;
-}
-
-int tap_probe_vnet_hdr(int fd)
-{
-    return 0;
-}
-
-int tap_probe_has_ufo(int fd)
-{
-    return 0;
-}
-
-int tap_probe_vnet_hdr_len(int fd, int len)
-{
-    return 0;
-}
-
-void tap_fd_set_vnet_hdr_len(int fd, int len)
-{
-}
-
-void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
-{
-}
-
-int tap_fd_enable(int fd)
-{
-    return -1;
-}
-
-int tap_fd_disable(int fd)
-{
-    return -1;
-}
-
-int tap_fd_get_ifname(int fd, char *ifname)
-{
-    return -1;
-}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 36c09e24d..e0dd442ee 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -23,6 +23,8 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
+#include "qemu-common.h"
 #include "tap_int.h"
 #include "tap-linux.h"
 #include "net/tap.h"
@@ -30,14 +32,14 @@
 #include <net/if.h>
 #include <sys/ioctl.h>
 
-#include "sysemu/sysemu.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "qemu/cutils.h"
 
 #define PATH_NET_TUN "/dev/net/tun"
 
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
-             int vnet_hdr_required, int mq_required)
+             int vnet_hdr_required, int mq_required, Error **errp)
 {
     struct ifreq ifr;
     int fd, ret;
@@ -46,20 +48,23 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
 
     TFR(fd = open(PATH_NET_TUN, O_RDWR));
     if (fd < 0) {
-        error_report("could not open %s: %m", PATH_NET_TUN);
+        error_setg_errno(errp, errno, "could not open %s", PATH_NET_TUN);
         return -1;
     }
     memset(&ifr, 0, sizeof(ifr));
     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
 
-    if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
-        features & IFF_ONE_QUEUE) {
+    if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+        warn_report("TUNGETFEATURES failed: %s", strerror(errno));
+        features = 0;
+    }
+
+    if (features & IFF_ONE_QUEUE) {
         ifr.ifr_flags |= IFF_ONE_QUEUE;
     }
 
     if (*vnet_hdr) {
-        if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
-            features & IFF_VNET_HDR) {
+        if (features & IFF_VNET_HDR) {
             *vnet_hdr = 1;
             ifr.ifr_flags |= IFF_VNET_HDR;
         } else {
@@ -67,8 +72,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
         }
 
         if (vnet_hdr_required && !*vnet_hdr) {
-            error_report("vnet_hdr=1 requested, but no kernel "
-                         "support for IFF_VNET_HDR available");
+            error_setg(errp, "vnet_hdr=1 requested, but no kernel "
+                       "support for IFF_VNET_HDR available");
             close(fd);
             return -1;
         }
@@ -82,10 +87,9 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     }
 
     if (mq_required) {
-        if ((ioctl(fd, TUNGETFEATURES, &features) != 0) ||
-            !(features & IFF_MULTI_QUEUE)) {
-            error_report("multiqueue required, but no kernel "
-                         "support for IFF_MULTI_QUEUE available");
+        if (!(features & IFF_MULTI_QUEUE)) {
+            error_setg(errp, "multiqueue required, but no kernel "
+                       "support for IFF_MULTI_QUEUE available");
             close(fd);
             return -1;
         } else {
@@ -100,9 +104,11 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
     if (ret != 0) {
         if (ifname[0] != '\0') {
-            error_report("could not configure %s (%s): %m", PATH_NET_TUN, ifr.ifr_name);
+            error_setg_errno(errp, errno, "could not configure %s (%s)",
+                             PATH_NET_TUN, ifr.ifr_name);
         } else {
-            error_report("could not configure %s: %m", PATH_NET_TUN);
+            error_setg_errno(errp, errno, "could not configure %s",
+                             PATH_NET_TUN);
         }
         close(fd);
         return -1;
@@ -124,7 +130,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
  */
 #define TAP_DEFAULT_SNDBUF 0
 
-int tap_set_sndbuf(int fd, const NetdevTapOptions *tap)
+void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
     int sndbuf;
 
@@ -137,10 +143,8 @@ int tap_set_sndbuf(int fd, const NetdevTapOptions *tap)
     }
 
     if (ioctl(fd, TUNSETSNDBUF, &sndbuf) == -1 && tap->has_sndbuf) {
-        error_report("TUNSETSNDBUF ioctl failed: %s", strerror(errno));
-        return -1;
+        error_setg_errno(errp, errno, "TUNSETSNDBUF ioctl failed");
     }
-    return 0;
 }
 
 int tap_probe_vnet_hdr(int fd)
@@ -196,6 +200,40 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
     }
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    int arg = is_le ? 1 : 0;
+
+    if (!ioctl(fd, TUNSETVNETLE, &arg)) {
+        return 0;
+    }
+
+    /* Check if our kernel supports TUNSETVNETLE */
+    if (errno == EINVAL) {
+        return -errno;
+    }
+
+    error_report("TUNSETVNETLE ioctl() failed: %s.", strerror(errno));
+    abort();
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    int arg = is_be ? 1 : 0;
+
+    if (!ioctl(fd, TUNSETVNETBE, &arg)) {
+        return 0;
+    }
+
+    /* Check if our kernel supports TUNSETVNETBE */
+    if (errno == EINVAL) {
+        return -errno;
+    }
+
+    error_report("TUNSETVNETBE ioctl() failed: %s.", strerror(errno));
+    abort();
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 1cf35d41b..2f36d100f 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -16,7 +16,6 @@
 #ifndef QEMU_TAP_LINUX_H
 #define QEMU_TAP_LINUX_H
 
-#include <stdint.h>
 #ifdef __linux__
 
 #include <linux/ioctl.h>
@@ -30,6 +29,8 @@
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
 #define TUNSETQUEUE  _IOW('T', 217, int)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNSETVNETBE _IOW('T', 222, int)
 
 #endif
 
@@ -49,4 +50,4 @@
 #define TUN_F_TSO_ECN	0x08	/* I can handle TSO with ECN bits. */
 #define TUN_F_UFO	0x10	/* I can handle UFO packets */
 
-#endif /* QEMU_TAP_H */
+#endif /* QEMU_TAP_LINUX_H */
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 9c7278f1b..4725d2314 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -22,10 +22,12 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "tap_int.h"
-#include "sysemu/sysemu.h"
+#include "qemu/ctype.h"
+#include "qemu/cutils.h"
 
-#include <sys/stat.h>
 #include <sys/ethernet.h>
 #include <sys/sockio.h>
 #include <netinet/arp.h>
@@ -36,7 +38,6 @@
 #include <netinet/udp.h>
 #include <netinet/tcp.h>
 #include <net/if.h>
-#include <syslog.h>
 #include <stropts.h>
 #include "qemu/error-report.h"
 
@@ -56,8 +57,10 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
  * Allocate TAP device, returns opened fd.
  * Stores dev name in the first arg(must be large enough).
  */
-static int tap_alloc(char *dev, size_t dev_size)
+static int tap_alloc(char *dev, size_t dev_size, Error **errp)
 {
+    /* FIXME leaks like a sieve on error paths */
+    /* FIXME suspicious: many errors are reported, then ignored */
     int tap_fd, if_fd, ppa = -1;
     static int ip_fd = 0;
     char *ptr;
@@ -83,14 +86,14 @@ static int tap_alloc(char *dev, size_t dev_size)
 
     TFR(ip_fd = open("/dev/udp", O_RDWR, 0));
     if (ip_fd < 0) {
-       syslog(LOG_ERR, "Can't open /dev/ip (actually /dev/udp)");
-       return -1;
+        error_setg(errp, "Can't open /dev/ip (actually /dev/udp)");
+        return -1;
     }
 
     TFR(tap_fd = open("/dev/tap", O_RDWR, 0));
     if (tap_fd < 0) {
-       syslog(LOG_ERR, "Can't open /dev/tap");
-       return -1;
+        error_setg(errp, "Can't open /dev/tap");
+        return -1;
     }
 
     /* Assign a new PPA and get its unit number. */
@@ -99,20 +102,20 @@ static int tap_alloc(char *dev, size_t dev_size)
     strioc_ppa.ic_len = sizeof(ppa);
     strioc_ppa.ic_dp = (char *)&ppa;
     if ((ppa = ioctl (tap_fd, I_STR, &strioc_ppa)) < 0)
-       syslog (LOG_ERR, "Can't assign new interface");
+        error_report("Can't assign new interface");
 
     TFR(if_fd = open("/dev/tap", O_RDWR, 0));
     if (if_fd < 0) {
-       syslog(LOG_ERR, "Can't open /dev/tap (2)");
-       return -1;
+        error_setg(errp, "Can't open /dev/tap (2)");
+        return -1;
     }
     if(ioctl(if_fd, I_PUSH, "ip") < 0){
-       syslog(LOG_ERR, "Can't push IP module");
-       return -1;
+        error_setg(errp, "Can't push IP module");
+        return -1;
     }
 
     if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) < 0)
-	syslog(LOG_ERR, "Can't get flags\n");
+        error_report("Can't get flags");
 
     snprintf (actual_name, 32, "tap%d", ppa);
     pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name);
@@ -121,22 +124,22 @@ static int tap_alloc(char *dev, size_t dev_size)
     /* Assign ppa according to the unit number returned by tun device */
 
     if (ioctl (if_fd, SIOCSLIFNAME, &ifr) < 0)
-        syslog (LOG_ERR, "Can't set PPA %d", ppa);
+        error_report("Can't set PPA %d", ppa);
     if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) <0)
-        syslog (LOG_ERR, "Can't get flags\n");
+        error_report("Can't get flags");
     /* Push arp module to if_fd */
     if (ioctl (if_fd, I_PUSH, "arp") < 0)
-        syslog (LOG_ERR, "Can't push ARP module (2)");
+        error_report("Can't push ARP module (2)");
 
     /* Push arp module to ip_fd */
     if (ioctl (ip_fd, I_POP, NULL) < 0)
-        syslog (LOG_ERR, "I_POP failed\n");
+        error_report("I_POP failed");
     if (ioctl (ip_fd, I_PUSH, "arp") < 0)
-        syslog (LOG_ERR, "Can't push ARP module (3)\n");
+        error_report("Can't push ARP module (3)");
     /* Open arp_fd */
     TFR(arp_fd = open ("/dev/tap", O_RDWR, 0));
     if (arp_fd < 0)
-       syslog (LOG_ERR, "Can't open %s\n", "/dev/tap");
+        error_report("Can't open %s", "/dev/tap");
 
     /* Set ifname to arp */
     strioc_if.ic_cmd = SIOCSLIFNAME;
@@ -144,16 +147,16 @@ static int tap_alloc(char *dev, size_t dev_size)
     strioc_if.ic_len = sizeof(ifr);
     strioc_if.ic_dp = (char *)&ifr;
     if (ioctl(arp_fd, I_STR, &strioc_if) < 0){
-        syslog (LOG_ERR, "Can't set ifname to arp\n");
+        error_report("Can't set ifname to arp");
     }
 
     if((ip_muxid = ioctl(ip_fd, I_LINK, if_fd)) < 0){
-       syslog(LOG_ERR, "Can't link TAP device to IP");
-       return -1;
+        error_setg(errp, "Can't link TAP device to IP");
+        return -1;
     }
 
     if ((arp_muxid = ioctl (ip_fd, link_type, arp_fd)) < 0)
-        syslog (LOG_ERR, "Can't link TAP device to ARP");
+        error_report("Can't link TAP device to ARP");
 
     close (if_fd);
 
@@ -166,7 +169,7 @@ static int tap_alloc(char *dev, size_t dev_size)
     {
       ioctl (ip_fd, I_PUNLINK , arp_muxid);
       ioctl (ip_fd, I_PUNLINK, ip_muxid);
-      syslog (LOG_ERR, "Can't set multiplexor id");
+      error_report("Can't set multiplexor id");
     }
 
     snprintf(dev, dev_size, "tap%d", ppa);
@@ -174,13 +177,14 @@ static int tap_alloc(char *dev, size_t dev_size)
 }
 
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
-             int vnet_hdr_required, int mq_required)
+             int vnet_hdr_required, int mq_required, Error **errp)
 {
     char  dev[10]="";
     int fd;
-    if( (fd = tap_alloc(dev, sizeof(dev))) < 0 ){
-       fprintf(stderr, "Cannot allocate TAP device\n");
-       return -1;
+
+    fd = tap_alloc(dev, sizeof(dev), errp);
+    if (fd < 0) {
+        return -1;
     }
     pstrcpy(ifname, ifname_size, dev);
     if (*vnet_hdr) {
@@ -188,8 +192,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
         *vnet_hdr = 0;
 
         if (vnet_hdr_required && !*vnet_hdr) {
-            error_report("vnet_hdr=1 requested, but no kernel "
-                         "support for IFF_VNET_HDR available");
+            error_setg(errp, "vnet_hdr=1 requested, but no kernel "
+                       "support for IFF_VNET_HDR available");
             close(fd);
             return -1;
         }
@@ -198,9 +202,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
     return fd;
 }
 
-int tap_set_sndbuf(int fd, const NetdevTapOptions *tap)
+void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
-    return 0;
 }
 
 int tap_probe_vnet_hdr(int fd)
@@ -222,6 +225,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-aix.c b/net/tap-stub.c
index 804d16448..a9ab8f829 100644
--- a/net/tap-aix.c
+++ b/net/tap-stub.c
@@ -22,19 +22,19 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "tap_int.h"
-#include <stdio.h>
 
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
-             int vnet_hdr_required, int mq_required)
+             int vnet_hdr_required, int mq_required, Error **errp)
 {
-    fprintf(stderr, "no tap on AIX\n");
+    error_setg(errp, "tap is not supported in this build");
     return -1;
 }
 
-int tap_set_sndbuf(int fd, const NetdevTapOptions *tap)
+void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
 {
-    return 0;
 }
 
 int tap_probe_vnet_hdr(int fd)
@@ -56,6 +56,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
@@ -75,4 +85,3 @@ int tap_fd_get_ifname(int fd, char *ifname)
 {
     return -1;
 }
-
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 91e9e844a..2b5dcda36 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -26,15 +26,15 @@
  *  distribution); if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "qemu/osdep.h"
 #include "tap_int.h"
 
 #include "qemu-common.h"
 #include "clients.h"            /* net_init_tap */
 #include "net/net.h"
 #include "net/tap.h"            /* tap_has_ufo, ... */
-#include "sysemu/sysemu.h"
 #include "qemu/error-report.h"
-#include <stdio.h>
+#include "qemu/main-loop.h"
 #include <windows.h>
 #include <winioctl.h>
 
@@ -77,7 +77,12 @@
 
 //#define DEBUG_TAP_WIN32
 
-#define TUN_ASYNCHRONOUS_WRITES 1
+/* FIXME: The asynch write path appears to be broken at
+ * present. WriteFile() ignores the lpNumberOfBytesWritten parameter
+ * for overlapped writes, with the result we return zero bytes sent,
+ * and after handling a single packet, receive is disabled for this
+ * interface. */
+/* #define TUN_ASYNCHRONOUS_WRITES 1 */
 
 #define TUN_BUFFER_SIZE 1560
 #define TUN_MAX_BUFFER_COUNT 32
@@ -356,7 +361,8 @@ static int get_device_guid(
                 &len);
 
             if (status != ERROR_SUCCESS || name_type != REG_SZ) {
-                    return -1;
+                ++i;
+                continue;
             }
             else {
                 if (is_tap_win32_dev(enum_name)) {
@@ -460,27 +466,48 @@ static int tap_win32_write(tap_win32_overlapped_t *overlapped,
     BOOL result;
     DWORD error;
 
+#ifdef TUN_ASYNCHRONOUS_WRITES
     result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped,
                                   &write_size, FALSE);
 
     if (!result && GetLastError() == ERROR_IO_INCOMPLETE)
         WaitForSingleObject(overlapped->write_event, INFINITE);
+#endif
 
     result = WriteFile(overlapped->handle, buffer, size,
                        &write_size, &overlapped->write_overlapped);
 
+#ifdef TUN_ASYNCHRONOUS_WRITES
+    /* FIXME: we can't sensibly set write_size here, without waiting
+     * for the IO to complete! Moreover, we can't return zero,
+     * because that will disable receive on this interface, and we
+     * also can't assume it will succeed and return the full size,
+     * because that will result in the buffer being reclaimed while
+     * the IO is in progress. */
+#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES.
+#else /* !TUN_ASYNCHRONOUS_WRITES */
     if (!result) {
-        switch (error = GetLastError())
-        {
-        case ERROR_IO_PENDING:
-#ifndef TUN_ASYNCHRONOUS_WRITES
-            WaitForSingleObject(overlapped->write_event, INFINITE);
-#endif
-            break;
-        default:
-            return -1;
+        error = GetLastError();
+        if (error == ERROR_IO_PENDING) {
+            result = GetOverlappedResult(overlapped->handle,
+                                         &overlapped->write_overlapped,
+                                         &write_size, TRUE);
         }
     }
+#endif
+
+    if (!result) {
+#ifdef DEBUG_TAP_WIN32
+        LPTSTR msgbuf;
+        error = GetLastError();
+        FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+                      NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                      &msgbuf, 0, NULL);
+        fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf);
+        LocalFree(msgbuf);
+#endif
+        return 0;
+    }
 
     return write_size;
 }
@@ -669,11 +696,70 @@ static void tap_win32_send(void *opaque)
     }
 }
 
+static bool tap_has_ufo(NetClientState *nc)
+{
+    return false;
+}
+
+static bool tap_has_vnet_hdr(NetClientState *nc)
+{
+    return false;
+}
+
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
+static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+{
+}
+
+static void tap_set_offload(NetClientState *nc, int csum, int tso4,
+                     int tso6, int ecn, int ufo)
+{
+}
+
+struct vhost_net *tap_get_vhost_net(NetClientState *nc)
+{
+    return NULL;
+}
+
+static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    return false;
+}
+
+static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    abort();
+}
+
 static NetClientInfo net_tap_win32_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_TAP,
+    .type = NET_CLIENT_DRIVER_TAP,
     .size = sizeof(TAPState),
     .receive = tap_receive,
     .cleanup = tap_cleanup,
+    .has_ufo = tap_has_ufo,
+    .has_vnet_hdr = tap_has_vnet_hdr,
+    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
+    .using_vnet_hdr = tap_using_vnet_hdr,
+    .set_offload = tap_set_offload,
+    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 };
 
 static int tap_win32_init(NetClientState *peer, const char *model,
@@ -702,13 +788,14 @@ static int tap_win32_init(NetClientState *peer, const char *model,
     return 0;
 }
 
-int net_init_tap(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer)
+int net_init_tap(const Netdev *netdev, const char *name,
+                 NetClientState *peer, Error **errp)
 {
+    /* FIXME error_setg(errp, ...) on failure */
     const NetdevTapOptions *tap;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
-    tap = opts->tap;
+    assert(netdev->type == NET_CLIENT_DRIVER_TAP);
+    tap = &netdev->u.tap;
 
     if (!tap->has_ifname) {
         error_report("tap: no interface name");
@@ -722,49 +809,6 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
     return 0;
 }
 
-bool tap_has_ufo(NetClientState *nc)
-{
-    return false;
-}
-
-int tap_has_vnet_hdr(NetClientState *nc)
-{
-    return 0;
-}
-
-int tap_probe_vnet_hdr_len(int fd, int len)
-{
-    return 0;
-}
-
-void tap_fd_set_vnet_hdr_len(int fd, int len)
-{
-}
-
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
-{
-}
-
-void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
-{
-}
-
-struct vhost_net *tap_get_vhost_net(NetClientState *nc)
-{
-    return NULL;
-}
-
-int tap_has_vnet_hdr_len(NetClientState *nc, int len)
-{
-    return 0;
-}
-
-void tap_set_vnet_hdr_len(NetClientState *nc, int len)
-{
-    abort();
-}
-
 int tap_enable(NetClientState *nc)
 {
     abort();
diff --git a/net/tap.c b/net/tap.c
index 39c1cda3e..6207f61f8 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -23,12 +23,11 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
 #include "tap_int.h"
 
-#include "config-host.h"
 
 #include <sys/ioctl.h>
-#include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/socket.h>
 #include <net/if.h>
@@ -37,8 +36,12 @@
 #include "clients.h"
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/sockets.h"
 
 #include "net/tap.h"
 
@@ -57,21 +60,21 @@ typedef struct TAPState {
     bool enabled;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
+    Notifier exit;
 } TAPState;
 
-static int launch_script(const char *setup_script, const char *ifname, int fd);
+static void launch_script(const char *setup_script, const char *ifname,
+                          int fd, Error **errp);
 
-static int tap_can_send(void *opaque);
 static void tap_send(void *opaque);
 static void tap_writable(void *opaque);
 
 static void tap_update_fd_handler(TAPState *s)
 {
-    qemu_set_fd_handler2(s->fd,
-                         s->read_poll && s->enabled ? tap_can_send : NULL,
-                         s->read_poll && s->enabled ? tap_send     : NULL,
-                         s->write_poll && s->enabled ? tap_writable : NULL,
-                         s);
+    qemu_set_fd_handler(s->fd,
+                        s->read_poll && s->enabled ? tap_send : NULL,
+                        s->write_poll && s->enabled ? tap_writable : NULL,
+                        s);
 }
 
 static void tap_read_poll(TAPState *s, bool enable)
@@ -165,13 +168,6 @@ static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     return tap_write_packet(s, iov, 1);
 }
 
-static int tap_can_send(void *opaque)
-{
-    TAPState *s = opaque;
-
-    return qemu_can_send_packet(&s->nc);
-}
-
 #ifndef __sun__
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
 {
@@ -189,8 +185,9 @@ static void tap_send(void *opaque)
 {
     TAPState *s = opaque;
     int size;
+    int packets = 0;
 
-    do {
+    while (true) {
         uint8_t *buf = s->buf;
 
         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
@@ -206,42 +203,56 @@ static void tap_send(void *opaque)
         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
         if (size == 0) {
             tap_read_poll(s, false);
+            break;
+        } else if (size < 0) {
+            break;
+        }
+
+        /*
+         * When the host keeps receiving more packets while tap_send() is
+         * running we can hog the QEMU global mutex.  Limit the number of
+         * packets that are processed per tap_send() callback to prevent
+         * stalling the guest.
+         */
+        packets++;
+        if (packets >= 50) {
+            break;
         }
-    } while (size > 0 && qemu_can_send_packet(&s->nc));
+    }
 }
 
-bool tap_has_ufo(NetClientState *nc)
+static bool tap_has_ufo(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 
     return s->has_ufo;
 }
 
-int tap_has_vnet_hdr(NetClientState *nc)
+static bool tap_has_vnet_hdr(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 
     return !!s->host_vnet_hdr_len;
 }
 
-int tap_has_vnet_hdr_len(NetClientState *nc, int len)
+static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
 
-    return tap_probe_vnet_hdr_len(s->fd, len);
+    return !!tap_probe_vnet_hdr_len(s->fd, len);
 }
 
-void tap_set_vnet_hdr_len(NetClientState *nc, int len)
+static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
            len == sizeof(struct virtio_net_hdr));
 
@@ -249,17 +260,31 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
     s->host_vnet_hdr_len = len;
 }
 
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
     assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
 
     s->using_vnet_hdr = using_vnet_hdr;
 }
 
-void tap_set_offload(NetClientState *nc, int csum, int tso4,
+static int tap_set_vnet_le(NetClientState *nc, bool is_le)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    return tap_fd_set_vnet_le(s->fd, is_le);
+}
+
+static int tap_set_vnet_be(NetClientState *nc, bool is_be)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    return tap_fd_set_vnet_be(s->fd, is_be);
+}
+
+static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                      int tso6, int ecn, int ufo)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -270,19 +295,33 @@ void tap_set_offload(NetClientState *nc, int csum, int tso4,
     tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
 }
 
+static void tap_exit_notify(Notifier *notifier, void *data)
+{
+    TAPState *s = container_of(notifier, TAPState, exit);
+    Error *err = NULL;
+
+    if (s->down_script[0]) {
+        launch_script(s->down_script, s->down_script_arg, s->fd, &err);
+        if (err) {
+            error_report_err(err);
+        }
+    }
+}
+
 static void tap_cleanup(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
+        g_free(s->vhost_net);
         s->vhost_net = NULL;
     }
 
     qemu_purge_queued_packets(nc);
 
-    if (s->down_script[0])
-        launch_script(s->down_script, s->down_script_arg, s->fd);
+    tap_exit_notify(&s->exit, NULL);
+    qemu_remove_exit_notifier(&s->exit);
 
     tap_read_poll(s, false);
     tap_write_poll(s, false);
@@ -300,20 +339,28 @@ static void tap_poll(NetClientState *nc, bool enable)
 int tap_get_fd(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
     return s->fd;
 }
 
 /* fd support */
 
 static NetClientInfo net_tap_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_TAP,
+    .type = NET_CLIENT_DRIVER_TAP,
     .size = sizeof(TAPState),
     .receive = tap_receive,
     .receive_raw = tap_receive_raw,
     .receive_iov = tap_receive_iov,
     .poll = tap_poll,
     .cleanup = tap_cleanup,
+    .has_ufo = tap_has_ufo,
+    .has_vnet_hdr = tap_has_vnet_hdr,
+    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
+    .using_vnet_hdr = tap_using_vnet_hdr,
+    .set_offload = tap_set_offload,
+    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
+    .set_vnet_le = tap_set_vnet_le,
+    .set_vnet_be = tap_set_vnet_be,
 };
 
 static TAPState *net_tap_fd_init(NetClientState *peer,
@@ -344,10 +391,15 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
     }
     tap_read_poll(s, true);
     s->vhost_net = NULL;
+
+    s->exit.notify = tap_exit_notify;
+    qemu_add_exit_notifier(&s->exit);
+
     return s;
 }
 
-static int launch_script(const char *setup_script, const char *ifname, int fd)
+static void launch_script(const char *setup_script, const char *ifname,
+                          int fd, Error **errp)
 {
     int pid, status;
     char *args[3];
@@ -355,14 +407,16 @@ static int launch_script(const char *setup_script, const char *ifname, int fd)
 
     /* try to launch network script */
     pid = fork();
+    if (pid < 0) {
+        error_setg_errno(errp, errno, "could not launch network script %s",
+                         setup_script);
+        return;
+    }
     if (pid == 0) {
         int open_max = sysconf(_SC_OPEN_MAX), i;
 
-        for (i = 0; i < open_max; i++) {
-            if (i != STDIN_FILENO &&
-                i != STDOUT_FILENO &&
-                i != STDERR_FILENO &&
-                i != fd) {
+        for (i = 3; i < open_max; i++) {
+            if (i != fd) {
                 close(i);
             }
         }
@@ -372,17 +426,17 @@ static int launch_script(const char *setup_script, const char *ifname, int fd)
         *parg = NULL;
         execv(setup_script, args);
         _exit(1);
-    } else if (pid > 0) {
+    } else {
         while (waitpid(pid, &status, 0) != pid) {
             /* loop */
         }
 
         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
-            return 0;
+            return;
         }
+        error_setg(errp, "network script %s failed with status %d",
+                   setup_script, status);
     }
-    fprintf(stderr, "%s: could not launch network script\n", setup_script);
-    return -1;
 }
 
 static int recv_fd(int c)
@@ -419,7 +473,8 @@ static int recv_fd(int c)
     return len;
 }
 
-static int net_bridge_run_helper(const char *helper, const char *bridge)
+static int net_bridge_run_helper(const char *helper, const char *bridge,
+                                 Error **errp)
 {
     sigset_t oldmask, mask;
     int pid, status;
@@ -432,37 +487,39 @@ static int net_bridge_run_helper(const char *helper, const char *bridge)
     sigprocmask(SIG_BLOCK, &mask, &oldmask);
 
     if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+        error_setg_errno(errp, errno, "socketpair() failed");
         return -1;
     }
 
     /* try to launch bridge helper */
     pid = fork();
+    if (pid < 0) {
+        error_setg_errno(errp, errno, "Can't fork bridge helper");
+        return -1;
+    }
     if (pid == 0) {
         int open_max = sysconf(_SC_OPEN_MAX), i;
-        char fd_buf[6+10];
-        char br_buf[6+IFNAMSIZ] = {0};
-        char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
-
-        for (i = 0; i < open_max; i++) {
-            if (i != STDIN_FILENO &&
-                i != STDOUT_FILENO &&
-                i != STDERR_FILENO &&
-                i != sv[1]) {
+        char *fd_buf = NULL;
+        char *br_buf = NULL;
+        char *helper_cmd = NULL;
+
+        for (i = 3; i < open_max; i++) {
+            if (i != sv[1]) {
                 close(i);
             }
         }
 
-        snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
+        fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
 
         if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
             /* assume helper is a command */
 
             if (strstr(helper, "--br=") == NULL) {
-                snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
+                br_buf = g_strdup_printf("%s%s", "--br=", bridge);
             }
 
-            snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
-                     helper, "--use-vnet", fd_buf, br_buf);
+            helper_cmd = g_strdup_printf("%s %s %s %s", helper,
+                            "--use-vnet", fd_buf, br_buf ? br_buf : "");
 
             parg = args;
             *parg++ = (char *)"sh";
@@ -471,10 +528,11 @@ static int net_bridge_run_helper(const char *helper, const char *bridge)
             *parg++ = NULL;
 
             execv("/bin/sh", args);
+            g_free(helper_cmd);
         } else {
             /* assume helper is just the executable path name */
 
-            snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
+            br_buf = g_strdup_printf("%s%s", "--br=", bridge);
 
             parg = args;
             *parg++ = (char *)helper;
@@ -485,16 +543,20 @@ static int net_bridge_run_helper(const char *helper, const char *bridge)
 
             execv(helper, args);
         }
+        g_free(fd_buf);
+        g_free(br_buf);
         _exit(1);
 
-    } else if (pid > 0) {
+    } else {
         int fd;
+        int saved_errno;
 
         close(sv[1]);
 
         do {
             fd = recv_fd(sv[0]);
         } while (fd == -1 && errno == EINTR);
+        saved_errno = errno;
 
         close(sv[0]);
 
@@ -503,47 +565,40 @@ static int net_bridge_run_helper(const char *helper, const char *bridge)
         }
         sigprocmask(SIG_SETMASK, &oldmask, NULL);
         if (fd < 0) {
-            fprintf(stderr, "failed to recv file descriptor\n");
+            error_setg_errno(errp, saved_errno,
+                             "failed to recv file descriptor");
             return -1;
         }
-
-        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
-            return fd;
+        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+            error_setg(errp, "bridge helper failed");
+            return -1;
         }
+        return fd;
     }
-    fprintf(stderr, "failed to launch bridge helper\n");
-    return -1;
 }
 
-int net_init_bridge(const NetClientOptions *opts, const char *name,
-                    NetClientState *peer)
+int net_init_bridge(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp)
 {
     const NetdevBridgeOptions *bridge;
     const char *helper, *br;
-
     TAPState *s;
     int fd, vnet_hdr;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE);
-    bridge = opts->bridge;
+    assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
+    bridge = &netdev->u.bridge;
 
     helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
     br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
 
-    fd = net_bridge_run_helper(helper, br);
+    fd = net_bridge_run_helper(helper, br, errp);
     if (fd == -1) {
         return -1;
     }
 
-    fcntl(fd, F_SETFL, O_NONBLOCK);
-
+    qemu_set_nonblock(fd);
     vnet_hdr = tap_probe_vnet_hdr(fd);
-
     s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
-    if (!s) {
-        close(fd);
-        return -1;
-    }
 
     snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
              br);
@@ -553,8 +608,9 @@ int net_init_bridge(const NetClientOptions *opts, const char *name,
 
 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                         const char *setup_script, char *ifname,
-                        size_t ifname_sz, int mq_required)
+                        size_t ifname_sz, int mq_required, Error **errp)
 {
+    Error *err = NULL;
     int fd, vnet_hdr_required;
 
     if (tap->has_vnet_hdr) {
@@ -566,17 +622,20 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
     }
 
     TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
-                      mq_required));
+                      mq_required, errp));
     if (fd < 0) {
         return -1;
     }
 
     if (setup_script &&
         setup_script[0] != '\0' &&
-        strcmp(setup_script, "no") != 0 &&
-        launch_script(setup_script, ifname, fd)) {
-        close(fd);
-        return -1;
+        strcmp(setup_script, "no") != 0) {
+        launch_script(setup_script, ifname, fd, &err);
+        if (err) {
+            error_propagate(errp, err);
+            close(fd);
+            return -1;
+        }
     }
 
     return fd;
@@ -584,22 +643,20 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
 
 #define MAX_TAP_QUEUES 1024
 
-static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
-                            const char *model, const char *name,
-                            const char *ifname, const char *script,
-                            const char *downscript, const char *vhostfdname,
-                            int vnet_hdr, int fd)
+static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
+                             const char *model, const char *name,
+                             const char *ifname, const char *script,
+                             const char *downscript, const char *vhostfdname,
+                             int vnet_hdr, int fd, Error **errp)
 {
-    TAPState *s;
+    Error *err = NULL;
+    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
+    int vhostfd;
 
-    s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
-    if (!s) {
-        close(fd);
-        return -1;
-    }
-
-    if (tap_set_sndbuf(s->fd, tap) < 0) {
-        return -1;
+    tap_set_sndbuf(s->fd, tap, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
     }
 
     if (tap->has_fd || tap->has_fds) {
@@ -621,29 +678,55 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
 
     if (tap->has_vhost ? tap->vhost :
         vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
-        int vhostfd;
+        VhostNetOptions options;
 
-        if (tap->has_vhostfd || tap->has_vhostfds) {
-            vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname);
+        options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
+        options.net_backend = &s->nc;
+        if (tap->has_poll_us) {
+            options.busyloop_timeout = tap->poll_us;
+        } else {
+            options.busyloop_timeout = 0;
+        }
+
+        if (vhostfdname) {
+            vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
             if (vhostfd == -1) {
-                return -1;
+                if (tap->has_vhostforce && tap->vhostforce) {
+                    error_propagate(errp, err);
+                } else {
+                    warn_report_err(err);
+                }
+                return;
             }
+            qemu_set_nonblock(vhostfd);
         } else {
-            vhostfd = -1;
+            vhostfd = open("/dev/vhost-net", O_RDWR);
+            if (vhostfd < 0) {
+                if (tap->has_vhostforce && tap->vhostforce) {
+                    error_setg_errno(errp, errno,
+                                     "tap: open vhost char device failed");
+                } else {
+                    warn_report("tap: open vhost char device failed: %s",
+                                strerror(errno));
+                }
+                return;
+            }
+            qemu_set_nonblock(vhostfd);
         }
+        options.opaque = (void *)(uintptr_t)vhostfd;
 
-        s->vhost_net = vhost_net_init(&s->nc, vhostfd,
-                                      tap->has_vhostforce && tap->vhostforce);
+        s->vhost_net = vhost_net_init(&options);
         if (!s->vhost_net) {
-            error_report("vhost-net requested but could not be initialized");
-            return -1;
+            if (tap->has_vhostforce && tap->vhostforce) {
+                error_setg(errp, VHOST_NET_INIT_FAILED);
+            } else {
+                warn_report(VHOST_NET_INIT_FAILED);
+            }
+            return;
         }
-    } else if (tap->has_vhostfd || tap->has_vhostfds) {
-        error_report("vhostfd= is not valid without vhost");
-        return -1;
+    } else if (vhostfdname) {
+        error_setg(errp, "vhostfd(s)= is not valid without vhost");
     }
-
-    return 0;
 }
 
 static int get_fds(char *str, char *fds[], int max)
@@ -672,26 +755,27 @@ static int get_fds(char *str, char *fds[], int max)
     return i;
 }
 
-int net_init_tap(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer)
+int net_init_tap(const Netdev *netdev, const char *name,
+                 NetClientState *peer, Error **errp)
 {
     const NetdevTapOptions *tap;
     int fd, vnet_hdr = 0, i = 0, queues;
     /* for the no-fd, no-helper case */
     const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
     const char *downscript = NULL;
+    Error *err = NULL;
     const char *vhostfdname;
     char ifname[128];
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
-    tap = opts->tap;
+    assert(netdev->type == NET_CLIENT_DRIVER_TAP);
+    tap = &netdev->u.tap;
     queues = tap->has_queues ? tap->queues : 1;
     vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
 
-    /* QEMU vlans does not support multiqueue tap, in this case peer is set.
+    /* QEMU hubs do not support multiqueue tap, in this case peer is set.
      * For -netdev, peer is always NULL. */
     if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
-        error_report("Multiqueue tap cannot be used with QEMU vlans");
+        error_setg(errp, "Multiqueue tap cannot be used with hubs");
         return -1;
     }
 
@@ -699,96 +783,128 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
             tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
             tap->has_fds || tap->has_vhostfds) {
-            error_report("ifname=, script=, downscript=, vnet_hdr=, "
-                         "helper=, queues=, fds=, and vhostfds= "
-                         "are invalid with fd=");
+            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
+                       "helper=, queues=, fds=, and vhostfds= "
+                       "are invalid with fd=");
             return -1;
         }
 
-        fd = monitor_handle_fd_param(cur_mon, tap->fd);
+        fd = monitor_fd_param(cur_mon, tap->fd, &err);
         if (fd == -1) {
+            error_propagate(errp, err);
             return -1;
         }
 
-        fcntl(fd, F_SETFL, O_NONBLOCK);
+        qemu_set_nonblock(fd);
 
         vnet_hdr = tap_probe_vnet_hdr(fd);
 
-        if (net_init_tap_one(tap, peer, "tap", name, NULL,
-                             script, downscript,
-                             vhostfdname, vnet_hdr, fd)) {
+        net_init_tap_one(tap, peer, "tap", name, NULL,
+                         script, downscript,
+                         vhostfdname, vnet_hdr, fd, &err);
+        if (err) {
+            error_propagate(errp, err);
             return -1;
         }
     } else if (tap->has_fds) {
-        char *fds[MAX_TAP_QUEUES];
-        char *vhost_fds[MAX_TAP_QUEUES];
-        int nfds, nvhosts;
+        char **fds;
+        char **vhost_fds;
+        int nfds = 0, nvhosts = 0;
+        int ret = 0;
 
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
             tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
             tap->has_vhostfd) {
-            error_report("ifname=, script=, downscript=, vnet_hdr=, "
-                         "helper=, queues=, and vhostfd= "
-                         "are invalid with fds=");
+            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
+                       "helper=, queues=, and vhostfd= "
+                       "are invalid with fds=");
             return -1;
         }
 
+        fds = g_new0(char *, MAX_TAP_QUEUES);
+        vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
+
         nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
         if (tap->has_vhostfds) {
             nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
             if (nfds != nvhosts) {
-                error_report("The number of fds passed does not match the "
-                             "number of vhostfds passed");
-                return -1;
+                error_setg(errp, "The number of fds passed does not match "
+                           "the number of vhostfds passed");
+                ret = -1;
+                goto free_fail;
             }
         }
 
         for (i = 0; i < nfds; i++) {
-            fd = monitor_handle_fd_param(cur_mon, fds[i]);
+            fd = monitor_fd_param(cur_mon, fds[i], &err);
             if (fd == -1) {
-                return -1;
+                error_propagate(errp, err);
+                ret = -1;
+                goto free_fail;
             }
 
-            fcntl(fd, F_SETFL, O_NONBLOCK);
+            qemu_set_nonblock(fd);
 
             if (i == 0) {
                 vnet_hdr = tap_probe_vnet_hdr(fd);
             } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
-                error_report("vnet_hdr not consistent across given tap fds");
-                return -1;
+                error_setg(errp,
+                           "vnet_hdr not consistent across given tap fds");
+                ret = -1;
+                goto free_fail;
             }
 
-            if (net_init_tap_one(tap, peer, "tap", name, ifname,
-                                 script, downscript,
-                                 tap->has_vhostfds ? vhost_fds[i] : NULL,
-                                 vnet_hdr, fd)) {
-                return -1;
+            net_init_tap_one(tap, peer, "tap", name, ifname,
+                             script, downscript,
+                             tap->has_vhostfds ? vhost_fds[i] : NULL,
+                             vnet_hdr, fd, &err);
+            if (err) {
+                error_propagate(errp, err);
+                ret = -1;
+                goto free_fail;
             }
         }
+
+free_fail:
+        for (i = 0; i < nvhosts; i++) {
+            g_free(vhost_fds[i]);
+        }
+        for (i = 0; i < nfds; i++) {
+            g_free(fds[i]);
+        }
+        g_free(fds);
+        g_free(vhost_fds);
+        return ret;
     } else if (tap->has_helper) {
         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
             tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
-            error_report("ifname=, script=, downscript=, and vnet_hdr= "
-                         "queues=, and vhostfds= are invalid with helper=");
+            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
+                       "queues=, and vhostfds= are invalid with helper=");
             return -1;
         }
 
-        fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE);
+        fd = net_bridge_run_helper(tap->helper,
+                                   tap->has_br ?
+                                   tap->br : DEFAULT_BRIDGE_INTERFACE,
+                                   errp);
         if (fd == -1) {
             return -1;
         }
 
-        fcntl(fd, F_SETFL, O_NONBLOCK);
+        qemu_set_nonblock(fd);
         vnet_hdr = tap_probe_vnet_hdr(fd);
 
-        if (net_init_tap_one(tap, peer, "bridge", name, ifname,
-                             script, downscript, vhostfdname,
-                             vnet_hdr, fd)) {
+        net_init_tap_one(tap, peer, "bridge", name, ifname,
+                         script, downscript, vhostfdname,
+                         vnet_hdr, fd, &err);
+        if (err) {
+            error_propagate(errp, err);
+            close(fd);
             return -1;
         }
     } else {
         if (tap->has_vhostfds) {
-            error_report("vhostfds= is invalid if fds= wasn't specified");
+            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
             return -1;
         }
         script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
@@ -803,22 +919,26 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 
         for (i = 0; i < queues; i++) {
             fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
-                              ifname, sizeof ifname, queues > 1);
+                              ifname, sizeof ifname, queues > 1, errp);
             if (fd == -1) {
                 return -1;
             }
 
             if (queues > 1 && i == 0 && !tap->has_ifname) {
                 if (tap_fd_get_ifname(fd, ifname)) {
-                    error_report("Fail to get ifname");
+                    error_setg(errp, "Fail to get ifname");
+                    close(fd);
                     return -1;
                 }
             }
 
-            if (net_init_tap_one(tap, peer, "tap", name, ifname,
-                                 i >= 1 ? "no" : script,
-                                 i >= 1 ? "no" : downscript,
-                                 vhostfdname, vnet_hdr, fd)) {
+            net_init_tap_one(tap, peer, "tap", name, ifname,
+                             i >= 1 ? "no" : script,
+                             i >= 1 ? "no" : downscript,
+                             vhostfdname, vnet_hdr, fd, &err);
+            if (err) {
+                error_propagate(errp, err);
+                close(fd);
                 return -1;
             }
         }
@@ -830,7 +950,7 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 VHostNetState *tap_get_vhost_net(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
-    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
     return s->vhost_net;
 }
 
diff --git a/net/tap_int.h b/net/tap_int.h
index 86bb224bc..e3194b23f 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -23,28 +23,26 @@
  * THE SOFTWARE.
  */
 
-#ifndef QEMU_TAP_H
-#define QEMU_TAP_H
+#ifndef NET_TAP_INT_H
+#define NET_TAP_INT_H
 
-#include "qemu-common.h"
-#include "qapi-types.h"
-
-#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
-#define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
+#include "qapi/qapi-types-net.h"
 
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
-             int vnet_hdr_required, int mq_required);
+             int vnet_hdr_required, int mq_required, Error **errp);
 
 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
 
-int tap_set_sndbuf(int fd, const NetdevTapOptions *tap);
+void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
 int tap_probe_vnet_hdr(int fd);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+int tap_fd_set_vnet_le(int fd, int vnet_is_le);
+int tap_fd_set_vnet_be(int fd, int vnet_is_be);
 int tap_fd_enable(int fd);
 int tap_fd_disable(int fd);
 int tap_fd_get_ifname(int fd, char *ifname);
 
-#endif /* QEMU_TAP_H */
+#endif /* NET_TAP_INT_H */
diff --git a/net/trace-events b/net/trace-events
new file mode 100644
index 000000000..02c13fd0b
--- /dev/null
+++ b/net/trace-events
@@ -0,0 +1,25 @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# announce.c
+qemu_announce_self_iter(const char *id, const char *name, const char *mac, int skip) "%s:%s:%s skip: %d"
+qemu_announce_timer_del(bool free_named, bool free_timer, char *id) "free named: %d free timer: %d id: %s"
+
+# vhost-user.c
+vhost_user_event(const char *chr, int event) "chr: %s got event: %d"
+
+# colo.c
+colo_proxy_main(const char *chr) ": %s"
+
+# colo-compare.c
+colo_compare_main(const char *chr) ": %s"
+colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d"
+colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
+colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
+colo_old_packet_check_found(int64_t old_time) "%" PRId64
+colo_compare_miscompare(void) ""
+colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d"
+
+# filter-rewriter.c
+colo_filter_rewriter_debug(void) ""
+colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u  flags=0x%x"
+colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u"
diff --git a/net/util.c b/net/util.c
index 7e9507679..0b3dbfe5d 100644
--- a/net/util.c
+++ b/net/util.c
@@ -22,9 +22,8 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
 #include "util.h"
-#include <errno.h>
-#include <stdlib.h>
 
 int net_parse_macaddr(uint8_t *macaddr, const char *p)
 {
diff --git a/net/util.h b/net/util.h
index 10c7da95f..358185fd5 100644
--- a/net/util.h
+++ b/net/util.h
@@ -25,7 +25,61 @@
 #ifndef QEMU_NET_UTIL_H
 #define QEMU_NET_UTIL_H
 
-#include <stdint.h>
+
+/*
+ * Structure of an internet header, naked of options.
+ */
+struct ip {
+#ifdef HOST_WORDS_BIGENDIAN
+    uint8_t ip_v:4,         /* version */
+            ip_hl:4;        /* header length */
+#else
+    uint8_t ip_hl:4,        /* header length */
+            ip_v:4;         /* version */
+#endif
+    uint8_t ip_tos;         /* type of service */
+    uint16_t ip_len;        /* total length */
+    uint16_t ip_id;         /* identification */
+    uint16_t ip_off;        /* fragment offset field */
+#define IP_DF 0x4000        /* don't fragment flag */
+#define IP_MF 0x2000        /* more fragments flag */
+#define IP_OFFMASK 0x1fff   /* mask for fragmenting bits */
+    uint8_t ip_ttl;         /* time to live */
+    uint8_t ip_p;           /* protocol */
+    uint16_t ip_sum;        /* checksum */
+    struct in_addr ip_src, ip_dst;  /* source and dest address */
+} QEMU_PACKED;
+
+static inline bool in6_equal_net(const struct in6_addr *a,
+                                 const struct in6_addr *b,
+                                 int prefix_len)
+{
+    if (memcmp(a, b, prefix_len / 8) != 0) {
+        return 0;
+    }
+
+    if (prefix_len % 8 == 0) {
+        return 1;
+    }
+
+    return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8))
+        == b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8));
+}
+
+#define TCPS_CLOSED             0       /* closed */
+#define TCPS_LISTEN             1       /* listening for connection */
+#define TCPS_SYN_SENT           2       /* active, have sent syn */
+#define TCPS_SYN_RECEIVED       3       /* have send and received syn */
+/* states < TCPS_ESTABLISHED are those where connections not established */
+#define TCPS_ESTABLISHED        4       /* established */
+#define TCPS_CLOSE_WAIT         5       /* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define TCPS_FIN_WAIT_1         6       /* have closed, sent fin */
+#define TCPS_CLOSING            7       /* closed xchd FIN; await FIN ACK */
+#define TCPS_LAST_ACK           8       /* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define TCPS_FIN_WAIT_2         9       /* have closed, fin is acked */
+#define TCPS_TIME_WAIT          10      /* in 2*msl quiet wait after close */
 
 int net_parse_macaddr(uint8_t *macaddr, const char *p);
 
diff --git a/net/vde.c b/net/vde.c
index 2a619fbc8..99189cccb 100644
--- a/net/vde.c
+++ b/net/vde.c
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "config-host.h"
+#include "qemu/osdep.h"
 
 #include <libvdeplug.h>
 
@@ -30,6 +30,7 @@
 #include "qemu-common.h"
 #include "qemu/option.h"
 #include "qemu/main-loop.h"
+#include "qapi/error.h"
 
 typedef struct VDEState {
     NetClientState nc;
@@ -68,7 +69,7 @@ static void vde_cleanup(NetClientState *nc)
 }
 
 static NetClientInfo net_vde_info = {
-    .type = NET_CLIENT_OPTIONS_KIND_VDE,
+    .type = NET_CLIENT_DRIVER_VDE,
     .size = sizeof(VDEState),
     .receive = vde_receive,
     .cleanup = vde_cleanup,
@@ -76,7 +77,7 @@ static NetClientInfo net_vde_info = {
 
 static int net_vde_init(NetClientState *peer, const char *model,
                         const char *name, const char *sock,
-                        int port, const char *group, int mode)
+                        int port, const char *group, int mode, Error **errp)
 {
     NetClientState *nc;
     VDEState *s;
@@ -92,6 +93,7 @@ static int net_vde_init(NetClientState *peer, const char *model,
 
     vde = vde_open(init_sock, (char *)"QEMU", &args);
     if (!vde){
+        error_setg_errno(errp, errno, "Could not open vde");
         return -1;
     }
 
@@ -109,17 +111,17 @@ static int net_vde_init(NetClientState *peer, const char *model,
     return 0;
 }
 
-int net_init_vde(const NetClientOptions *opts, const char *name,
-                 NetClientState *peer)
+int net_init_vde(const Netdev *netdev, const char *name,
+                 NetClientState *peer, Error **errp)
 {
     const NetdevVdeOptions *vde;
 
-    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VDE);
-    vde = opts->vde;
+    assert(netdev->type == NET_CLIENT_DRIVER_VDE);
+    vde = &netdev->u.vde;
 
     /* missing optional values have been initialized to "all bits zero" */
     if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group,
-                     vde->has_mode ? vde->mode : 0700) == -1) {
+                     vde->has_mode ? vde->mode : 0700, errp) == -1) {
         return -1;
     }
 
diff --git a/net/vhost-user-stub.c b/net/vhost-user-stub.c
new file mode 100644
index 000000000..52ab4e13f
--- /dev/null
+++ b/net/vhost-user-stub.c
@@ -0,0 +1,23 @@
+/*
+ * vhost-user-stub.c
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-user.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+int net_init_vhost_user(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
+{
+    error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
+    return -1;
+}
diff --git a/net/vhost-user.c b/net/vhost-user.c
new file mode 100644
index 000000000..014199d60
--- /dev/null
+++ b/net/vhost-user.c
@@ -0,0 +1,441 @@
+/*
+ * vhost-user.c
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-user.h"
+#include "hw/virtio/vhost-user.h"
+#include "chardev/char-fe.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-net.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "trace.h"
+
+typedef struct NetVhostUserState {
+    NetClientState nc;
+    CharBackend chr; /* only queue index 0 */
+    VhostUserState *vhost_user;
+    VHostNetState *vhost_net;
+    guint watch;
+    uint64_t acked_features;
+    bool started;
+} NetVhostUserState;
+
+VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
+{
+    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+    return s->vhost_net;
+}
+
+uint64_t vhost_user_get_acked_features(NetClientState *nc)
+{
+    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+    return s->acked_features;
+}
+
+static void vhost_user_stop(int queues, NetClientState *ncs[])
+{
+    NetVhostUserState *s;
+    int i;
+
+    for (i = 0; i < queues; i++) {
+        assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+
+        s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
+
+        if (s->vhost_net) {
+            /* save acked features */
+            uint64_t features = vhost_net_get_acked_features(s->vhost_net);
+            if (features) {
+                s->acked_features = features;
+            }
+            vhost_net_cleanup(s->vhost_net);
+        }
+    }
+}
+
+static int vhost_user_start(int queues, NetClientState *ncs[],
+                            VhostUserState *be)
+{
+    VhostNetOptions options;
+    struct vhost_net *net = NULL;
+    NetVhostUserState *s;
+    int max_queues;
+    int i;
+
+    options.backend_type = VHOST_BACKEND_TYPE_USER;
+
+    for (i = 0; i < queues; i++) {
+        assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+
+        s = DO_UPCAST(NetVhostUserState, nc, ncs[i]);
+
+        options.net_backend = ncs[i];
+        options.opaque      = be;
+        options.busyloop_timeout = 0;
+        net = vhost_net_init(&options);
+        if (!net) {
+            error_report("failed to init vhost_net for queue %d", i);
+            goto err;
+        }
+
+        if (i == 0) {
+            max_queues = vhost_net_get_max_queues(net);
+            if (queues > max_queues) {
+                error_report("you are asking more queues than supported: %d",
+                             max_queues);
+                goto err;
+            }
+        }
+
+        if (s->vhost_net) {
+            vhost_net_cleanup(s->vhost_net);
+            g_free(s->vhost_net);
+        }
+        s->vhost_net = net;
+    }
+
+    return 0;
+
+err:
+    if (net) {
+        vhost_net_cleanup(net);
+        g_free(net);
+    }
+    vhost_user_stop(i, ncs);
+    return -1;
+}
+
+static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf,
+                                  size_t size)
+{
+    /* In case of RARP (message size is 60) notify backup to send a fake RARP.
+       This fake RARP will be sent by backend only for guest
+       without GUEST_ANNOUNCE capability.
+     */
+    if (size == 60) {
+        NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
+        int r;
+        static int display_rarp_failure = 1;
+        char mac_addr[6];
+
+        /* extract guest mac address from the RARP message */
+        memcpy(mac_addr, &buf[6], 6);
+
+        r = vhost_net_notify_migration_done(s->vhost_net, mac_addr);
+
+        if ((r != 0) && (display_rarp_failure)) {
+            fprintf(stderr,
+                    "Vhost user backend fails to broadcast fake RARP\n");
+            fflush(stderr);
+            display_rarp_failure = 0;
+        }
+    }
+
+    return size;
+}
+
+static void net_vhost_user_cleanup(NetClientState *nc)
+{
+    NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc);
+
+    if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+        g_free(s->vhost_net);
+        s->vhost_net = NULL;
+    }
+    if (nc->queue_index == 0) {
+        if (s->watch) {
+            g_source_remove(s->watch);
+            s->watch = 0;
+        }
+        qemu_chr_fe_deinit(&s->chr, true);
+        if (s->vhost_user) {
+            vhost_user_cleanup(s->vhost_user);
+            g_free(s->vhost_user);
+            s->vhost_user = NULL;
+        }
+    }
+
+    qemu_purge_queued_packets(nc);
+}
+
+static int vhost_user_set_vnet_endianness(NetClientState *nc,
+                                          bool enable)
+{
+    /* Nothing to do.  If the server supports
+     * VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, it will get the
+     * vnet header endianness from there.  If it doesn't, negotiation
+     * fails.
+     */
+    return 0;
+}
+
+static bool vhost_user_has_vnet_hdr(NetClientState *nc)
+{
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+
+    return true;
+}
+
+static bool vhost_user_has_ufo(NetClientState *nc)
+{
+    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER);
+
+    return true;
+}
+
+static NetClientInfo net_vhost_user_info = {
+        .type = NET_CLIENT_DRIVER_VHOST_USER,
+        .size = sizeof(NetVhostUserState),
+        .receive = vhost_user_receive,
+        .cleanup = net_vhost_user_cleanup,
+        .has_vnet_hdr = vhost_user_has_vnet_hdr,
+        .has_ufo = vhost_user_has_ufo,
+        .set_vnet_be = vhost_user_set_vnet_endianness,
+        .set_vnet_le = vhost_user_set_vnet_endianness,
+};
+
+static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
+                                           void *opaque)
+{
+    NetVhostUserState *s = opaque;
+
+    qemu_chr_fe_disconnect(&s->chr);
+
+    return TRUE;
+}
+
+static void net_vhost_user_event(void *opaque, int event);
+
+static void chr_closed_bh(void *opaque)
+{
+    const char *name = opaque;
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    NetVhostUserState *s;
+    Error *err = NULL;
+    int queues;
+
+    queues = qemu_find_net_clients_except(name, ncs,
+                                          NET_CLIENT_DRIVER_NIC,
+                                          MAX_QUEUE_NUM);
+    assert(queues < MAX_QUEUE_NUM);
+
+    s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
+
+    if (s->vhost_net) {
+        s->acked_features = vhost_net_get_acked_features(s->vhost_net);
+    }
+
+    qmp_set_link(name, false, &err);
+
+    qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
+                             NULL, opaque, NULL, true);
+
+    if (err) {
+        error_report_err(err);
+    }
+}
+
+static void net_vhost_user_event(void *opaque, int event)
+{
+    const char *name = opaque;
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    NetVhostUserState *s;
+    Chardev *chr;
+    Error *err = NULL;
+    int queues;
+
+    queues = qemu_find_net_clients_except(name, ncs,
+                                          NET_CLIENT_DRIVER_NIC,
+                                          MAX_QUEUE_NUM);
+    assert(queues < MAX_QUEUE_NUM);
+
+    s = DO_UPCAST(NetVhostUserState, nc, ncs[0]);
+    chr = qemu_chr_fe_get_driver(&s->chr);
+    trace_vhost_user_event(chr->label, event);
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        if (vhost_user_start(queues, ncs, s->vhost_user) < 0) {
+            qemu_chr_fe_disconnect(&s->chr);
+            return;
+        }
+        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
+                                         net_vhost_user_watch, s);
+        qmp_set_link(name, true, &err);
+        s->started = true;
+        break;
+    case CHR_EVENT_CLOSED:
+        /* a close event may happen during a read/write, but vhost
+         * code assumes the vhost_dev remains setup, so delay the
+         * stop & clear to idle.
+         * FIXME: better handle failure in vhost code, remove bh
+         */
+        if (s->watch) {
+            AioContext *ctx = qemu_get_current_aio_context();
+
+            g_source_remove(s->watch);
+            s->watch = 0;
+            qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, NULL,
+                                     NULL, NULL, false);
+
+            aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
+        }
+        break;
+    }
+
+    if (err) {
+        error_report_err(err);
+    }
+}
+
+static int net_vhost_user_init(NetClientState *peer, const char *device,
+                               const char *name, Chardev *chr,
+                               int queues)
+{
+    Error *err = NULL;
+    NetClientState *nc, *nc0 = NULL;
+    NetVhostUserState *s = NULL;
+    VhostUserState *user;
+    int i;
+
+    assert(name);
+    assert(queues > 0);
+
+    user = g_new0(struct VhostUserState, 1);
+    for (i = 0; i < queues; i++) {
+        nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
+        snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
+                 i, chr->label);
+        nc->queue_index = i;
+        if (!nc0) {
+            nc0 = nc;
+            s = DO_UPCAST(NetVhostUserState, nc, nc);
+            if (!qemu_chr_fe_init(&s->chr, chr, &err) ||
+                !vhost_user_init(user, &s->chr, &err)) {
+                error_report_err(err);
+                goto err;
+            }
+        }
+        s = DO_UPCAST(NetVhostUserState, nc, nc);
+        s->vhost_user = user;
+    }
+
+    s = DO_UPCAST(NetVhostUserState, nc, nc0);
+    do {
+        if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) {
+            error_report_err(err);
+            goto err;
+        }
+        qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
+                                 net_vhost_user_event, NULL, nc0->name, NULL,
+                                 true);
+    } while (!s->started);
+
+    assert(s->vhost_net);
+
+    return 0;
+
+err:
+    if (user) {
+        vhost_user_cleanup(user);
+        g_free(user);
+        if (s) {
+            s->vhost_user = NULL;
+        }
+    }
+    if (nc0) {
+        qemu_del_net_client(nc0);
+    }
+
+    return -1;
+}
+
+static Chardev *net_vhost_claim_chardev(
+    const NetdevVhostUserOptions *opts, Error **errp)
+{
+    Chardev *chr = qemu_chr_find(opts->chardev);
+
+    if (chr == NULL) {
+        error_setg(errp, "chardev \"%s\" not found", opts->chardev);
+        return NULL;
+    }
+
+    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) {
+        error_setg(errp, "chardev \"%s\" is not reconnectable",
+                   opts->chardev);
+        return NULL;
+    }
+    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_FD_PASS)) {
+        error_setg(errp, "chardev \"%s\" does not support FD passing",
+                   opts->chardev);
+        return NULL;
+    }
+
+    return chr;
+}
+
+static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
+{
+    const char *name = opaque;
+    const char *driver, *netdev;
+
+    driver = qemu_opt_get(opts, "driver");
+    netdev = qemu_opt_get(opts, "netdev");
+
+    if (!driver || !netdev) {
+        return 0;
+    }
+
+    if (strcmp(netdev, name) == 0 &&
+        !g_str_has_prefix(driver, "virtio-net-")) {
+        error_setg(errp, "vhost-user requires frontend driver virtio-net-*");
+        return -1;
+    }
+
+    return 0;
+}
+
+int net_init_vhost_user(const Netdev *netdev, const char *name,
+                        NetClientState *peer, Error **errp)
+{
+    int queues;
+    const NetdevVhostUserOptions *vhost_user_opts;
+    Chardev *chr;
+
+    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_USER);
+    vhost_user_opts = &netdev->u.vhost_user;
+
+    chr = net_vhost_claim_chardev(vhost_user_opts, errp);
+    if (!chr) {
+        return -1;
+    }
+
+    /* verify net frontend */
+    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
+                          (char *)name, errp)) {
+        return -1;
+    }
+
+    queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
+    if (queues < 1 || queues > MAX_QUEUE_NUM) {
+        error_setg(errp,
+                   "vhost-user number of queues must be in range [1, %d]",
+                   MAX_QUEUE_NUM);
+        return -1;
+    }
+
+    return net_vhost_user_init(peer, "vhost_user", name, chr, queues);
+}
author	wanchao-xu <wanchao.xu@samsung.com>	2024-02-23 09:56:11 +0800
committer	wanchao-xu <wanchao.xu@samsung.com>	2024-02-23 09:56:29 +0800
commit	d39f72b68d5a576b5cd2178a40e2e4013ae14398 (patch)
tree	e5cd61a749edfcd55a69209f0eb34abb66a38e67 /net
parent	0889ee8339e51dfdf78c39a8f15b6e5fe5ab49d5 (diff)
parent	e021b4728ed2aea486975a63a342ece1d2178b18 (diff)
download	qemu-arm-static-d39f72b68d5a576b5cd2178a40e2e4013ae14398.tar.gz qemu-arm-static-d39f72b68d5a576b5cd2178a40e2e4013ae14398.tar.bz2 qemu-arm-static-d39f72b68d5a576b5cd2178a40e2e4013ae14398.zip