diff options
author | SeokYeon Hwang <syeon.hwang@samsung.com> | 2016-09-12 15:41:37 +0900 |
---|---|---|
committer | SeokYeon Hwang <syeon.hwang@samsung.com> | 2016-09-12 15:41:37 +0900 |
commit | 590861b31f5f1f7140d637173d8d9bac8d41ccc6 (patch) | |
tree | 7e2e5afd3ac4d896b310de7a980c509e6dddfd2b /net | |
parent | 64d5068524fc31f8941aeba31d6a34f935adf479 (diff) | |
parent | 1dc33ed90bf1fe1c2014dffa0d9e863c520d953a (diff) | |
download | qemu-590861b31f5f1f7140d637173d8d9bac8d41ccc6.tar.gz qemu-590861b31f5f1f7140d637173d8d9bac8d41ccc6.tar.bz2 qemu-590861b31f5f1f7140d637173d8d9bac8d41ccc6.zip |
Merge tag 'v2.7.0' into develop_qemu_2.7
v2.7.0 release
Change-Id: Id5feb5a9404ab064f9ea3d0aa0d95eef17020fa3
Signed-off-by: SeokYeon Hwang <syeon.hwang@samsung.com>
Diffstat (limited to 'net')
-rw-r--r-- | net/checksum.c | 128 | ||||
-rw-r--r-- | net/clients.h | 20 | ||||
-rw-r--r-- | net/dump.c | 8 | ||||
-rw-r--r-- | net/eth.c | 410 | ||||
-rw-r--r-- | net/filter-mirror.c | 66 | ||||
-rw-r--r-- | net/filter.c | 2 | ||||
-rw-r--r-- | net/hub.c | 24 | ||||
-rw-r--r-- | net/l2tpv3.c | 8 | ||||
-rw-r--r-- | net/net.c | 268 | ||||
-rw-r--r-- | net/netmap.c | 7 | ||||
-rw-r--r-- | net/slirp.c | 21 | ||||
-rw-r--r-- | net/socket.c | 87 | ||||
-rw-r--r-- | net/tap-linux.h | 2 | ||||
-rw-r--r-- | net/tap-win32.c | 8 | ||||
-rw-r--r-- | net/tap.c | 87 | ||||
-rw-r--r-- | net/tap_int.h | 6 | ||||
-rw-r--r-- | net/trace-events | 4 | ||||
-rw-r--r-- | net/vde.c | 8 | ||||
-rw-r--r-- | net/vhost-user.c | 105 |
19 files changed, 890 insertions, 379 deletions
diff --git a/net/checksum.c b/net/checksum.c index d0fa424cc1..23323b0760 100644 --- a/net/checksum.c +++ b/net/checksum.c @@ -18,9 +18,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "net/checksum.h" - -#define PROTO_TCP 6 -#define PROTO_UDP 17 +#include "net/eth.h" uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq) { @@ -57,50 +55,118 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto, void net_checksum_calculate(uint8_t *data, int length) { - int hlen, plen, proto, csum_offset; - uint16_t csum; + int mac_hdr_len, ip_len; + struct ip_header *ip; + + /* + * Note: We cannot assume "data" is aligned, so the all code uses + * some macros that take care of possible unaligned access for + * struct members (just in case). + */ - /* Ensure data has complete L2 & L3 headers. */ - if (length < 14 + 20) { + /* Ensure we have at least an Eth header */ + if (length < sizeof(struct eth_header)) { return; } - if ((data[14] & 0xf0) != 0x40) - return; /* not IPv4 */ - hlen = (data[14] & 0x0f) * 4; - plen = (data[16] << 8 | data[17]) - hlen; - proto = data[23]; - - switch (proto) { - case PROTO_TCP: - csum_offset = 16; - break; - case PROTO_UDP: - csum_offset = 6; - break; + /* Handle the optionnal VLAN headers */ + switch (lduw_be_p(&PKT_GET_ETH_HDR(data)->h_proto)) { + case ETH_P_VLAN: + mac_hdr_len = sizeof(struct eth_header) + + sizeof(struct vlan_header); + break; + case ETH_P_DVLAN: + if (lduw_be_p(&PKT_GET_VLAN_HDR(data)->h_proto) == ETH_P_VLAN) { + mac_hdr_len = sizeof(struct eth_header) + + 2 * sizeof(struct vlan_header); + } else { + mac_hdr_len = sizeof(struct eth_header) + + sizeof(struct vlan_header); + } + break; default: - return; + mac_hdr_len = sizeof(struct eth_header); + break; } - if (plen < csum_offset + 2 || 14 + hlen + plen > length) { + length -= mac_hdr_len; + + /* Now check we have an IP header (with an optionnal VLAN header) */ + if (length < sizeof(struct ip_header)) { return; } - data[14+hlen+csum_offset] = 0; - data[14+hlen+csum_offset+1] = 0; - csum = net_checksum_tcpudp(plen, proto, data+14+12, data+14+hlen); - data[14+hlen+csum_offset] = csum >> 8; - data[14+hlen+csum_offset+1] = csum & 0xff; + ip = (struct ip_header *)(data + mac_hdr_len); + + if (IP_HEADER_VERSION(ip) != IP_HEADER_VERSION_4) { + return; /* not IPv4 */ + } + + ip_len = lduw_be_p(&ip->ip_len); + + /* Last, check that we have enough data for the all IP frame */ + if (length < ip_len) { + return; + } + + ip_len -= IP_HDR_GET_LEN(ip); + + switch (ip->ip_p) { + case IP_PROTO_TCP: + { + uint16_t csum; + tcp_header *tcp = (tcp_header *)(ip + 1); + + if (ip_len < sizeof(tcp_header)) { + return; + } + + /* Set csum to 0 */ + stw_he_p(&tcp->th_sum, 0); + + csum = net_checksum_tcpudp(ip_len, ip->ip_p, + (uint8_t *)&ip->ip_src, + (uint8_t *)tcp); + + /* Store computed csum */ + stw_be_p(&tcp->th_sum, csum); + + break; + } + case IP_PROTO_UDP: + { + uint16_t csum; + udp_header *udp = (udp_header *)(ip + 1); + + if (ip_len < sizeof(udp_header)) { + return; + } + + /* Set csum to 0 */ + stw_he_p(&udp->uh_sum, 0); + + csum = net_checksum_tcpudp(ip_len, ip->ip_p, + (uint8_t *)&ip->ip_src, + (uint8_t *)udp); + + /* Store computed csum */ + stw_be_p(&udp->uh_sum, csum); + + break; + } + default: + /* Can't handle any other protocol */ + break; + } } uint32_t net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, - uint32_t iov_off, uint32_t size) + uint32_t iov_off, uint32_t size, uint32_t csum_offset) { size_t iovec_off, buf_off; unsigned int i; uint32_t res = 0; - uint32_t seq = 0; iovec_off = 0; buf_off = 0; @@ -109,8 +175,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off); - res += net_checksum_add_cont(len, chunk_buf, seq); - seq += len; + res += net_checksum_add_cont(len, chunk_buf, csum_offset); + csum_offset += len; buf_off += len; iov_off += len; diff --git a/net/clients.h b/net/clients.h index d47530e82f..5cae479730 100644 --- a/net/clients.h +++ b/net/clients.h @@ -27,39 +27,39 @@ #include "net/net.h" #include "qapi-types.h" -int net_init_dump(const NetClientOptions *opts, const char *name, +int net_init_dump(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); #ifdef CONFIG_SLIRP -int net_init_slirp(const NetClientOptions *opts, const char *name, +int net_init_slirp(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); #endif -int net_init_hubport(const NetClientOptions *opts, const char *name, +int net_init_hubport(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); -int net_init_socket(const NetClientOptions *opts, const char *name, +int net_init_socket(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); -int net_init_tap(const NetClientOptions *opts, const char *name, +int net_init_tap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); -int net_init_bridge(const NetClientOptions *opts, const char *name, +int net_init_bridge(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); -int net_init_l2tpv3(const NetClientOptions *opts, const char *name, +int net_init_l2tpv3(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); #ifdef CONFIG_VDE -int net_init_vde(const NetClientOptions *opts, const char *name, +int net_init_vde(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); #endif #ifdef CONFIG_NETMAP -int net_init_netmap(const NetClientOptions *opts, const char *name, +int net_init_netmap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); #endif -int net_init_vhost_user(const NetClientOptions *opts, const char *name, +int net_init_vhost_user(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); #endif /* QEMU_NET_CLIENTS_H */ diff --git a/net/dump.c b/net/dump.c index 41f7673efd..89a149b5dd 100644 --- a/net/dump.c +++ b/net/dump.c @@ -172,14 +172,14 @@ static void dumpclient_cleanup(NetClientState *nc) } static NetClientInfo net_dump_info = { - .type = NET_CLIENT_OPTIONS_KIND_DUMP, + .type = NET_CLIENT_DRIVER_DUMP, .size = sizeof(DumpNetClient), .receive = dumpclient_receive, .receive_iov = dumpclient_receive_iov, .cleanup = dumpclient_cleanup, }; -int net_init_dump(const NetClientOptions *opts, const char *name, +int net_init_dump(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { int len, rc; @@ -189,8 +189,8 @@ int net_init_dump(const NetClientOptions *opts, const char *name, NetClientState *nc; DumpNetClient *dnc; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_DUMP); - dump = opts->u.dump.data; + assert(netdev->type == NET_CLIENT_DRIVER_DUMP); + dump = &netdev->u.dump; assert(peer); @@ -21,8 +21,8 @@ #include "qemu-common.h" #include "net/tap.h" -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, - bool *is_new) +void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, + uint16_t vlan_ethtype, bool *is_new) { struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); @@ -36,7 +36,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, default: /* No VLAN header, put a new one */ vhdr->h_proto = ehdr->h_proto; - ehdr->h_proto = cpu_to_be16(ETH_P_VLAN); + ehdr->h_proto = cpu_to_be16(vlan_ethtype); *is_new = true; break; } @@ -79,26 +79,100 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) return VIRTIO_NET_HDR_GSO_NONE | ecn_state; } -void eth_get_protocols(const uint8_t *headers, - uint32_t hdr_length, +uint16_t +eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len) +{ + uint16_t proto; + size_t copied; + size_t size = iov_size(l2hdr_iov, iovcnt); + size_t proto_offset = l2hdr_len - sizeof(proto); + + if (size < proto_offset) { + return ETH_P_UNKNOWN; + } + + copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset, + &proto, sizeof(proto)); + + return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN; +} + +static bool +_eth_copy_chunk(size_t input_size, + const struct iovec *iov, int iovcnt, + size_t offset, size_t length, + void *buffer) +{ + size_t copied; + + if (input_size < offset) { + return false; + } + + copied = iov_to_buf(iov, iovcnt, offset, buffer, length); + + if (copied < length) { + return false; + } + + return true; +} + +static bool +_eth_tcp_has_data(bool is_ip4, + const struct ip_header *ip4_hdr, + const struct ip6_header *ip6_hdr, + size_t full_ip6hdr_len, + const struct tcp_header *tcp) +{ + uint32_t l4len; + + if (is_ip4) { + l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr); + } else { + size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len; + } + + return l4len > TCP_HEADER_DATA_OFFSET(tcp); +} + +void eth_get_protocols(const struct iovec *iov, int iovcnt, bool *isip4, bool *isip6, - bool *isudp, bool *istcp) + bool *isudp, bool *istcp, + size_t *l3hdr_off, + size_t *l4hdr_off, + size_t *l5hdr_off, + eth_ip6_hdr_info *ip6hdr_info, + eth_ip4_hdr_info *ip4hdr_info, + eth_l4_hdr_info *l4hdr_info) { int proto; - size_t l2hdr_len = eth_get_l2_hdr_length(headers); - assert(hdr_length >= eth_get_l2_hdr_length(headers)); + bool fragment = false; + size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); + size_t input_size = iov_size(iov, iovcnt); + size_t copied; + *isip4 = *isip6 = *isudp = *istcp = false; - proto = eth_get_l3_proto(headers, l2hdr_len); + proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); + + *l3hdr_off = l2hdr_len; + if (proto == ETH_P_IP) { - *isip4 = true; + struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; - struct ip_header *iphdr; + if (input_size < l2hdr_len) { + return; + } + + copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); - assert(hdr_length >= - eth_get_l2_hdr_length(headers) + sizeof(struct ip_header)); + *isip4 = true; - iphdr = PKT_GET_IP_HDR(headers); + if (copied < sizeof(*iphdr)) { + return; + } if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { if (iphdr->ip_p == IP_PROTO_TCP) { @@ -107,24 +181,135 @@ void eth_get_protocols(const uint8_t *headers, *isudp = true; } } - } else if (proto == ETH_P_IPV6) { - uint8_t l4proto; - size_t full_ip6hdr_len; - struct iovec hdr_vec; - hdr_vec.iov_base = (void *) headers; - hdr_vec.iov_len = hdr_length; + ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); + *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); + + fragment = ip4hdr_info->fragment; + } else if (proto == ETH_P_IPV6) { *isip6 = true; - if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len, - &l4proto, &full_ip6hdr_len)) { - if (l4proto == IP_PROTO_TCP) { + if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, + ip6hdr_info)) { + if (ip6hdr_info->l4proto == IP_PROTO_TCP) { *istcp = true; - } else if (l4proto == IP_PROTO_UDP) { + } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) { *isudp = true; } + } else { + return; + } + + *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; + fragment = ip6hdr_info->fragment; + } + + if (!fragment) { + if (*istcp) { + *istcp = _eth_copy_chunk(input_size, + iov, iovcnt, + *l4hdr_off, sizeof(l4hdr_info->hdr.tcp), + &l4hdr_info->hdr.tcp); + + if (*istcp) { + *l5hdr_off = *l4hdr_off + + TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp); + + l4hdr_info->has_tcp_data = + _eth_tcp_has_data(proto == ETH_P_IP, + &ip4hdr_info->ip4_hdr, + &ip6hdr_info->ip6_hdr, + *l4hdr_off - *l3hdr_off, + &l4hdr_info->hdr.tcp); + } + } else if (*isudp) { + *isudp = _eth_copy_chunk(input_size, + iov, iovcnt, + *l4hdr_off, sizeof(l4hdr_info->hdr.udp), + &l4hdr_info->hdr.udp); + *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); + } + } +} + +bool +eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, + uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci) +{ + struct vlan_header vlan_hdr; + struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; + + size_t copied = iov_to_buf(iov, iovcnt, iovoff, + new_ehdr, sizeof(*new_ehdr)); + + if (copied < sizeof(*new_ehdr)) { + return false; + } + + switch (be16_to_cpu(new_ehdr->h_proto)) { + case ETH_P_VLAN: + case ETH_P_DVLAN: + copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), + &vlan_hdr, sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; + } + + new_ehdr->h_proto = vlan_hdr.h_proto; + + *tci = be16_to_cpu(vlan_hdr.h_tci); + *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); + + if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { + + copied = iov_to_buf(iov, iovcnt, *payload_offset, + PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; + } + + *payload_offset += sizeof(vlan_hdr); + } + return true; + default: + return false; + } +} + +bool +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, + uint16_t vet, uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci) +{ + struct vlan_header vlan_hdr; + struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; + + size_t copied = iov_to_buf(iov, iovcnt, iovoff, + new_ehdr, sizeof(*new_ehdr)); + + if (copied < sizeof(*new_ehdr)) { + return false; + } + + if (be16_to_cpu(new_ehdr->h_proto) == vet) { + copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), + &vlan_hdr, sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; } + + new_ehdr->h_proto = vlan_hdr.h_proto; + + *tci = be16_to_cpu(vlan_hdr.h_tci); + *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); + return true; } + + return false; } void @@ -133,7 +318,12 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, size_t l3payload_len, size_t frag_offset, bool more_frags) { - if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) { + const struct iovec l2vec = { + .iov_base = (void *) l2hdr, + .iov_len = l2hdr_len + }; + + if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) { uint16_t orig_flags; struct ip_header *iphdr = (struct ip_header *) l3hdr; uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; @@ -158,7 +348,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) } uint32_t -eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) +eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, + uint16_t csl, + uint32_t *cso) { struct ip_pseudo_header ipph; ipph.ip_src = iphdr->ip_src; @@ -166,7 +358,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) ipph.ip_payload = cpu_to_be16(csl); ipph.ip_proto = iphdr->ip_p; ipph.zeros = 0; - return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph); + *cso = sizeof(ipph); + return net_checksum_add(*cso, (uint8_t *) &ipph); +} + +uint32_t +eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, + uint16_t csl, + uint8_t l4_proto, + uint32_t *cso) +{ + struct ip6_pseudo_header ipph; + ipph.ip6_src = iphdr->ip6_src; + ipph.ip6_dst = iphdr->ip6_dst; + ipph.len = cpu_to_be16(csl); + ipph.zero[0] = 0; + ipph.zero[1] = 0; + ipph.zero[2] = 0; + ipph.next_hdr = l4_proto; + *cso = sizeof(ipph); + return net_checksum_add(*cso, (uint8_t *)&ipph); } static bool @@ -186,33 +397,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type) } } -bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags, - size_t ip6hdr_off, uint8_t *l4proto, - size_t *full_hdr_len) +static bool +_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + size_t rthdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *dst_addr) +{ + struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; + + if ((rthdr->rtype == 2) && + (rthdr->len == sizeof(struct in6_address) / 8) && + (rthdr->segleft == 1)) { + + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read; + + if (input_size < rthdr_offset + sizeof(*ext_hdr)) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, + rthdr_offset + sizeof(*ext_hdr), + dst_addr, sizeof(*dst_addr)); + + return bytes_read == sizeof(dst_addr); + } + + return false; +} + +static bool +_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags, + size_t dsthdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *src_addr) +{ + size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr); + struct ip6_option_hdr opthdr; + size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr); + + while (bytes_left > sizeof(opthdr)) { + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read, optlen; + + if (input_size < opt_offset) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset, + &opthdr, sizeof(opthdr)); + + if (bytes_read != sizeof(opthdr)) { + return false; + } + + optlen = (opthdr.type == IP6_OPT_PAD1) ? 1 + : (opthdr.len + sizeof(opthdr)); + + if (optlen > bytes_left) { + return false; + } + + if (opthdr.type == IP6_OPT_HOME) { + size_t input_size = iov_size(pkt, pkt_frags); + + if (input_size < opt_offset + sizeof(opthdr)) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, + opt_offset + sizeof(opthdr), + src_addr, sizeof(*src_addr)); + + return bytes_read == sizeof(src_addr); + } + + opt_offset += optlen; + bytes_left -= optlen; + } + + return false; +} + +bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, eth_ip6_hdr_info *info) { - struct ip6_header ip6_hdr; struct ip6_ext_hdr ext_hdr; size_t bytes_read; + uint8_t curr_ext_hdr_type; + size_t input_size = iov_size(pkt, pkt_frags); + + info->rss_ex_dst_valid = false; + info->rss_ex_src_valid = false; + info->fragment = false; + + if (input_size < ip6hdr_off) { + return false; + } bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, - &ip6_hdr, sizeof(ip6_hdr)); - if (bytes_read < sizeof(ip6_hdr)) { + &info->ip6_hdr, sizeof(info->ip6_hdr)); + if (bytes_read < sizeof(info->ip6_hdr)) { return false; } - *full_hdr_len = sizeof(struct ip6_header); + info->full_hdr_len = sizeof(struct ip6_header); + + curr_ext_hdr_type = info->ip6_hdr.ip6_nxt; - if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) { - *l4proto = ip6_hdr.ip6_nxt; + if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) { + info->l4proto = info->ip6_hdr.ip6_nxt; + info->has_ext_hdrs = false; return true; } + info->has_ext_hdrs = true; + do { - bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len, + if (input_size < ip6hdr_off + info->full_hdr_len) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len, &ext_hdr, sizeof(ext_hdr)); - *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; - } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt)); - *l4proto = ext_hdr.ip6r_nxt; + if (bytes_read < sizeof(ext_hdr)) { + return false; + } + + if (curr_ext_hdr_type == IP6_ROUTING) { + info->rss_ex_dst_valid = + _eth_get_rss_ex_dst_addr(pkt, pkt_frags, + ip6hdr_off + info->full_hdr_len, + &ext_hdr, &info->rss_ex_dst); + } else if (curr_ext_hdr_type == IP6_DESTINATON) { + info->rss_ex_src_valid = + _eth_get_rss_ex_src_addr(pkt, pkt_frags, + ip6hdr_off + info->full_hdr_len, + &ext_hdr, &info->rss_ex_src); + } else if (curr_ext_hdr_type == IP6_FRAGMENT) { + info->fragment = true; + } + + info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; + curr_ext_hdr_type = ext_hdr.ip6r_nxt; + } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type)); + + info->l4proto = ext_hdr.ip6r_nxt; return true; } diff --git a/net/filter-mirror.c b/net/filter-mirror.c index c0c4dc60b6..35df37451d 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -40,10 +40,7 @@ typedef struct MirrorState { char *outdev; CharDriverState *chr_in; CharDriverState *chr_out; - int state; /* 0 = getting length, 1 = getting data */ - unsigned int index; - unsigned int packet_len; - uint8_t buf[REDIRECTOR_MAX_LEN]; + SocketReadState rs; } MirrorState; static int filter_mirror_send(CharDriverState *chr_out, @@ -108,51 +105,12 @@ static void redirector_chr_read(void *opaque, const uint8_t *buf, int size) { NetFilterState *nf = opaque; MirrorState *s = FILTER_REDIRECTOR(nf); - unsigned int l; - - while (size > 0) { - /* reassemble a packet from the network */ - switch (s->state) { /* 0 = getting length, 1 = getting data */ - case 0: - l = 4 - s->index; - if (l > size) { - l = size; - } - memcpy(s->buf + s->index, buf, l); - buf += l; - size -= l; - s->index += l; - if (s->index == 4) { - /* got length */ - s->packet_len = ntohl(*(uint32_t *)s->buf); - s->index = 0; - s->state = 1; - } - break; - case 1: - l = s->packet_len - s->index; - if (l > size) { - l = size; - } - if (s->index + l <= sizeof(s->buf)) { - memcpy(s->buf + s->index, buf, l); - } else { - error_report("serious error: oversized packet received."); - s->index = s->state = 0; - qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); - return; - } - - s->index += l; - buf += l; - size -= l; - if (s->index >= s->packet_len) { - s->index = 0; - s->state = 0; - redirector_to_filter(nf, s->buf, s->packet_len); - } - break; - } + int ret; + + ret = net_fill_rstate(&s->rs, buf, size); + + if (ret == -1) { + qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); } } @@ -258,6 +216,14 @@ static void filter_mirror_setup(NetFilterState *nf, Error **errp) } } +static void redirector_rs_finalize(SocketReadState *rs) +{ + MirrorState *s = container_of(rs, MirrorState, rs); + NetFilterState *nf = NETFILTER(s); + + redirector_to_filter(nf, rs->buf, rs->packet_len); +} + static void filter_redirector_setup(NetFilterState *nf, Error **errp) { MirrorState *s = FILTER_REDIRECTOR(nf); @@ -274,7 +240,7 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) } } - s->state = s->index = 0; + net_socket_rs_init(&s->rs, redirector_rs_finalize); if (s->indev) { s->chr_in = qemu_chr_find(s->indev); diff --git a/net/filter.c b/net/filter.c index 8ac79f3b7b..888fe6dd93 100644 --- a/net/filter.c +++ b/net/filter.c @@ -201,7 +201,7 @@ static void netfilter_complete(UserCreatable *uc, Error **errp) } queues = qemu_find_net_clients_except(nf->netdev_id, ncs, - NET_CLIENT_OPTIONS_KIND_NIC, + NET_CLIENT_DRIVER_NIC, MAX_QUEUE_NUM); if (queues < 1) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev", @@ -131,7 +131,7 @@ static void net_hub_port_cleanup(NetClientState *nc) } static NetClientInfo net_hub_port_info = { - .type = NET_CLIENT_OPTIONS_KIND_HUBPORT, + .type = NET_CLIENT_DRIVER_HUBPORT, .size = sizeof(NetHubPort), .can_receive = net_hub_port_can_receive, .receive = net_hub_port_receive, @@ -266,10 +266,10 @@ int net_hub_id_for_client(NetClientState *nc, int *id) { NetHubPort *port; - if (nc->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + if (nc->info->type == NET_CLIENT_DRIVER_HUBPORT) { port = DO_UPCAST(NetHubPort, nc, nc); } else if (nc->peer != NULL && nc->peer->info->type == - NET_CLIENT_OPTIONS_KIND_HUBPORT) { + NET_CLIENT_DRIVER_HUBPORT) { port = DO_UPCAST(NetHubPort, nc, nc->peer); } else { return -ENOENT; @@ -281,14 +281,14 @@ int net_hub_id_for_client(NetClientState *nc, int *id) return 0; } -int net_init_hubport(const NetClientOptions *opts, const char *name, +int net_init_hubport(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { const NetdevHubPortOptions *hubport; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT); + assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT); assert(!peer); - hubport = opts->u.hubport.data; + hubport = &netdev->u.hubport; net_hub_add_port(hubport->hubid, name); return 0; @@ -315,14 +315,14 @@ void net_hub_check_clients(void) } switch (peer->info->type) { - case NET_CLIENT_OPTIONS_KIND_NIC: + case NET_CLIENT_DRIVER_NIC: has_nic = 1; break; - case NET_CLIENT_OPTIONS_KIND_USER: - case NET_CLIENT_OPTIONS_KIND_TAP: - case NET_CLIENT_OPTIONS_KIND_SOCKET: - case NET_CLIENT_OPTIONS_KIND_VDE: - case NET_CLIENT_OPTIONS_KIND_VHOST_USER: + case NET_CLIENT_DRIVER_USER: + case NET_CLIENT_DRIVER_TAP: + case NET_CLIENT_DRIVER_SOCKET: + case NET_CLIENT_DRIVER_VDE: + case NET_CLIENT_DRIVER_VHOST_USER: has_host_dev = 1; break; default: diff --git a/net/l2tpv3.c b/net/l2tpv3.c index 5c668f7376..6745b78990 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -516,7 +516,7 @@ static void net_l2tpv3_cleanup(NetClientState *nc) } static NetClientInfo net_l2tpv3_info = { - .type = NET_CLIENT_OPTIONS_KIND_L2TPV3, + .type = NET_CLIENT_DRIVER_L2TPV3, .size = sizeof(NetL2TPV3State), .receive = net_l2tpv3_receive_dgram, .receive_iov = net_l2tpv3_receive_dgram_iov, @@ -524,7 +524,7 @@ static NetClientInfo net_l2tpv3_info = { .cleanup = net_l2tpv3_cleanup, }; -int net_init_l2tpv3(const NetClientOptions *opts, +int net_init_l2tpv3(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { @@ -545,8 +545,8 @@ int net_init_l2tpv3(const NetClientOptions *opts, s->queue_tail = 0; s->header_mismatch = false; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3); - l2tpv3 = opts->u.l2tpv3.data; + assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3); + l2tpv3 = &netdev->u.l2tpv3; if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { s->ipv6 = l2tpv3->ipv6; @@ -76,8 +76,6 @@ const char *host_net_devices[] = { NULL, }; -int default_net = 1; - /***********************************************************/ /* network device redirectors */ @@ -291,7 +289,7 @@ NICState *qemu_new_nic(NetClientInfo *info, NICState *nic; int i, queues = MAX(1, conf->peers.queues); - assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC); + assert(info->type == NET_CLIENT_DRIVER_NIC); assert(info->size >= sizeof(NICState)); nic = g_malloc0(info->size + sizeof(NetClientState) * queues); @@ -362,13 +360,13 @@ void qemu_del_net_client(NetClientState *nc) int queues, i; NetFilterState *nf, *next; - assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC); + assert(nc->info->type != NET_CLIENT_DRIVER_NIC); /* If the NetClientState belongs to a multiqueue backend, we will change all * other NetClientStates also. */ queues = qemu_find_net_clients_except(nc->name, ncs, - NET_CLIENT_OPTIONS_KIND_NIC, + NET_CLIENT_DRIVER_NIC, MAX_QUEUE_NUM); assert(queues != 0); @@ -377,7 +375,7 @@ void qemu_del_net_client(NetClientState *nc) } /* If there is a peer NIC, delete and cleanup client, but do not free. */ - if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { NICState *nic = qemu_get_nic(nc->peer); if (nic->peer_deleted) { return; @@ -433,7 +431,7 @@ void qemu_foreach_nic(qemu_nic_foreach func, void *opaque) NetClientState *nc; QTAILQ_FOREACH(nc, &net_clients, next) { - if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->info->type == NET_CLIENT_DRIVER_NIC) { if (nc->queue_index == 0) { func(qemu_get_nic(nc), opaque); } @@ -605,7 +603,7 @@ void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge) { nc->receive_disabled = 0; - if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_HUBPORT) { if (net_hub_flush(nc->peer)) { qemu_notify_event(); } @@ -724,7 +722,7 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, return 0; } - if (nc->info->receive_iov) { + if (nc->info->receive_iov && !(flags & QEMU_NET_PACKET_FLAG_RAW)) { ret = nc->info->receive_iov(nc, iov, iovcnt); } else { ret = nc_sendv_compat(nc, iov, iovcnt, flags); @@ -779,7 +777,7 @@ NetClientState *qemu_find_netdev(const char *id) NetClientState *nc; QTAILQ_FOREACH(nc, &net_clients, next) { - if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) + if (nc->info->type == NET_CLIENT_DRIVER_NIC) continue; if (!strcmp(nc->name, id)) { return nc; @@ -790,7 +788,7 @@ NetClientState *qemu_find_netdev(const char *id) } int qemu_find_net_clients_except(const char *id, NetClientState **ncs, - NetClientOptionsKind type, int max) + NetClientDriver type, int max) { NetClientState *nc; int ret = 0; @@ -864,15 +862,15 @@ int qemu_find_nic_model(NICInfo *nd, const char * const *models, return -1; } -static int net_init_nic(const NetClientOptions *opts, const char *name, +static int net_init_nic(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { int idx; NICInfo *nd; const NetLegacyNicOptions *nic; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_NIC); - nic = opts->u.nic.data; + assert(netdev->type == NET_CLIENT_DRIVER_NIC); + nic = &netdev->u.nic; idx = nic_get_free_idx(); if (idx == -1 || nb_nics >= MAX_NICS) { @@ -932,70 +930,111 @@ static int net_init_nic(const NetClientOptions *opts, const char *name, } -static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND__MAX])( - const NetClientOptions *opts, +static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( + const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) = { - [NET_CLIENT_OPTIONS_KIND_NIC] = net_init_nic, + [NET_CLIENT_DRIVER_NIC] = net_init_nic, #ifdef CONFIG_SLIRP - [NET_CLIENT_OPTIONS_KIND_USER] = net_init_slirp, + [NET_CLIENT_DRIVER_USER] = net_init_slirp, #endif - [NET_CLIENT_OPTIONS_KIND_TAP] = net_init_tap, - [NET_CLIENT_OPTIONS_KIND_SOCKET] = net_init_socket, + [NET_CLIENT_DRIVER_TAP] = net_init_tap, + [NET_CLIENT_DRIVER_SOCKET] = net_init_socket, #ifdef CONFIG_VDE - [NET_CLIENT_OPTIONS_KIND_VDE] = net_init_vde, + [NET_CLIENT_DRIVER_VDE] = net_init_vde, #endif #ifdef CONFIG_NETMAP - [NET_CLIENT_OPTIONS_KIND_NETMAP] = net_init_netmap, + [NET_CLIENT_DRIVER_NETMAP] = net_init_netmap, #endif - [NET_CLIENT_OPTIONS_KIND_DUMP] = net_init_dump, + [NET_CLIENT_DRIVER_DUMP] = net_init_dump, #ifdef CONFIG_NET_BRIDGE - [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge, + [NET_CLIENT_DRIVER_BRIDGE] = net_init_bridge, #endif - [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport, + [NET_CLIENT_DRIVER_HUBPORT] = net_init_hubport, #ifdef CONFIG_VHOST_NET_USED - [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user, + [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user, #endif #ifdef CONFIG_L2TPV3 - [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3, + [NET_CLIENT_DRIVER_L2TPV3] = net_init_l2tpv3, #endif }; -static int net_client_init1(const void *object, int is_netdev, Error **errp) +static int net_client_init1(const void *object, bool is_netdev, Error **errp) { - const NetClientOptions *opts; + Netdev legacy = {0}; + const Netdev *netdev; const char *name; NetClientState *peer = NULL; if (is_netdev) { - const Netdev *netdev = object; - opts = netdev->opts; + netdev = object; name = netdev->id; - if (opts->type == NET_CLIENT_OPTIONS_KIND_DUMP || - opts->type == NET_CLIENT_OPTIONS_KIND_NIC || - !net_client_init_fun[opts->type]) { + if (netdev->type == NET_CLIENT_DRIVER_DUMP || + netdev->type == NET_CLIENT_DRIVER_NIC || + !net_client_init_fun[netdev->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a netdev backend type"); return -1; } } else { const NetLegacy *net = object; - opts = net->opts; + const NetLegacyOptions *opts = net->opts; + legacy.id = net->id; + netdev = &legacy; /* missing optional values have been initialized to "all bits zero" */ name = net->has_id ? net->id : net->name; - if (opts->type == NET_CLIENT_OPTIONS_KIND_NONE) { + /* Map the old options to the new flat type */ + switch (opts->type) { + case NET_LEGACY_OPTIONS_KIND_NONE: return 0; /* nothing to do */ - } - if (opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", - "a net type"); - return -1; + case NET_LEGACY_OPTIONS_KIND_NIC: + legacy.type = NET_CLIENT_DRIVER_NIC; + legacy.u.nic = *opts->u.nic.data; + break; + case NET_LEGACY_OPTIONS_KIND_USER: + legacy.type = NET_CLIENT_DRIVER_USER; + legacy.u.user = *opts->u.user.data; + break; + case NET_LEGACY_OPTIONS_KIND_TAP: + legacy.type = NET_CLIENT_DRIVER_TAP; + legacy.u.tap = *opts->u.tap.data; + break; + case NET_LEGACY_OPTIONS_KIND_L2TPV3: + legacy.type = NET_CLIENT_DRIVER_L2TPV3; + legacy.u.l2tpv3 = *opts->u.l2tpv3.data; + break; + case NET_LEGACY_OPTIONS_KIND_SOCKET: + legacy.type = NET_CLIENT_DRIVER_SOCKET; + legacy.u.socket = *opts->u.socket.data; + break; + case NET_LEGACY_OPTIONS_KIND_VDE: + legacy.type = NET_CLIENT_DRIVER_VDE; + legacy.u.vde = *opts->u.vde.data; + break; + case NET_LEGACY_OPTIONS_KIND_DUMP: + legacy.type = NET_CLIENT_DRIVER_DUMP; + legacy.u.dump = *opts->u.dump.data; + break; + case NET_LEGACY_OPTIONS_KIND_BRIDGE: + legacy.type = NET_CLIENT_DRIVER_BRIDGE; + legacy.u.bridge = *opts->u.bridge.data; + break; + case NET_LEGACY_OPTIONS_KIND_NETMAP: + legacy.type = NET_CLIENT_DRIVER_NETMAP; + legacy.u.netmap = *opts->u.netmap.data; + break; + case NET_LEGACY_OPTIONS_KIND_VHOST_USER: + legacy.type = NET_CLIENT_DRIVER_VHOST_USER; + legacy.u.vhost_user = *opts->u.vhost_user.data; + break; + default: + abort(); } - if (!net_client_init_fun[opts->type]) { + if (!net_client_init_fun[netdev->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net backend type (maybe it is not compiled " "into this binary)"); @@ -1003,17 +1042,17 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) } /* Do not add to a vlan if it's a nic with a netdev= parameter. */ - if (opts->type != NET_CLIENT_OPTIONS_KIND_NIC || + if (netdev->type != NET_CLIENT_DRIVER_NIC || !opts->u.nic.data->has_netdev) { peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL); } } - if (net_client_init_fun[opts->type](opts, name, peer, errp) < 0) { + if (net_client_init_fun[netdev->type](netdev, name, peer, errp) < 0) { /* FIXME drop when all init functions store an Error */ if (errp && !*errp) { error_setg(errp, QERR_DEVICE_INIT_FAILED, - NetClientOptionsKind_lookup[opts->type]); + NetClientDriver_lookup[netdev->type]); } return -1; } @@ -1021,13 +1060,12 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) } -int net_client_init(QemuOpts *opts, int is_netdev, Error **errp) +int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp) { void *object = NULL; Error *err = NULL; int ret = -1; - OptsVisitor *ov = opts_visitor_new(opts); - Visitor *v = opts_get_visitor(ov); + Visitor *v = opts_visitor_new(opts); { /* Parse convenience option format ip6-net=fec0::0[/64] */ @@ -1077,7 +1115,7 @@ int net_client_init(QemuOpts *opts, int is_netdev, Error **errp) } error_propagate(errp, err); - opts_visitor_cleanup(ov); + visit_free(v); return ret; } @@ -1115,7 +1153,7 @@ void hmp_host_net_add(Monitor *mon, const QDict *qdict) qemu_opt_set(opts, "type", device, &error_abort); - net_client_init(opts, 0, &local_err); + net_client_init(opts, false, &local_err); if (local_err) { error_report_err(local_err); monitor_printf(mon, "adding host network device %s failed\n", device); @@ -1134,7 +1172,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict) device, vlan_id); return; } - if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->info->type == NET_CLIENT_DRIVER_NIC) { error_report("invalid host network device '%s'", device); return; } @@ -1145,7 +1183,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict) void netdev_add(QemuOpts *opts, Error **errp) { - net_client_init(opts, 1, errp); + net_client_init(opts, true, errp); } void qmp_netdev_add(QDict *qdict, QObject **ret, Error **errp) @@ -1201,7 +1239,7 @@ static void netfilter_print_info(Monitor *mon, NetFilterState *nf) char *str; ObjectProperty *prop; ObjectPropertyIterator iter; - StringOutputVisitor *ov; + Visitor *v; /* generate info str */ object_property_iter_init(&iter, OBJECT(nf)); @@ -1209,11 +1247,10 @@ static void netfilter_print_info(Monitor *mon, NetFilterState *nf) if (!strcmp(prop->name, "type")) { continue; } - ov = string_output_visitor_new(false); - object_property_get(OBJECT(nf), string_output_get_visitor(ov), - prop->name, NULL); - str = string_output_get_string(ov); - string_output_visitor_cleanup(ov); + v = string_output_visitor_new(false, &str); + object_property_get(OBJECT(nf), v, prop->name, NULL); + visit_complete(v, &str); + visit_free(v); monitor_printf(mon, ",%s=%s", prop->name, str); g_free(str); } @@ -1226,7 +1263,7 @@ void print_net_client(Monitor *mon, NetClientState *nc) monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name, nc->queue_index, - NetClientOptionsKind_lookup[nc->info->type], + NetClientDriver_lookup[nc->info->type], nc->info_str); if (!QTAILQ_EMPTY(&nc->filters)) { monitor_printf(mon, "filters:\n"); @@ -1256,7 +1293,7 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, } /* only query rx-filter information of NIC */ - if (nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->info->type != NET_CLIENT_DRIVER_NIC) { if (has_name) { error_setg(errp, "net client(%s) isn't a NIC", name); return NULL; @@ -1302,7 +1339,7 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, void hmp_info_network(Monitor *mon, const QDict *qdict) { NetClientState *nc, *peer; - NetClientOptionsKind type; + NetClientDriver type; net_hub_info(mon); @@ -1315,10 +1352,10 @@ void hmp_info_network(Monitor *mon, const QDict *qdict) continue; } - if (!peer || type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (!peer || type == NET_CLIENT_DRIVER_NIC) { print_net_client(mon, nc); } /* else it's a netdev connected to a NIC, printed with the NIC */ - if (peer && type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (peer && type == NET_CLIENT_DRIVER_NIC) { monitor_printf(mon, " \\ "); print_net_client(mon, peer); } @@ -1332,7 +1369,7 @@ void qmp_set_link(const char *name, bool up, Error **errp) int queues, i; queues = qemu_find_net_clients_except(name, ncs, - NET_CLIENT_OPTIONS_KIND__MAX, + NET_CLIENT_DRIVER__MAX, MAX_QUEUE_NUM); if (queues == 0) { @@ -1359,7 +1396,7 @@ void qmp_set_link(const char *name, bool up, Error **errp) * multiple clients that can still communicate with each other in * disconnected mode. For now maintain this compatibility. */ - if (nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { for (i = 0; i < queues; i++) { ncs[i]->peer->link_down = !up; } @@ -1400,7 +1437,7 @@ LinkInfo *qmp_get_link(const char *name, Error **errp) info = g_malloc0(sizeof (struct LinkInfo)); queues = qemu_find_net_clients_except(name, ncs, - NET_CLIENT_OPTIONS_KIND__MAX, + NET_CLIENT_DRIVER__MAX, MAX_QUEUE_NUM); if (queues == 0) { @@ -1425,7 +1462,7 @@ void net_cleanup(void) */ while (!QTAILQ_EMPTY(&net_clients)) { nc = QTAILQ_FIRST(&net_clients); - if (nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->info->type == NET_CLIENT_DRIVER_NIC) { qemu_del_nic(qemu_get_nic(nc)); } else { qemu_del_net_client(nc); @@ -1440,24 +1477,12 @@ void net_check_clients(void) NetClientState *nc; int i; - /* Don't warn about the default network setup that you get if - * no command line -net or -netdev options are specified. There - * are two cases that we would otherwise complain about: - * (1) board doesn't support a NIC but the implicit "-net nic" - * requested one - * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic" - * sets up a nic that isn't connected to anything. - */ - if (default_net) { - return; - } - net_hub_check_clients(); QTAILQ_FOREACH(nc, &net_clients, next) { if (!nc->peer) { fprintf(stderr, "Warning: %s %s has no peer\n", - nc->info->type == NET_CLIENT_OPTIONS_KIND_NIC ? + nc->info->type == NET_CLIENT_DRIVER_NIC ? "nic" : "netdev", nc->name); } } @@ -1481,7 +1506,7 @@ static int net_init_client(void *dummy, QemuOpts *opts, Error **errp) { Error *local_err = NULL; - net_client_init(opts, 0, &local_err); + net_client_init(opts, false, &local_err); if (local_err) { error_report_err(local_err); return -1; @@ -1495,7 +1520,7 @@ static int net_init_netdev(void *dummy, QemuOpts *opts, Error **errp) Error *local_err = NULL; int ret; - ret = net_client_init(opts, 1, &local_err); + ret = net_client_init(opts, true, &local_err); if (local_err) { error_report_err(local_err); return -1; @@ -1508,14 +1533,6 @@ int net_init_clients(void) { QemuOptsList *net = qemu_find_opts("net"); - if (default_net) { - /* if no clients, we use a default config */ - qemu_opts_set(net, NULL, "type", "nic", &error_abort); -#ifdef CONFIG_SLIRP - qemu_opts_set(net, NULL, "type", "user", &error_abort); -#endif - } - net_change_state_entry = qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL); @@ -1546,7 +1563,6 @@ int net_client_parse(QemuOptsList *opts_list, const char *optarg) return -1; } - default_net = 0; return 0; } @@ -1598,3 +1614,73 @@ QemuOptsList qemu_net_opts = { { /* end of list */ } }, }; + +void net_socket_rs_init(SocketReadState *rs, + SocketReadStateFinalize *finalize) +{ + rs->state = 0; + rs->index = 0; + rs->packet_len = 0; + memset(rs->buf, 0, sizeof(rs->buf)); + rs->finalize = finalize; +} + +/* + * Returns + * 0: success + * -1: error occurs + */ +int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) +{ + unsigned int l; + + while (size > 0) { + /* reassemble a packet from the network */ + switch (rs->state) { /* 0 = getting length, 1 = getting data */ + case 0: + l = 4 - rs->index; + if (l > size) { + l = size; + } + memcpy(rs->buf + rs->index, buf, l); + buf += l; + size -= l; + rs->index += l; + if (rs->index == 4) { + /* got length */ + rs->packet_len = ntohl(*(uint32_t *)rs->buf); + rs->index = 0; + rs->state = 1; + } + break; + case 1: + l = rs->packet_len - rs->index; + if (l > size) { + l = size; + } + if (rs->index + l <= sizeof(rs->buf)) { + memcpy(rs->buf + rs->index, buf, l); + } else { + fprintf(stderr, "serious error: oversized packet received," + "connection terminated.\n"); + rs->index = rs->state = 0; + return -1; + } + + rs->index += l; + buf += l; + size -= l; + if (rs->index >= rs->packet_len) { + rs->index = 0; + rs->state = 0; + if (rs->finalize) { + rs->finalize(rs); + } + } + break; + } + } + + assert(size == 0); + return 0; +} diff --git a/net/netmap.c b/net/netmap.c index 6cc0db5ee1..2d11a8f4be 100644 --- a/net/netmap.c +++ b/net/netmap.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> #include <net/if.h> -#include <sys/mman.h> #define NETMAP_WITH_LIBS #include <net/netmap.h> #include <net/netmap_user.h> @@ -401,7 +400,7 @@ static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, /* NetClientInfo methods */ static NetClientInfo net_netmap_info = { - .type = NET_CLIENT_OPTIONS_KIND_NETMAP, + .type = NET_CLIENT_DRIVER_NETMAP, .size = sizeof(NetmapState), .receive = netmap_receive, .receive_iov = netmap_receive_iov, @@ -419,10 +418,10 @@ static NetClientInfo net_netmap_info = { * * ... -net netmap,ifname="..." */ -int net_init_netmap(const NetClientOptions *opts, +int net_init_netmap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { - const NetdevNetmapOptions *netmap_opts = opts->u.netmap.data; + const NetdevNetmapOptions *netmap_opts = &netdev->u.netmap; struct nm_desc *nmd; NetClientState *nc; Error *err = NULL; diff --git a/net/slirp.c b/net/slirp.c index 3acbc512da..e810ee30a5 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -38,6 +38,7 @@ #include "slirp/libslirp.h" #include "slirp/ip6.h" #include "sysemu/char.h" +#include "sysemu/sysemu.h" #include "qemu/cutils.h" static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) @@ -76,6 +77,7 @@ typedef struct SlirpState { NetClientState nc; QTAILQ_ENTRY(SlirpState) entry; Slirp *slirp; + Notifier exit_notifier; #ifndef _WIN32 char smb_dir[128]; #endif @@ -118,17 +120,26 @@ static ssize_t net_slirp_receive(NetClientState *nc, const uint8_t *buf, size_t return size; } +static void slirp_smb_exit(Notifier *n, void *data) +{ + SlirpState *s = container_of(n, SlirpState, exit_notifier); + slirp_smb_cleanup(s); +} + static void net_slirp_cleanup(NetClientState *nc) { SlirpState *s = DO_UPCAST(SlirpState, nc, nc); slirp_cleanup(s->slirp); + if (s->exit_notifier.notify) { + qemu_remove_exit_notifier(&s->exit_notifier); + } slirp_smb_cleanup(s); QTAILQ_REMOVE(&slirp_stacks, s, entry); } static NetClientInfo net_slirp_info = { - .type = NET_CLIENT_OPTIONS_KIND_USER, + .type = NET_CLIENT_DRIVER_USER, .size = sizeof(SlirpState), .receive = net_slirp_receive, .cleanup = net_slirp_cleanup, @@ -349,6 +360,8 @@ static int net_slirp_init(NetClientState *peer, const char *model, } #endif + s->exit_notifier.notify = slirp_smb_exit; + qemu_add_exit_notifier(&s->exit_notifier); return 0; error: @@ -867,7 +880,7 @@ static const char **slirp_dnssearch(const StringList *dnsname) return ret; } -int net_init_slirp(const NetClientOptions *opts, const char *name, +int net_init_slirp(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { /* FIXME error_setg(errp, ...) on failure */ @@ -878,8 +891,8 @@ int net_init_slirp(const NetClientOptions *opts, const char *name, const char **dnssearch; bool ipv4 = true, ipv6 = true; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_USER); - user = opts->u.user.data; + assert(netdev->type == NET_CLIENT_DRIVER_USER); + user = &netdev->u.user; if ((user->has_ipv6 && user->ipv6 && !user->has_ipv4) || (user->has_ipv4 && !user->ipv4)) { diff --git a/net/socket.c b/net/socket.c index 9fa2cd8d51..3f98eefb34 100644 --- a/net/socket.c +++ b/net/socket.c @@ -38,11 +38,8 @@ typedef struct NetSocketState { NetClientState nc; int listen_fd; int fd; - int state; /* 0 = getting length, 1 = getting data */ - unsigned int index; - unsigned int packet_len; + SocketReadState rs; unsigned int send_index; /* number of bytes sent (only SOCK_STREAM) */ - uint8_t buf[NET_BUFSIZE]; struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */ IOHandler *send_fn; /* differs between SOCK_STREAM/SOCK_DGRAM */ bool read_poll; /* waiting to receive data? */ @@ -143,11 +140,22 @@ static void net_socket_send_completed(NetClientState *nc, ssize_t len) } } +static void net_socket_rs_finalize(SocketReadState *rs) +{ + NetSocketState *s = container_of(rs, NetSocketState, rs); + + if (qemu_send_packet_async(&s->nc, rs->buf, + rs->packet_len, + net_socket_send_completed) == 0) { + net_socket_read_poll(s, false); + } +} + static void net_socket_send(void *opaque) { NetSocketState *s = opaque; int size; - unsigned l; + int ret; uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; @@ -166,61 +174,18 @@ static void net_socket_send(void *opaque) closesocket(s->fd); s->fd = -1; - s->state = 0; - s->index = 0; - s->packet_len = 0; + net_socket_rs_init(&s->rs, net_socket_rs_finalize); s->nc.link_down = true; - memset(s->buf, 0, sizeof(s->buf)); memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); return; } buf = buf1; - while (size > 0) { - /* reassemble a packet from the network */ - switch(s->state) { - case 0: - l = 4 - s->index; - if (l > size) - l = size; - memcpy(s->buf + s->index, buf, l); - buf += l; - size -= l; - s->index += l; - if (s->index == 4) { - /* got length */ - s->packet_len = ntohl(*(uint32_t *)s->buf); - s->index = 0; - s->state = 1; - } - break; - case 1: - l = s->packet_len - s->index; - if (l > size) - l = size; - if (s->index + l <= sizeof(s->buf)) { - memcpy(s->buf + s->index, buf, l); - } else { - fprintf(stderr, "serious error: oversized packet received," - "connection terminated.\n"); - s->state = 0; - goto eoc; - } - s->index += l; - buf += l; - size -= l; - if (s->index >= s->packet_len) { - s->index = 0; - s->state = 0; - if (qemu_send_packet_async(&s->nc, s->buf, s->packet_len, - net_socket_send_completed) == 0) { - net_socket_read_poll(s, false); - break; - } - } - break; - } + ret = net_fill_rstate(&s->rs, buf, size); + + if (ret == -1) { + goto eoc; } } @@ -229,7 +194,7 @@ static void net_socket_send_dgram(void *opaque) NetSocketState *s = opaque; int size; - size = qemu_recv(s->fd, s->buf, sizeof(s->buf), 0); + size = qemu_recv(s->fd, s->rs.buf, sizeof(s->rs.buf), 0); if (size < 0) return; if (size == 0) { @@ -238,7 +203,7 @@ static void net_socket_send_dgram(void *opaque) net_socket_write_poll(s, false); return; } - if (qemu_send_packet_async(&s->nc, s->buf, size, + if (qemu_send_packet_async(&s->nc, s->rs.buf, size, net_socket_send_completed) == 0) { net_socket_read_poll(s, false); } @@ -346,7 +311,7 @@ static void net_socket_cleanup(NetClientState *nc) } static NetClientInfo net_dgram_socket_info = { - .type = NET_CLIENT_OPTIONS_KIND_SOCKET, + .type = NET_CLIENT_DRIVER_SOCKET, .size = sizeof(NetSocketState), .receive = net_socket_receive_dgram, .cleanup = net_socket_cleanup, @@ -401,6 +366,7 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, s->fd = fd; s->listen_fd = -1; s->send_fn = net_socket_send_dgram; + net_socket_rs_init(&s->rs, net_socket_rs_finalize); net_socket_read_poll(s, true); /* mcast: save bound address as dst */ @@ -429,7 +395,7 @@ static void net_socket_connect(void *opaque) } static NetClientInfo net_socket_info = { - .type = NET_CLIENT_OPTIONS_KIND_SOCKET, + .type = NET_CLIENT_DRIVER_SOCKET, .size = sizeof(NetSocketState), .receive = net_socket_receive, .cleanup = net_socket_cleanup, @@ -451,6 +417,7 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, s->fd = fd; s->listen_fd = -1; + net_socket_rs_init(&s->rs, net_socket_rs_finalize); /* Disable Nagle algorithm on TCP sockets to reduce latency */ socket_set_nodelay(fd); @@ -697,15 +664,15 @@ static int net_socket_udp_init(NetClientState *peer, return 0; } -int net_init_socket(const NetClientOptions *opts, const char *name, +int net_init_socket(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { /* FIXME error_setg(errp, ...) on failure */ Error *err = NULL; const NetdevSocketOptions *sock; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_SOCKET); - sock = opts->u.socket.data; + assert(netdev->type == NET_CLIENT_DRIVER_SOCKET); + sock = &netdev->u.socket; if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast + sock->has_udp != 1) { diff --git a/net/tap-linux.h b/net/tap-linux.h index 1dc3a9f279..2f36d100fc 100644 --- a/net/tap-linux.h +++ b/net/tap-linux.h @@ -50,4 +50,4 @@ #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ #define TUN_F_UFO 0x10 /* I can handle UFO packets */ -#endif /* QEMU_TAP_H */ +#endif /* QEMU_TAP_LINUX_H */ diff --git a/net/tap-win32.c b/net/tap-win32.c index 5bce5ef7f4..69d8357af2 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -760,7 +760,7 @@ static void tap_set_vnet_hdr_len(NetClientState *nc, int len) } static NetClientInfo net_tap_win32_info = { - .type = NET_CLIENT_OPTIONS_KIND_TAP, + .type = NET_CLIENT_DRIVER_TAP, .size = sizeof(TAPState), .receive = tap_receive, .cleanup = tap_cleanup, @@ -798,14 +798,14 @@ static int tap_win32_init(NetClientState *peer, const char *model, return 0; } -int net_init_tap(const NetClientOptions *opts, const char *name, +int net_init_tap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { /* FIXME error_setg(errp, ...) on failure */ const NetdevTapOptions *tap; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->u.tap.data; + assert(netdev->type == NET_CLIENT_DRIVER_TAP); + tap = &netdev->u.tap; if (!tap->has_ifname) { error_report("tap: no interface name"); @@ -64,6 +64,7 @@ typedef struct TAPState { bool enabled; VHostNetState *vhost_net; unsigned host_vnet_hdr_len; + Notifier exit; } TAPState; static void launch_script(const char *setup_script, const char *ifname, @@ -228,7 +229,7 @@ static bool tap_has_ufo(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); return s->has_ufo; } @@ -237,7 +238,7 @@ static bool tap_has_vnet_hdr(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); return !!s->host_vnet_hdr_len; } @@ -246,7 +247,7 @@ static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); return !!tap_probe_vnet_hdr_len(s->fd, len); } @@ -255,7 +256,7 @@ static void tap_set_vnet_hdr_len(NetClientState *nc, int len) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || len == sizeof(struct virtio_net_hdr)); @@ -267,7 +268,7 @@ static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); assert(!!s->host_vnet_hdr_len == using_vnet_hdr); s->using_vnet_hdr = using_vnet_hdr; @@ -298,24 +299,33 @@ static void tap_set_offload(NetClientState *nc, int csum, int tso4, tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); } +static void tap_exit_notify(Notifier *notifier, void *data) +{ + TAPState *s = container_of(notifier, TAPState, exit); + Error *err = NULL; + + if (s->down_script[0]) { + launch_script(s->down_script, s->down_script_arg, s->fd, &err); + if (err) { + error_report_err(err); + } + } +} + static void tap_cleanup(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - Error *err = NULL; if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); s->vhost_net = NULL; } qemu_purge_queued_packets(nc); - if (s->down_script[0]) { - launch_script(s->down_script, s->down_script_arg, s->fd, &err); - if (err) { - error_report_err(err); - } - } + tap_exit_notify(&s->exit, NULL); + qemu_remove_exit_notifier(&s->exit); tap_read_poll(s, false); tap_write_poll(s, false); @@ -333,14 +343,14 @@ static void tap_poll(NetClientState *nc, bool enable) int tap_get_fd(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); return s->fd; } /* fd support */ static NetClientInfo net_tap_info = { - .type = NET_CLIENT_OPTIONS_KIND_TAP, + .type = NET_CLIENT_DRIVER_TAP, .size = sizeof(TAPState), .receive = tap_receive, .receive_raw = tap_receive_raw, @@ -385,6 +395,10 @@ static TAPState *net_tap_fd_init(NetClientState *peer, } tap_read_poll(s, true); s->vhost_net = NULL; + + s->exit.notify = tap_exit_notify; + qemu_add_exit_notifier(&s->exit); + return s; } @@ -620,7 +634,7 @@ static int net_bridge_run_helper(const char *helper, const char *bridge, } } -int net_init_bridge(const NetClientOptions *opts, const char *name, +int net_init_bridge(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { const NetdevBridgeOptions *bridge; @@ -628,8 +642,8 @@ int net_init_bridge(const NetClientOptions *opts, const char *name, TAPState *s; int fd, vnet_hdr; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE); - bridge = opts->u.bridge.data; + assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE); + bridge = &netdev->u.bridge; helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; @@ -725,6 +739,11 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, options.backend_type = VHOST_BACKEND_TYPE_KERNEL; options.net_backend = &s->nc; + if (tap->has_poll_us) { + options.busyloop_timeout = tap->poll_us; + } else { + options.busyloop_timeout = 0; + } if (vhostfdname) { vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); @@ -749,7 +768,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, return; } } else if (vhostfdname) { - error_setg(errp, "vhostfd= is not valid without vhost"); + error_setg(errp, "vhostfd(s)= is not valid without vhost"); } } @@ -779,7 +798,7 @@ static int get_fds(char *str, char *fds[], int max) return i; } -int net_init_tap(const NetClientOptions *opts, const char *name, +int net_init_tap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { const NetdevTapOptions *tap; @@ -791,8 +810,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, const char *vhostfdname; char ifname[128]; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->u.tap.data; + assert(netdev->type == NET_CLIENT_DRIVER_TAP); + tap = &netdev->u.tap; queues = tap->has_queues ? tap->queues : 1; vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; /* QEMU vlans does not support multiqueue tap, in this case peer is set. @@ -830,8 +849,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, return -1; } } else if (tap->has_fds) { - char *fds[MAX_TAP_QUEUES]; - char *vhost_fds[MAX_TAP_QUEUES]; + char **fds = g_new0(char *, MAX_TAP_QUEUES); + char **vhost_fds = g_new0(char *, MAX_TAP_QUEUES); int nfds, nvhosts; if (tap->has_ifname || tap->has_script || tap->has_downscript || @@ -849,7 +868,7 @@ int net_init_tap(const NetClientOptions *opts, const char *name, if (nfds != nvhosts) { error_setg(errp, "The number of fds passed does not match " "the number of vhostfds passed"); - return -1; + goto free_fail; } } @@ -857,7 +876,7 @@ int net_init_tap(const NetClientOptions *opts, const char *name, fd = monitor_fd_param(cur_mon, fds[i], &err); if (fd == -1) { error_propagate(errp, err); - return -1; + goto free_fail; } fcntl(fd, F_SETFL, O_NONBLOCK); @@ -867,7 +886,7 @@ int net_init_tap(const NetClientOptions *opts, const char *name, } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) { error_setg(errp, "vnet_hdr not consistent across given tap fds"); - return -1; + goto free_fail; } net_init_tap_one(tap, peer, "tap", name, ifname, @@ -876,9 +895,21 @@ int net_init_tap(const NetClientOptions *opts, const char *name, vnet_hdr, fd, &err); if (err) { error_propagate(errp, err); - return -1; + goto free_fail; } } + g_free(fds); + g_free(vhost_fds); + return 0; + +free_fail: + for (i = 0; i < nfds; i++) { + g_free(fds[i]); + g_free(vhost_fds[i]); + } + g_free(fds); + g_free(vhost_fds); + return -1; } else if (tap->has_helper) { if (tap->has_ifname || tap->has_script || tap->has_downscript || tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) { @@ -960,7 +991,7 @@ int net_init_tap(const NetClientOptions *opts, const char *name, VHostNetState *tap_get_vhost_net(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); return s->vhost_net; } diff --git a/net/tap_int.h b/net/tap_int.h index 2378021c45..ae6888f74a 100644 --- a/net/tap_int.h +++ b/net/tap_int.h @@ -23,8 +23,8 @@ * THE SOFTWARE. */ -#ifndef QEMU_TAP_H -#define QEMU_TAP_H +#ifndef NET_TAP_INT_H +#define NET_TAP_INT_H #include "qemu-common.h" #include "qapi-types.h" @@ -46,4 +46,4 @@ int tap_fd_enable(int fd); int tap_fd_disable(int fd); int tap_fd_get_ifname(int fd, char *ifname); -#endif /* QEMU_TAP_H */ +#endif /* NET_TAP_INT_H */ diff --git a/net/trace-events b/net/trace-events new file mode 100644 index 0000000000..65c46a48fb --- /dev/null +++ b/net/trace-events @@ -0,0 +1,4 @@ +# See docs/tracing.txt for syntax documentation. + +# net/vhost-user.c +vhost_user_event(const char *chr, int event) "chr: %s got event: %d" @@ -68,7 +68,7 @@ static void vde_cleanup(NetClientState *nc) } static NetClientInfo net_vde_info = { - .type = NET_CLIENT_OPTIONS_KIND_VDE, + .type = NET_CLIENT_DRIVER_VDE, .size = sizeof(VDEState), .receive = vde_receive, .cleanup = vde_cleanup, @@ -109,14 +109,14 @@ static int net_vde_init(NetClientState *peer, const char *model, return 0; } -int net_init_vde(const NetClientOptions *opts, const char *name, +int net_init_vde(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { /* FIXME error_setg(errp, ...) on failure */ const NetdevVdeOptions *vde; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_VDE); - vde = opts->u.vde.data; + assert(netdev->type == NET_CLIENT_DRIVER_VDE); + vde = &netdev->u.vde; /* missing optional values have been initialized to "all bits zero" */ if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group, diff --git a/net/vhost-user.c b/net/vhost-user.c index 1b9e73a2dc..b0595f8781 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -22,6 +22,9 @@ typedef struct VhostUserState { NetClientState nc; CharDriverState *chr; VHostNetState *vhost_net; + guint watch; + uint64_t acked_features; + bool started; } VhostUserState; typedef struct VhostUserChardevProps { @@ -32,13 +35,15 @@ typedef struct VhostUserChardevProps { VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) { VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return s->vhost_net; } -static int vhost_user_running(VhostUserState *s) +uint64_t vhost_user_get_acked_features(NetClientState *nc) { - return (s->vhost_net) ? 1 : 0; + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); + return s->acked_features; } static void vhost_user_stop(int queues, NetClientState *ncs[]) @@ -47,16 +52,17 @@ static void vhost_user_stop(int queues, NetClientState *ncs[]) int i; for (i = 0; i < queues; i++) { - assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER); s = DO_UPCAST(VhostUserState, nc, ncs[i]); - if (!vhost_user_running(s)) { - continue; - } if (s->vhost_net) { + /* save acked features */ + uint64_t features = vhost_net_get_acked_features(s->vhost_net); + if (features) { + s->acked_features = features; + } vhost_net_cleanup(s->vhost_net); - s->vhost_net = NULL; } } } @@ -64,6 +70,7 @@ static void vhost_user_stop(int queues, NetClientState *ncs[]) static int vhost_user_start(int queues, NetClientState *ncs[]) { VhostNetOptions options; + struct vhost_net *net = NULL; VhostUserState *s; int max_queues; int i; @@ -71,35 +78,42 @@ static int vhost_user_start(int queues, NetClientState *ncs[]) options.backend_type = VHOST_BACKEND_TYPE_USER; for (i = 0; i < queues; i++) { - assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + assert(ncs[i]->info->type == NET_CLIENT_DRIVER_VHOST_USER); s = DO_UPCAST(VhostUserState, nc, ncs[i]); - if (vhost_user_running(s)) { - continue; - } options.net_backend = ncs[i]; options.opaque = s->chr; - s->vhost_net = vhost_net_init(&options); - if (!s->vhost_net) { + options.busyloop_timeout = 0; + net = vhost_net_init(&options); + if (!net) { error_report("failed to init vhost_net for queue %d", i); goto err; } if (i == 0) { - max_queues = vhost_net_get_max_queues(s->vhost_net); + max_queues = vhost_net_get_max_queues(net); if (queues > max_queues) { error_report("you are asking more queues than supported: %d", max_queues); goto err; } } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); + } + s->vhost_net = net; } return 0; err: - vhost_user_stop(i + 1, ncs); + if (net) { + vhost_net_cleanup(net); + } + vhost_user_stop(i, ncs); return -1; } @@ -138,28 +152,34 @@ static void vhost_user_cleanup(NetClientState *nc) if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); s->vhost_net = NULL; } + if (s->chr) { + qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL); + qemu_chr_fe_release(s->chr); + s->chr = NULL; + } qemu_purge_queued_packets(nc); } static bool vhost_user_has_vnet_hdr(NetClientState *nc) { - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return true; } static bool vhost_user_has_ufo(NetClientState *nc) { - assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return true; } static NetClientInfo net_vhost_user_info = { - .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, + .type = NET_CLIENT_DRIVER_VHOST_USER, .size = sizeof(VhostUserState), .receive = vhost_user_receive, .cleanup = vhost_user_cleanup, @@ -167,6 +187,16 @@ static NetClientInfo net_vhost_user_info = { .has_ufo = vhost_user_has_ufo, }; +static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, + void *opaque) +{ + VhostUserState *s = opaque; + + qemu_chr_disconnect(s->chr); + + return FALSE; +} + static void net_vhost_user_event(void *opaque, int event) { const char *name = opaque; @@ -176,7 +206,7 @@ static void net_vhost_user_event(void *opaque, int event) int queues; queues = qemu_find_net_clients_except(name, ncs, - NET_CLIENT_OPTIONS_KIND_NIC, + NET_CLIENT_DRIVER_NIC, MAX_QUEUE_NUM); assert(queues < MAX_QUEUE_NUM); @@ -184,14 +214,20 @@ static void net_vhost_user_event(void *opaque, int event) trace_vhost_user_event(s->chr->label, event); switch (event) { case CHR_EVENT_OPENED: + s->watch = qemu_chr_fe_add_watch(s->chr, G_IO_HUP, + net_vhost_user_watch, s); if (vhost_user_start(queues, ncs) < 0) { - exit(1); + qemu_chr_disconnect(s->chr); + return; } qmp_set_link(name, true, &err); + s->started = true; break; case CHR_EVENT_CLOSED: qmp_set_link(name, false, &err); vhost_user_stop(queues, ncs); + g_source_remove(s->watch); + s->watch = 0; break; } @@ -204,7 +240,7 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, const char *name, CharDriverState *chr, int queues) { - NetClientState *nc; + NetClientState *nc, *nc0 = NULL; VhostUserState *s; int i; @@ -213,6 +249,9 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, for (i = 0; i < queues; i++) { nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + if (!nc0) { + nc0 = nc; + } snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", i, chr->label); @@ -223,7 +262,18 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, s->chr = chr; } - qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, nc[0].name); + s = DO_UPCAST(VhostUserState, nc, nc0); + do { + Error *err = NULL; + if (qemu_chr_wait_connected(chr, &err) < 0) { + error_report_err(err); + return -1; + } + qemu_chr_add_handlers(chr, NULL, NULL, + net_vhost_user_event, nc0->name); + } while (!s->started); + + assert(s->vhost_net); return 0; } @@ -280,7 +330,6 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) { const char *name = opaque; const char *driver, *netdev; - const char virtio_name[] = "virtio-net-"; driver = qemu_opt_get(opts, "driver"); netdev = qemu_opt_get(opts, "netdev"); @@ -290,7 +339,7 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) } if (strcmp(netdev, name) == 0 && - strncmp(driver, virtio_name, strlen(virtio_name)) != 0) { + !g_str_has_prefix(driver, "virtio-net-")) { error_setg(errp, "vhost-user requires frontend driver virtio-net-*"); return -1; } @@ -298,15 +347,15 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) return 0; } -int net_init_vhost_user(const NetClientOptions *opts, const char *name, +int net_init_vhost_user(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp) { int queues; const NetdevVhostUserOptions *vhost_user_opts; CharDriverState *chr; - assert(opts->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); - vhost_user_opts = opts->u.vhost_user.data; + assert(netdev->type == NET_CLIENT_DRIVER_VHOST_USER); + vhost_user_opts = &netdev->u.vhost_user; chr = net_vhost_parse_chardev(vhost_user_opts, errp); if (!chr) { |