From ffcdedb667b6db8ee31c7efa76a3ec59d9c3b0fc Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 6 Apr 2013 00:54:37 +0000 Subject: Revert "bonding: remove sysfs before removing devices" This reverts commit 4de79c737b200492195ebc54a887075327e1ec1d. This patch introduces a new bug which causes access to freed memory. In bond_uninit: list_del(&bond->bond_list); bond_list is linked in bond_net's dev_list which is freed by unregister_pernet_subsys. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 171b10f167a..a51241b2e62 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4902,8 +4902,8 @@ static void __exit bonding_exit(void) bond_destroy_debugfs(); - unregister_pernet_subsys(&bond_net_ops); rtnl_link_unregister(&bond_link_ops); + unregister_pernet_subsys(&bond_net_ops); #ifdef CONFIG_NET_POLL_CONTROLLER /* -- cgit v1.2.3 From 69b0216ac255f523556fa3d4ff030d857eaaa37f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 6 Apr 2013 00:54:38 +0000 Subject: bonding: fix bonding_masters race condition in bond unloading While the bonding module is unloading, it is considered that after rtnl_link_unregister all bond devices are destroyed but since no synchronization mechanism exists, a new bond device can be created via bonding_masters before unregister_pernet_subsys which would lead to multiple problems (e.g. NULL pointer dereference, wrong RIP, list corruption). This patch fixes the issue by removing any bond devices left in the netns after bonding_masters is removed from sysfs. Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a51241b2e62..07401a3e256 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4846,9 +4846,18 @@ static int __net_init bond_net_init(struct net *net) static void __net_exit bond_net_exit(struct net *net) { struct bond_net *bn = net_generic(net, bond_net_id); + struct bonding *bond, *tmp_bond; + LIST_HEAD(list); bond_destroy_sysfs(bn); bond_destroy_proc_dir(bn); + + /* Kill off any bonds created after unregistering bond rtnl ops */ + rtnl_lock(); + list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) + unregister_netdevice_queue(bond->dev, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); } static struct pernet_operations bond_net_ops = { -- cgit v1.2.3 From 6101391d4a381cc0c661d8765235b3cad7da09e5 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 11 Apr 2013 09:18:55 +0000 Subject: bonding: fix netdev event NULL pointer dereference In commit 471cb5a33dcbd7c529684a2ac7ba4451414ee4a7 ("bonding: remove usage of dev->master") a bug was introduced which causes a NULL pointer dereference. If a bond device is in mode 6 (ALB) and a slave is added it will dereference a NULL pointer in bond_slave_netdev_event(). This is because in bond_enslave we have bond_alb_init_slave() which changes the MAC address of the slave and causes a NETDEV_CHANGEADDR. Then we have in bond_slave_netdev_event(): struct slave *slave = bond_slave_get_rtnl(slave_dev); struct bonding *bond = slave->bond; bond_slave_get_rtnl() dereferences slave_dev->rx_handler_data which at that time is NULL since netdev_rx_handler_register() is called later. This is fixed by checking if slave is NULL before dereferencing it. v2: Comment style changed. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 07401a3e256..e074c6b5551 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3168,11 +3168,20 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) { struct slave *slave = bond_slave_get_rtnl(slave_dev); - struct bonding *bond = slave->bond; - struct net_device *bond_dev = slave->bond->dev; + struct bonding *bond; + struct net_device *bond_dev; u32 old_speed; u8 old_duplex; + /* A netdev event can be generated while enslaving a device + * before netdev_rx_handler_register is called in which case + * slave will be NULL + */ + if (!slave) + return NOTIFY_DONE; + bond_dev = slave->bond->dev; + bond = slave->bond; + switch (event) { case NETDEV_UNREGISTER: if (bond->setup_by_slave) -- cgit v1.2.3 From b6a5a7b9a528a8b4c8bec940b607c5dd9102b8cc Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 11 Apr 2013 09:18:56 +0000 Subject: bonding: IFF_BONDING is not stripped on enslave failure While enslaving a new device and after IFF_BONDING flag is set, in case of failure it is not stripped from the device's priv_flags while cleaning up, which could lead to other problems. Cleaning at err_close because the flag is set after dev_open(). v2: no change Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e074c6b5551..a61a760484f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1906,6 +1906,7 @@ err_detach: write_unlock_bh(&bond->lock); err_close: + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_unset_master: -- cgit v1.2.3 From 4394542ca4ec9f28c3c8405063d200b1e7c347d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Apr 2013 17:03:24 +0000 Subject: bonding: fix l23 and l34 load balancing in forwarding path Since commit 6b923cb7188d46 (bonding: support for IPv6 transmit hashing) bonding doesn't properly hash traffic in forwarding setups. Vitaly V. Bursov diagnosed that skb_network_header_len() returned 0 in this case. More generally, the transport header might not be in the skb head. Use pskb_may_pull() & skb_header_pointer() to get it right, and use proto_ports_offset() in bond_xmit_hash_policy_l34() to get support for more protocols than TCP and UDP. Reported-by: Vitaly V. Bursov Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Andy Gospodarek Cc: John Eaglesham Tested-by: Vitaly V. Bursov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 55 ++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a61a760484f..dea8ce20fea 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3296,20 +3296,22 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) */ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) { - struct ethhdr *data = (struct ethhdr *)skb->data; - struct iphdr *iph; - struct ipv6hdr *ipv6h; + const struct ethhdr *data; + const struct iphdr *iph; + const struct ipv6hdr *ipv6h; u32 v6hash; - __be32 *s, *d; + const __be32 *s, *d; if (skb->protocol == htons(ETH_P_IP) && - skb_network_header_len(skb) >= sizeof(*iph)) { + pskb_network_may_pull(skb, sizeof(*iph))) { iph = ip_hdr(skb); + data = (struct ethhdr *)skb->data; return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ (data->h_dest[5] ^ data->h_source[5])) % count; } else if (skb->protocol == htons(ETH_P_IPV6) && - skb_network_header_len(skb) >= sizeof(*ipv6h)) { + pskb_network_may_pull(skb, sizeof(*ipv6h))) { ipv6h = ipv6_hdr(skb); + data = (struct ethhdr *)skb->data; s = &ipv6h->saddr.s6_addr32[0]; d = &ipv6h->daddr.s6_addr32[0]; v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); @@ -3328,33 +3330,36 @@ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) { u32 layer4_xor = 0; - struct iphdr *iph; - struct ipv6hdr *ipv6h; - __be32 *s, *d; - __be16 *layer4hdr; + const struct iphdr *iph; + const struct ipv6hdr *ipv6h; + const __be32 *s, *d; + const __be16 *l4 = NULL; + __be16 _l4[2]; + int noff = skb_network_offset(skb); + int poff; if (skb->protocol == htons(ETH_P_IP) && - skb_network_header_len(skb) >= sizeof(*iph)) { + pskb_may_pull(skb, noff + sizeof(*iph))) { iph = ip_hdr(skb); - if (!ip_is_fragment(iph) && - (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP) && - (skb_headlen(skb) - skb_network_offset(skb) >= - iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) { - layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); - layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + poff = proto_ports_offset(iph->protocol); + + if (!ip_is_fragment(iph) && poff >= 0) { + l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, + sizeof(_l4), &_l4); + if (l4) + layer4_xor = ntohs(l4[0] ^ l4[1]); } return (layer4_xor ^ ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; } else if (skb->protocol == htons(ETH_P_IPV6) && - skb_network_header_len(skb) >= sizeof(*ipv6h)) { + pskb_may_pull(skb, noff + sizeof(*ipv6h))) { ipv6h = ipv6_hdr(skb); - if ((ipv6h->nexthdr == IPPROTO_TCP || - ipv6h->nexthdr == IPPROTO_UDP) && - (skb_headlen(skb) - skb_network_offset(skb) >= - sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) { - layer4hdr = (__be16 *)(ipv6h + 1); - layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + poff = proto_ports_offset(ipv6h->nexthdr); + if (poff >= 0) { + l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff, + sizeof(_l4), &_l4); + if (l4) + layer4_xor = ntohs(l4[0] ^ l4[1]); } s = &ipv6h->saddr.s6_addr32[0]; d = &ipv6h->daddr.s6_addr32[0]; -- cgit v1.2.3 From 25e40305d4f4399bc8ecf9c9b7cf43493bb40bbd Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:34 +0000 Subject: bonding: mc addresses don't get deleted on enslave failure Add bond_mc_list_flush() after err_detach as that's the first error path after the addresses are added. The main issue is the mc addresses' refcount which only gets bumped up. v2: update log message and don't move code unnecessarily Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index dea8ce20fea..4cecb80df85 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1901,6 +1901,11 @@ err_dest_symlinks: bond_destroy_slave_symlinks(bond_dev, slave_dev); err_detach: + if (!USES_PRIMARY(bond->params.mode)) { + netif_addr_lock_bh(bond_dev); + bond_mc_list_flush(bond_dev, slave_dev); + netif_addr_unlock_bh(bond_dev); + } write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); write_unlock_bh(&bond->lock); -- cgit v1.2.3 From a506e7b479e1215c230e4b87fedc246cf748537f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:35 +0000 Subject: bonding: vlans don't get deleted on enslave failure The main problem is with vid refcount which only gets bumped up. Delete the vlans after err_detach as that's the first error path after the vlans are added. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4cecb80df85..dd67c49070d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1906,6 +1906,7 @@ err_detach: bond_mc_list_flush(bond_dev, slave_dev); netif_addr_unlock_bh(bond_dev); } + bond_del_vlans_from_slave(bond, slave_dev); write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); write_unlock_bh(&bond->lock); -- cgit v1.2.3 From 3c5913b53fefc9d9e15a2d0f93042766658d9f3f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:36 +0000 Subject: bonding: primary_slave & curr_active_slave are not cleaned on enslave failure On enslave failure primary_slave can point to new_slave which is to be freed, and the same applies to curr_active_slave. So check if this is the case and clean up properly after err_detach because that's the first error code path after they're set. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index dd67c49070d..1137d5eac45 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1909,7 +1909,17 @@ err_detach: bond_del_vlans_from_slave(bond, slave_dev); write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); + if (bond->primary_slave == new_slave) + bond->primary_slave = NULL; write_unlock_bh(&bond->lock); + if (bond->curr_active_slave == new_slave) { + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, NULL); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + } err_close: slave_dev->priv_flags &= ~IFF_BONDING; -- cgit v1.2.3 From fc7a72ac86e2956dd405b0c604fea45a2702f567 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:37 +0000 Subject: bonding: disable netpoll on enslave failure slave_disable_netpoll() is not called upon enslave failure which would lead to a memory leak. Call slave_disable_netpoll() after err_detach as that's the first error path after enabling netpoll on that slave. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1137d5eac45..ae35b28a39e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1920,6 +1920,7 @@ err_detach: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); } + slave_disable_netpoll(new_slave); err_close: slave_dev->priv_flags &= ~IFF_BONDING; -- cgit v1.2.3 From d632ce989c811863a1ce9b1c53dae2cf06b35c49 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:38 +0000 Subject: bonding: in bond_mc_swap() bond's mc addr list is walked without lock Use netif_addr_lock_bh() to acquire the appropriate lock before walking. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/bonding') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ae35b28a39e..dbbea0eec13 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -846,8 +846,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(old_active->dev, -1); + netif_addr_lock_bh(bond->dev); netdev_for_each_mc_addr(ha, bond->dev) dev_mc_del(old_active->dev, ha->addr); + netif_addr_unlock_bh(bond->dev); } if (new_active) { @@ -858,8 +860,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(new_active->dev, 1); + netif_addr_lock_bh(bond->dev); netdev_for_each_mc_addr(ha, bond->dev) dev_mc_add(new_active->dev, ha->addr); + netif_addr_unlock_bh(bond->dev); } } -- cgit v1.2.3