From ffcdedb667b6db8ee31c7efa76a3ec59d9c3b0fc Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Sat, 6 Apr 2013 00:54:37 +0000
Subject: Revert "bonding: remove sysfs before removing devices"

This reverts commit 4de79c737b200492195ebc54a887075327e1ec1d.

This patch introduces a new bug which causes access to freed memory.
In bond_uninit: list_del(&bond->bond_list);
bond_list is linked in bond_net's dev_list which is freed by
unregister_pernet_subsys.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 171b10f167a..a51241b2e62 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4902,8 +4902,8 @@ static void __exit bonding_exit(void)
 
 	bond_destroy_debugfs();
 
-	unregister_pernet_subsys(&bond_net_ops);
 	rtnl_link_unregister(&bond_link_ops);
+	unregister_pernet_subsys(&bond_net_ops);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	/*
-- 
cgit v1.2.3


From 69b0216ac255f523556fa3d4ff030d857eaaa37f Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Sat, 6 Apr 2013 00:54:38 +0000
Subject: bonding: fix bonding_masters race condition in bond unloading

While the bonding module is unloading, it is considered that after
rtnl_link_unregister all bond devices are destroyed but since no
synchronization mechanism exists, a new bond device can be created
via bonding_masters before unregister_pernet_subsys which would
lead to multiple problems (e.g. NULL pointer dereference, wrong RIP,
list corruption).

This patch fixes the issue by removing any bond devices left in the
netns after bonding_masters is removed from sysfs.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Acked-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a51241b2e62..07401a3e256 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4846,9 +4846,18 @@ static int __net_init bond_net_init(struct net *net)
 static void __net_exit bond_net_exit(struct net *net)
 {
 	struct bond_net *bn = net_generic(net, bond_net_id);
+	struct bonding *bond, *tmp_bond;
+	LIST_HEAD(list);
 
 	bond_destroy_sysfs(bn);
 	bond_destroy_proc_dir(bn);
+
+	/* Kill off any bonds created after unregistering bond rtnl ops */
+	rtnl_lock();
+	list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
+		unregister_netdevice_queue(bond->dev, &list);
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
 }
 
 static struct pernet_operations bond_net_ops = {
-- 
cgit v1.2.3


From 6101391d4a381cc0c661d8765235b3cad7da09e5 Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 11 Apr 2013 09:18:55 +0000
Subject: bonding: fix netdev event NULL pointer dereference

In commit 471cb5a33dcbd7c529684a2ac7ba4451414ee4a7 ("bonding: remove
usage of dev->master") a bug was introduced which causes a NULL pointer
dereference. If a bond device is in mode 6 (ALB) and a slave is added
it will dereference a NULL pointer in bond_slave_netdev_event().
This is because in bond_enslave we have bond_alb_init_slave() which
changes the MAC address of the slave and causes a NETDEV_CHANGEADDR.
Then we have in bond_slave_netdev_event():
        struct slave *slave = bond_slave_get_rtnl(slave_dev);
        struct bonding *bond = slave->bond;
bond_slave_get_rtnl() dereferences slave_dev->rx_handler_data which at
that time is NULL since netdev_rx_handler_register() is called later.

This is fixed by checking if slave is NULL before dereferencing it.

v2: Comment style changed.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 07401a3e256..e074c6b5551 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3168,11 +3168,20 @@ static int bond_slave_netdev_event(unsigned long event,
 				   struct net_device *slave_dev)
 {
 	struct slave *slave = bond_slave_get_rtnl(slave_dev);
-	struct bonding *bond = slave->bond;
-	struct net_device *bond_dev = slave->bond->dev;
+	struct bonding *bond;
+	struct net_device *bond_dev;
 	u32 old_speed;
 	u8 old_duplex;
 
+	/* A netdev event can be generated while enslaving a device
+	 * before netdev_rx_handler_register is called in which case
+	 * slave will be NULL
+	 */
+	if (!slave)
+		return NOTIFY_DONE;
+	bond_dev = slave->bond->dev;
+	bond = slave->bond;
+
 	switch (event) {
 	case NETDEV_UNREGISTER:
 		if (bond->setup_by_slave)
-- 
cgit v1.2.3


From b6a5a7b9a528a8b4c8bec940b607c5dd9102b8cc Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 11 Apr 2013 09:18:56 +0000
Subject: bonding: IFF_BONDING is not stripped on enslave failure

While enslaving a new device and after IFF_BONDING flag is set, in case
of failure it is not stripped from the device's priv_flags while
cleaning up, which could lead to other problems.
Cleaning at err_close because the flag is set after dev_open().

v2: no change

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e074c6b5551..a61a760484f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1906,6 +1906,7 @@ err_detach:
 	write_unlock_bh(&bond->lock);
 
 err_close:
+	slave_dev->priv_flags &= ~IFF_BONDING;
 	dev_close(slave_dev);
 
 err_unset_master:
-- 
cgit v1.2.3


From 4394542ca4ec9f28c3c8405063d200b1e7c347d7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Apr 2013 17:03:24 +0000
Subject: bonding: fix l23 and l34 load balancing in forwarding path

Since commit 6b923cb7188d46 (bonding: support for IPv6 transmit hashing)
bonding doesn't properly hash traffic in forwarding setups.

Vitaly V. Bursov diagnosed that skb_network_header_len() returned 0 in
this case.

More generally, the transport header might not be in the skb head.

Use pskb_may_pull() & skb_header_pointer() to get it right, and use
proto_ports_offset() in bond_xmit_hash_policy_l34() to get support for
more protocols than TCP and UDP.

Reported-by: Vitaly V. Bursov <vitalyb@telenet.dn.ua>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: John Eaglesham <linux@8192.net>
Tested-by: Vitaly V. Bursov <vitalyb@telenet.dn.ua>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 55 ++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a61a760484f..dea8ce20fea 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3296,20 +3296,22 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
  */
 static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
 {
-	struct ethhdr *data = (struct ethhdr *)skb->data;
-	struct iphdr *iph;
-	struct ipv6hdr *ipv6h;
+	const struct ethhdr *data;
+	const struct iphdr *iph;
+	const struct ipv6hdr *ipv6h;
 	u32 v6hash;
-	__be32 *s, *d;
+	const __be32 *s, *d;
 
 	if (skb->protocol == htons(ETH_P_IP) &&
-	    skb_network_header_len(skb) >= sizeof(*iph)) {
+	    pskb_network_may_pull(skb, sizeof(*iph))) {
 		iph = ip_hdr(skb);
+		data = (struct ethhdr *)skb->data;
 		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
 			(data->h_dest[5] ^ data->h_source[5])) % count;
 	} else if (skb->protocol == htons(ETH_P_IPV6) &&
-		   skb_network_header_len(skb) >= sizeof(*ipv6h)) {
+		   pskb_network_may_pull(skb, sizeof(*ipv6h))) {
 		ipv6h = ipv6_hdr(skb);
+		data = (struct ethhdr *)skb->data;
 		s = &ipv6h->saddr.s6_addr32[0];
 		d = &ipv6h->daddr.s6_addr32[0];
 		v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
@@ -3328,33 +3330,36 @@ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
 {
 	u32 layer4_xor = 0;
-	struct iphdr *iph;
-	struct ipv6hdr *ipv6h;
-	__be32 *s, *d;
-	__be16 *layer4hdr;
+	const struct iphdr *iph;
+	const struct ipv6hdr *ipv6h;
+	const __be32 *s, *d;
+	const __be16 *l4 = NULL;
+	__be16 _l4[2];
+	int noff = skb_network_offset(skb);
+	int poff;
 
 	if (skb->protocol == htons(ETH_P_IP) &&
-	    skb_network_header_len(skb) >= sizeof(*iph)) {
+	    pskb_may_pull(skb, noff + sizeof(*iph))) {
 		iph = ip_hdr(skb);
-		if (!ip_is_fragment(iph) &&
-		    (iph->protocol == IPPROTO_TCP ||
-		     iph->protocol == IPPROTO_UDP) &&
-		    (skb_headlen(skb) - skb_network_offset(skb) >=
-		     iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) {
-			layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
-			layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
+		poff = proto_ports_offset(iph->protocol);
+
+		if (!ip_is_fragment(iph) && poff >= 0) {
+			l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
+						sizeof(_l4), &_l4);
+			if (l4)
+				layer4_xor = ntohs(l4[0] ^ l4[1]);
 		}
 		return (layer4_xor ^
 			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
 	} else if (skb->protocol == htons(ETH_P_IPV6) &&
-		   skb_network_header_len(skb) >= sizeof(*ipv6h)) {
+		   pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
 		ipv6h = ipv6_hdr(skb);
-		if ((ipv6h->nexthdr == IPPROTO_TCP ||
-		     ipv6h->nexthdr == IPPROTO_UDP) &&
-		    (skb_headlen(skb) - skb_network_offset(skb) >=
-		     sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) {
-			layer4hdr = (__be16 *)(ipv6h + 1);
-			layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
+		poff = proto_ports_offset(ipv6h->nexthdr);
+		if (poff >= 0) {
+			l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
+						sizeof(_l4), &_l4);
+			if (l4)
+				layer4_xor = ntohs(l4[0] ^ l4[1]);
 		}
 		s = &ipv6h->saddr.s6_addr32[0];
 		d = &ipv6h->daddr.s6_addr32[0];
-- 
cgit v1.2.3


From 25e40305d4f4399bc8ecf9c9b7cf43493bb40bbd Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 18 Apr 2013 07:33:34 +0000
Subject: bonding: mc addresses don't get deleted on enslave failure

Add bond_mc_list_flush() after err_detach as that's the first error path
after the addresses are added. The main issue is the mc addresses' refcount
which only gets bumped up.

v2: update log message and don't move code unnecessarily

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index dea8ce20fea..4cecb80df85 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1901,6 +1901,11 @@ err_dest_symlinks:
 	bond_destroy_slave_symlinks(bond_dev, slave_dev);
 
 err_detach:
+	if (!USES_PRIMARY(bond->params.mode)) {
+		netif_addr_lock_bh(bond_dev);
+		bond_mc_list_flush(bond_dev, slave_dev);
+		netif_addr_unlock_bh(bond_dev);
+	}
 	write_lock_bh(&bond->lock);
 	bond_detach_slave(bond, new_slave);
 	write_unlock_bh(&bond->lock);
-- 
cgit v1.2.3


From a506e7b479e1215c230e4b87fedc246cf748537f Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 18 Apr 2013 07:33:35 +0000
Subject: bonding: vlans don't get deleted on enslave failure

The main problem is with vid refcount which only gets bumped up.
Delete the vlans after err_detach as that's the first error path
after the vlans are added.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4cecb80df85..dd67c49070d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1906,6 +1906,7 @@ err_detach:
 		bond_mc_list_flush(bond_dev, slave_dev);
 		netif_addr_unlock_bh(bond_dev);
 	}
+	bond_del_vlans_from_slave(bond, slave_dev);
 	write_lock_bh(&bond->lock);
 	bond_detach_slave(bond, new_slave);
 	write_unlock_bh(&bond->lock);
-- 
cgit v1.2.3


From 3c5913b53fefc9d9e15a2d0f93042766658d9f3f Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 18 Apr 2013 07:33:36 +0000
Subject: bonding: primary_slave & curr_active_slave are not cleaned on enslave
 failure

On enslave failure primary_slave can point to new_slave which is to be
freed, and the same applies to curr_active_slave. So check if this is
the case and clean up properly after err_detach because that's the first
error code path after they're set.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index dd67c49070d..1137d5eac45 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1909,7 +1909,17 @@ err_detach:
 	bond_del_vlans_from_slave(bond, slave_dev);
 	write_lock_bh(&bond->lock);
 	bond_detach_slave(bond, new_slave);
+	if (bond->primary_slave == new_slave)
+		bond->primary_slave = NULL;
 	write_unlock_bh(&bond->lock);
+	if (bond->curr_active_slave == new_slave) {
+		read_lock(&bond->lock);
+		write_lock_bh(&bond->curr_slave_lock);
+		bond_change_active_slave(bond, NULL);
+		bond_select_active_slave(bond);
+		write_unlock_bh(&bond->curr_slave_lock);
+		read_unlock(&bond->lock);
+	}
 
 err_close:
 	slave_dev->priv_flags &= ~IFF_BONDING;
-- 
cgit v1.2.3


From fc7a72ac86e2956dd405b0c604fea45a2702f567 Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 18 Apr 2013 07:33:37 +0000
Subject: bonding: disable netpoll on enslave failure

slave_disable_netpoll() is not called upon enslave failure which would
lead to a memory leak. Call slave_disable_netpoll() after err_detach as
that's the first error path after enabling netpoll on that slave.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1137d5eac45..ae35b28a39e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1920,6 +1920,7 @@ err_detach:
 		write_unlock_bh(&bond->curr_slave_lock);
 		read_unlock(&bond->lock);
 	}
+	slave_disable_netpoll(new_slave);
 
 err_close:
 	slave_dev->priv_flags &= ~IFF_BONDING;
-- 
cgit v1.2.3


From d632ce989c811863a1ce9b1c53dae2cf06b35c49 Mon Sep 17 00:00:00 2001
From: "nikolay@redhat.com" <nikolay@redhat.com>
Date: Thu, 18 Apr 2013 07:33:38 +0000
Subject: bonding: in bond_mc_swap() bond's mc addr list is walked without lock

Use netif_addr_lock_bh() to acquire the appropriate lock before walking.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ae35b28a39e..dbbea0eec13 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -846,8 +846,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 		if (bond->dev->flags & IFF_ALLMULTI)
 			dev_set_allmulti(old_active->dev, -1);
 
+		netif_addr_lock_bh(bond->dev);
 		netdev_for_each_mc_addr(ha, bond->dev)
 			dev_mc_del(old_active->dev, ha->addr);
+		netif_addr_unlock_bh(bond->dev);
 	}
 
 	if (new_active) {
@@ -858,8 +860,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 		if (bond->dev->flags & IFF_ALLMULTI)
 			dev_set_allmulti(new_active->dev, 1);
 
+		netif_addr_lock_bh(bond->dev);
 		netdev_for_each_mc_addr(ha, bond->dev)
 			dev_mc_add(new_active->dev, ha->addr);
+		netif_addr_unlock_bh(bond->dev);
 	}
 }
 
-- 
cgit v1.2.3