summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/bonding.txt59
-rw-r--r--drivers/net/bonding/bond_main.c182
-rw-r--r--drivers/net/bonding/bond_sysfs.c54
-rw-r--r--drivers/net/bonding/bonding.h32
-rw-r--r--include/linux/if.h1
-rw-r--r--include/linux/netdevice.h7
6 files changed, 325 insertions, 10 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index afac780445c..dc942eaf490 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -192,6 +192,17 @@ or, for backwards compatibility, the option value. E.g.,
arp_interval
Specifies the ARP link monitoring frequency in milliseconds.
+
+ The ARP monitor works by periodically checking the slave
+ devices to determine whether they have sent or received
+ traffic recently (the precise criteria depends upon the
+ bonding mode, and the state of the slave). Regular traffic is
+ generated via ARP probes issued for the addresses specified by
+ the arp_ip_target option.
+
+ This behavior can be modified by the arp_validate option,
+ below.
+
If ARP monitoring is used in an etherchannel compatible mode
(modes 0 and 2), the switch should be configured in a mode
that evenly distributes packets across all links. If the
@@ -213,6 +224,54 @@ arp_ip_target
maximum number of targets that can be specified is 16. The
default value is no IP addresses.
+arp_validate
+
+ Specifies whether or not ARP probes and replies should be
+ validated in the active-backup mode. This causes the ARP
+ monitor to examine the incoming ARP requests and replies, and
+ only consider a slave to be up if it is receiving the
+ appropriate ARP traffic.
+
+ Possible values are:
+
+ none or 0
+
+ No validation is performed. This is the default.
+
+ active or 1
+
+ Validation is performed only for the active slave.
+
+ backup or 2
+
+ Validation is performed only for backup slaves.
+
+ all or 3
+
+ Validation is performed for all slaves.
+
+ For the active slave, the validation checks ARP replies to
+ confirm that they were generated by an arp_ip_target. Since
+ backup slaves do not typically receive these replies, the
+ validation performed for backup slaves is on the ARP request
+ sent out via the active slave. It is possible that some
+ switch or network configurations may result in situations
+ wherein the backup slaves do not receive the ARP requests; in
+ such a situation, validation of backup slaves must be
+ disabled.
+
+ This option is useful in network configurations in which
+ multiple bonding hosts are concurrently issuing ARPs to one or
+ more targets beyond a common switch. Should the link between
+ the switch and target fail (but not the switch itself), the
+ probe traffic generated by the multiple bonding instances will
+ fool the standard ARP monitor into considering the links as
+ still up. Use of the arp_validate option can resolve this, as
+ the ARP monitor will only consider ARP requests and replies
+ associated with its own instance of bonding.
+
+ This option was added in bonding version 3.1.0.
+
downdelay
Specifies the time, in milliseconds, to wait before disabling
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index bafe62f7c9b..fd521b05db8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -96,6 +96,7 @@ static char *lacp_rate = NULL;
static char *xmit_hash_policy = NULL;
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
+static char *arp_validate = NULL;
struct bond_params bonding_defaults;
module_param(max_bonds, int, 0);
@@ -127,6 +128,8 @@ module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
+module_param(arp_validate, charp, 0);
+MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
/*----------------------------- Global variables ----------------------------*/
@@ -170,6 +173,14 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = {
{ NULL, -1},
};
+struct bond_parm_tbl arp_validate_tbl[] = {
+{ "none", BOND_ARP_VALIDATE_NONE},
+{ "active", BOND_ARP_VALIDATE_ACTIVE},
+{ "backup", BOND_ARP_VALIDATE_BACKUP},
+{ "all", BOND_ARP_VALIDATE_ALL},
+{ NULL, -1},
+};
+
/*-------------------------- Forward declarations ---------------------------*/
static void bond_send_gratuitous_arp(struct bonding *bond);
@@ -1424,6 +1435,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_compute_features(bond);
+ new_slave->last_arp_rx = jiffies;
+
if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
@@ -1785,7 +1798,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
dev_set_mac_address(slave_dev, &addr);
slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
- IFF_SLAVE_INACTIVE | IFF_BONDING);
+ IFF_SLAVE_INACTIVE | IFF_BONDING |
+ IFF_SLAVE_NEEDARP);
kfree(slave);
@@ -2298,6 +2312,25 @@ static int bond_has_ip(struct bonding *bond)
return 0;
}
+static int bond_has_this_ip(struct bonding *bond, u32 ip)
+{
+ struct vlan_entry *vlan, *vlan_next;
+
+ if (ip == bond->master_ip)
+ return 1;
+
+ if (list_empty(&bond->vlan_list))
+ return 0;
+
+ list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list,
+ vlan_list) {
+ if (ip == vlan->vlan_ip)
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* We go to the (large) trouble of VLAN tagging ARP frames because
* switches in VLAN mode (especially if ports are configured as
@@ -2436,6 +2469,93 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
}
}
+static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip)
+{
+ int i;
+ u32 *targets = bond->params.arp_targets;
+
+ targets = bond->params.arp_targets;
+ for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) {
+ dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] "
+ "%u.%u.%u.%u bhti(tip) %d\n",
+ NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]),
+ bond_has_this_ip(bond, tip));
+ if (sip == targets[i]) {
+ if (bond_has_this_ip(bond, tip))
+ slave->last_arp_rx = jiffies;
+ return;
+ }
+ }
+}
+
+static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct arphdr *arp;
+ struct slave *slave;
+ struct bonding *bond;
+ unsigned char *arp_ptr;
+ u32 sip, tip;
+
+ if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
+ goto out;
+
+ bond = dev->priv;
+ read_lock(&bond->lock);
+
+ dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n",
+ bond->dev->name, skb->dev ? skb->dev->name : "NULL",
+ orig_dev ? orig_dev->name : "NULL");
+
+ slave = bond_get_slave_by_dev(bond, orig_dev);
+ if (!slave || !slave_do_arp_validate(bond, slave))
+ goto out_unlock;
+
+ /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
+ if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+ (2 * dev->addr_len) +
+ (2 * sizeof(u32)))))
+ goto out_unlock;
+
+ arp = skb->nh.arph;
+ if (arp->ar_hln != dev->addr_len ||
+ skb->pkt_type == PACKET_OTHERHOST ||
+ skb->pkt_type == PACKET_LOOPBACK ||
+ arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_pln != 4)
+ goto out_unlock;
+
+ arp_ptr = (unsigned char *)(arp + 1);
+ arp_ptr += dev->addr_len;
+ memcpy(&sip, arp_ptr, 4);
+ arp_ptr += 4 + dev->addr_len;
+ memcpy(&tip, arp_ptr, 4);
+
+ dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u"
+ " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name,
+ slave->state, bond->params.arp_validate,
+ slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip));
+
+ /*
+ * Backup slaves won't see the ARP reply, but do come through
+ * here for each ARP probe (so we swap the sip/tip to validate
+ * the probe). In a "redundant switch, common router" type of
+ * configuration, the ARP probe will (hopefully) travel from
+ * the active, through one switch, the router, then the other
+ * switch before reaching the backup.
+ */
+ if (slave->state == BOND_STATE_ACTIVE)
+ bond_validate_arp(bond, slave, sip, tip);
+ else
+ bond_validate_arp(bond, slave, tip, sip);
+
+out_unlock:
+ read_unlock(&bond->lock);
+out:
+ dev_kfree_skb(skb);
+ return NET_RX_SUCCESS;
+}
+
/*
* this function is called regularly to monitor each slave's link
* ensuring that traffic is being sent and received when arp monitoring
@@ -2600,7 +2720,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
*/
bond_for_each_slave(bond, slave, i) {
if (slave->link != BOND_LINK_UP) {
- if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) {
+ if ((jiffies - slave_last_rx(bond, slave)) <=
+ delta_in_ticks) {
slave->link = BOND_LINK_UP;
@@ -2645,7 +2766,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
if ((slave != bond->curr_active_slave) &&
(!bond->current_arp_slave) &&
- (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) &&
+ (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) &&
bond_has_ip(bond))) {
/* a backup slave has gone down; three times
* the delta allows the current slave to be
@@ -2692,7 +2813,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
* if it is up and needs to take over as the curr_active_slave
*/
if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) ||
- (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) &&
+ (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) &&
bond_has_ip(bond))) &&
((jiffies - slave->jiffies) >= 2*delta_in_ticks)) {
@@ -3315,6 +3436,21 @@ static void bond_unregister_lacpdu(struct bonding *bond)
dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
}
+void bond_register_arp(struct bonding *bond)
+{
+ struct packet_type *pt = &bond->arp_mon_pt;
+
+ pt->type = htons(ETH_P_ARP);
+ pt->dev = NULL; /*bond->dev;XXX*/
+ pt->func = bond_arp_rcv;
+ dev_add_pack(pt);
+}
+
+void bond_unregister_arp(struct bonding *bond)
+{
+ dev_remove_pack(&bond->arp_mon_pt);
+}
+
/*---------------------------- Hashing Policies -----------------------------*/
/*
@@ -3401,6 +3537,9 @@ static int bond_open(struct net_device *bond_dev)
} else {
arp_timer->function = (void *)&bond_loadbalance_arp_mon;
}
+ if (bond->params.arp_validate)
+ bond_register_arp(bond);
+
add_timer(arp_timer);
}
@@ -3428,6 +3567,9 @@ static int bond_close(struct net_device *bond_dev)
bond_unregister_lacpdu(bond);
}
+ if (bond->params.arp_validate)
+ bond_unregister_arp(bond);
+
write_lock_bh(&bond->lock);
@@ -4281,6 +4423,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
static int bond_check_params(struct bond_params *params)
{
+ int arp_validate_value;
+
/*
* Convert string parameters.
*/
@@ -4484,6 +4628,29 @@ static int bond_check_params(struct bond_params *params)
arp_interval = 0;
}
+ if (arp_validate) {
+ if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
+ printk(KERN_ERR DRV_NAME
+ ": arp_validate only supported in active-backup mode\n");
+ return -EINVAL;
+ }
+ if (!arp_interval) {
+ printk(KERN_ERR DRV_NAME
+ ": arp_validate requires arp_interval\n");
+ return -EINVAL;
+ }
+
+ arp_validate_value = bond_parse_parm(arp_validate,
+ arp_validate_tbl);
+ if (arp_validate_value == -1) {
+ printk(KERN_ERR DRV_NAME
+ ": Error: invalid arp_validate \"%s\"\n",
+ arp_validate == NULL ? "NULL" : arp_validate);
+ return -EINVAL;
+ }
+ } else
+ arp_validate_value = 0;
+
if (miimon) {
printk(KERN_INFO DRV_NAME
": MII link monitoring set to %d ms\n",
@@ -4492,8 +4659,10 @@ static int bond_check_params(struct bond_params *params)
int i;
printk(KERN_INFO DRV_NAME
- ": ARP monitoring set to %d ms with %d target(s):",
- arp_interval, arp_ip_count);
+ ": ARP monitoring set to %d ms, validate %s, with %d target(s):",
+ arp_interval,
+ arp_validate_tbl[arp_validate_value].modename,
+ arp_ip_count);
for (i = 0; i < arp_ip_count; i++)
printk (" %s", arp_ip_target[i]);
@@ -4527,6 +4696,7 @@ static int bond_check_params(struct bond_params *params)
params->xmit_policy = xmit_hashtype;
params->miimon = miimon;
params->arp_interval = arp_interval;
+ params->arp_validate = arp_validate_value;
params->updelay = updelay;
params->downdelay = downdelay;
params->use_carrier = use_carrier;
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 15b6a29bb4d..ced9ed8f995 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -51,6 +51,7 @@ extern struct bond_params bonding_defaults;
extern struct bond_parm_tbl bond_mode_tbl[];
extern struct bond_parm_tbl bond_lacp_tbl[];
extern struct bond_parm_tbl xmit_hashtype_tbl[];
+extern struct bond_parm_tbl arp_validate_tbl[];
static int expected_refcount = -1;
static struct class *netdev_class;
@@ -503,6 +504,53 @@ out:
static CLASS_DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, bonding_show_xmit_hash, bonding_store_xmit_hash);
/*
+ * Show and set arp_validate.
+ */
+static ssize_t bonding_show_arp_validate(struct class_device *cd, char *buf)
+{
+ struct bonding *bond = to_bond(cd);
+
+ return sprintf(buf, "%s %d\n",
+ arp_validate_tbl[bond->params.arp_validate].modename,
+ bond->params.arp_validate) + 1;
+}
+
+static ssize_t bonding_store_arp_validate(struct class_device *cd, const char *buf, size_t count)
+{
+ int new_value;
+ struct bonding *bond = to_bond(cd);
+
+ new_value = bond_parse_parm((char *)buf, arp_validate_tbl);
+ if (new_value < 0) {
+ printk(KERN_ERR DRV_NAME
+ ": %s: Ignoring invalid arp_validate value %s\n",
+ bond->dev->name, buf);
+ return -EINVAL;
+ }
+ if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) {
+ printk(KERN_ERR DRV_NAME
+ ": %s: arp_validate only supported in active-backup mode.\n",
+ bond->dev->name);
+ return -EINVAL;
+ }
+ printk(KERN_INFO DRV_NAME ": %s: setting arp_validate to %s (%d).\n",
+ bond->dev->name, arp_validate_tbl[new_value].modename,
+ new_value);
+
+ if (!bond->params.arp_validate && new_value) {
+ bond_register_arp(bond);
+ } else if (bond->params.arp_validate && !new_value) {
+ bond_unregister_arp(bond);
+ }
+
+ bond->params.arp_validate = new_value;
+
+ return count;
+}
+
+static CLASS_DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate);
+
+/*
* Show and set the arp timer interval. There are two tricky bits
* here. First, if ARP monitoring is activated, then we must disable
* MII monitoring. Second, if the ARP timer isn't running, we must
@@ -914,6 +962,11 @@ static ssize_t bonding_store_miimon(struct class_device *cd, const char *buf, si
"ARP monitoring. Disabling ARP monitoring...\n",
bond->dev->name);
bond->params.arp_interval = 0;
+ if (bond->params.arp_validate) {
+ bond_unregister_arp(bond);
+ bond->params.arp_validate =
+ BOND_ARP_VALIDATE_NONE;
+ }
/* Kill ARP timer, else it brings bond's link down */
if (bond->mii_timer.function) {
printk(KERN_INFO DRV_NAME
@@ -1273,6 +1326,7 @@ static CLASS_DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, N
static struct attribute *per_bond_attrs[] = {
&class_device_attr_slaves.attr,
&class_device_attr_mode.attr,
+ &class_device_attr_arp_validate.attr,
&class_device_attr_arp_interval.attr,
&class_device_attr_arp_ip_target.attr,
&class_device_attr_downdelay.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 17caafe5824..db16fee40a5 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -22,8 +22,8 @@
#include "bond_3ad.h"
#include "bond_alb.h"
-#define DRV_VERSION "3.0.3"
-#define DRV_RELDATE "March 23, 2006"
+#define DRV_VERSION "3.1.0-test"
+#define DRV_RELDATE "September 9, 2006"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
@@ -126,6 +126,7 @@ struct bond_params {
int xmit_policy;
int miimon;
int arp_interval;
+ int arp_validate;
int use_carrier;
int updelay;
int downdelay;
@@ -151,6 +152,7 @@ struct slave {
struct slave *prev;
int delay;
u32 jiffies;
+ u32 last_arp_rx;
s8 link; /* one of BOND_LINK_XXXX */
s8 state; /* one of BOND_STATE_XXXX */
u32 original_flags;
@@ -198,6 +200,7 @@ struct bonding {
struct bond_params params;
struct list_head vlan_list;
struct vlan_group *vlgrp;
+ struct packet_type arp_mon_pt;
};
/**
@@ -228,6 +231,25 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
return (struct bonding *)slave->dev->master->priv;
}
+#define BOND_ARP_VALIDATE_NONE 0
+#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE)
+#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP)
+#define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \
+ BOND_ARP_VALIDATE_BACKUP)
+
+extern inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave)
+{
+ return bond->params.arp_validate & (1 << slave->state);
+}
+
+extern inline u32 slave_last_rx(struct bonding *bond, struct slave *slave)
+{
+ if (slave_do_arp_validate(bond, slave))
+ return slave->last_arp_rx;
+
+ return slave->dev->last_rx;
+}
+
static inline void bond_set_slave_inactive_flags(struct slave *slave)
{
struct bonding *bond = slave->dev->master->priv;
@@ -235,12 +257,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
bond->params.mode != BOND_MODE_ALB)
slave->state = BOND_STATE_BACKUP;
slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
+ if (slave_do_arp_validate(bond, slave))
+ slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
}
static inline void bond_set_slave_active_flags(struct slave *slave)
{
slave->state = BOND_STATE_ACTIVE;
- slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
+ slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
}
static inline void bond_set_master_3ad_flags(struct bonding *bond)
@@ -284,6 +308,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl);
const char *bond_mode_name(int mode);
void bond_select_active_slave(struct bonding *bond);
void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
+void bond_register_arp(struct bonding *);
+void bond_unregister_arp(struct bonding *);
#endif /* _LINUX_BONDING_H */
diff --git a/include/linux/if.h b/include/linux/if.h
index a023ec1274f..8018c2e22c0 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -60,6 +60,7 @@
#define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */
#define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */
#define IFF_BONDING 0x20 /* bonding master or slave */
+#define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */
#define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 43289127b45..afd80eff272 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1016,7 +1016,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
}
/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast.
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
*/
static inline int skb_bond_should_drop(struct sk_buff *skb)
{
@@ -1025,6 +1026,10 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
if (master &&
(dev->priv_flags & IFF_SLAVE_INACTIVE)) {
+ if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+ skb->protocol == __constant_htons(ETH_P_ARP))
+ return 0;
+
if (master->priv_flags & IFF_MASTER_ALB) {
if (skb->pkt_type != PACKET_BROADCAST &&
skb->pkt_type != PACKET_MULTICAST)