summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/net/bonding/bond_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/net/bonding/bond_main.c')
-rw-r--r--kernel/drivers/net/bonding/bond_main.c192
1 files changed, 125 insertions, 67 deletions
diff --git a/kernel/drivers/net/bonding/bond_main.c b/kernel/drivers/net/bonding/bond_main.c
index 16d87bf8a..28bbca0af 100644
--- a/kernel/drivers/net/bonding/bond_main.c
+++ b/kernel/drivers/net/bonding/bond_main.c
@@ -76,7 +76,7 @@
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
#include <linux/rculist.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#include <net/switchdev.h>
#include <net/bonding.h>
#include <net/bond_3ad.h>
@@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev);
static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats);
static void bond_slave_arr_handler(struct work_struct *work);
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+ int mod);
/*---------------------------- General routines -----------------------------*/
@@ -709,40 +711,57 @@ out:
}
-static bool bond_should_change_active(struct bonding *bond)
+static struct slave *bond_choose_primary_or_current(struct bonding *bond)
{
struct slave *prim = rtnl_dereference(bond->primary_slave);
struct slave *curr = rtnl_dereference(bond->curr_active_slave);
- if (!prim || !curr || curr->link != BOND_LINK_UP)
- return true;
+ if (!prim || prim->link != BOND_LINK_UP) {
+ if (!curr || curr->link != BOND_LINK_UP)
+ return NULL;
+ return curr;
+ }
+
if (bond->force_primary) {
bond->force_primary = false;
- return true;
+ return prim;
+ }
+
+ if (!curr || curr->link != BOND_LINK_UP)
+ return prim;
+
+ /* At this point, prim and curr are both up */
+ switch (bond->params.primary_reselect) {
+ case BOND_PRI_RESELECT_ALWAYS:
+ return prim;
+ case BOND_PRI_RESELECT_BETTER:
+ if (prim->speed < curr->speed)
+ return curr;
+ if (prim->speed == curr->speed && prim->duplex <= curr->duplex)
+ return curr;
+ return prim;
+ case BOND_PRI_RESELECT_FAILURE:
+ return curr;
+ default:
+ netdev_err(bond->dev, "impossible primary_reselect %d\n",
+ bond->params.primary_reselect);
+ return curr;
}
- if (bond->params.primary_reselect == BOND_PRI_RESELECT_BETTER &&
- (prim->speed < curr->speed ||
- (prim->speed == curr->speed && prim->duplex <= curr->duplex)))
- return false;
- if (bond->params.primary_reselect == BOND_PRI_RESELECT_FAILURE)
- return false;
- return true;
}
/**
- * find_best_interface - select the best available slave to be the active one
+ * bond_find_best_slave - select the best available slave to be the active one
* @bond: our bonding struct
*/
static struct slave *bond_find_best_slave(struct bonding *bond)
{
- struct slave *slave, *bestslave = NULL, *primary;
+ struct slave *slave, *bestslave = NULL;
struct list_head *iter;
int mintime = bond->params.updelay;
- primary = rtnl_dereference(bond->primary_slave);
- if (primary && primary->link == BOND_LINK_UP &&
- bond_should_change_active(bond))
- return primary;
+ slave = bond_choose_primary_or_current(bond);
+ if (slave)
+ return slave;
bond_for_each_slave(bond, slave, iter) {
if (slave->link == BOND_LINK_UP)
@@ -769,6 +788,7 @@ static bool bond_should_notify_peers(struct bonding *bond)
slave ? slave->dev->name : "NULL");
if (!slave || !bond->send_peer_notif ||
+ !netif_carrier_ok(bond->dev) ||
test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
return false;
@@ -961,7 +981,6 @@ static void bond_poll_controller(struct net_device *bond_dev)
if (bond_3ad_get_active_agg_info(bond, &ad_info))
return;
- rcu_read_lock_bh();
bond_for_each_slave_rcu(bond, slave, iter) {
ops = slave->dev->netdev_ops;
if (!bond_slave_is_up(slave) || !ops->ndo_poll_controller)
@@ -982,7 +1001,6 @@ static void bond_poll_controller(struct net_device *bond_dev)
ops->ndo_poll_controller(slave->dev);
up(&ni->dev_lock);
}
- rcu_read_unlock_bh();
}
static void bond_netpoll_cleanup(struct net_device *bond_dev)
@@ -1035,10 +1053,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
netdev_features_t mask;
struct slave *slave;
- /* If any slave has the offload feature flag set,
- * set the offload flag on the bond.
- */
- mask = features | NETIF_F_HW_SWITCH_OFFLOAD;
+ mask = features;
features &= ~NETIF_F_ONE_FOR_ALL;
features |= NETIF_F_ALL_FOR_ALL;
@@ -1058,7 +1073,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
NETIF_F_HIGHDMA | NETIF_F_LRO)
#define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\
- NETIF_F_TSO)
+ NETIF_F_ALL_TSO)
static void bond_compute_features(struct bonding *bond)
{
@@ -1194,7 +1209,6 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
if (err)
return err;
- slave_dev->flags |= IFF_SLAVE;
rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
return 0;
}
@@ -1452,6 +1466,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
}
}
+ /* set slave flag before open to prevent IPv6 addrconf */
+ slave_dev->flags |= IFF_SLAVE;
+
/* open the slave since the application closed it */
res = dev_open(slave_dev);
if (res) {
@@ -1712,6 +1729,7 @@ err_close:
dev_close(slave_dev);
err_restore_mac:
+ slave_dev->flags &= ~IFF_SLAVE;
if (!bond->params.fail_over_mac ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
/* XXX TODO - fom follow mode needs to change master's
@@ -1731,9 +1749,17 @@ err_free:
err_undo_flags:
/* Enslave of first slave has failed and we need to fix master's mac */
- if (!bond_has_slaves(bond) &&
- ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr))
- eth_hw_addr_random(bond_dev);
+ if (!bond_has_slaves(bond)) {
+ if (ether_addr_equal_64bits(bond_dev->dev_addr,
+ slave_dev->dev_addr))
+ eth_hw_addr_random(bond_dev);
+ if (bond_dev->type != ARPHRD_ETHER) {
+ dev_close(bond_dev);
+ ether_setup(bond_dev);
+ bond_dev->flags |= IFF_MASTER;
+ bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ }
+ }
return res;
}
@@ -2394,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave)
{
struct arphdr *arp = (struct arphdr *)skb->data;
- struct slave *curr_active_slave;
+ struct slave *curr_active_slave, *curr_arp_slave;
unsigned char *arp_ptr;
__be32 sip, tip;
int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@ -2441,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
&sip, &tip);
curr_active_slave = rcu_dereference(bond->curr_active_slave);
+ curr_arp_slave = rcu_dereference(bond->current_arp_slave);
- /* Backup slaves won't see the ARP reply, but do come through
- * here for each ARP probe (so we swap the sip/tip to validate
- * the probe). In a "redundant switch, common router" type of
- * configuration, the ARP probe will (hopefully) travel from
- * the active, through one switch, the router, then the other
- * switch before reaching the backup.
+ /* We 'trust' the received ARP enough to validate it if:
+ *
+ * (a) the slave receiving the ARP is active (which includes the
+ * current ARP slave, if any), or
+ *
+ * (b) the receiving slave isn't active, but there is a currently
+ * active slave and it received valid arp reply(s) after it became
+ * the currently active slave, or
+ *
+ * (c) there is an ARP slave that sent an ARP during the prior ARP
+ * interval, and we receive an ARP reply on any slave. We accept
+ * these because switch FDB update delays may deliver the ARP
+ * reply to a slave other than the sender of the ARP request.
*
- * We 'trust' the arp requests if there is an active slave and
- * it received valid arp reply(s) after it became active. This
- * is done to avoid endless looping when we can't reach the
+ * Note: for (b), backup slaves are receiving the broadcast ARP
+ * request, not a reply. This request passes from the sending
+ * slave through the L2 switch(es) to the receiving slave. Since
+ * this is checking the request, sip/tip are swapped for
+ * validation.
+ *
+ * This is done to avoid endless looping when we can't reach the
* arp_ip_target and fool ourselves with our own arp requests.
*/
-
if (bond_is_active_slave(slave))
bond_validate_arp(bond, slave, sip, tip);
else if (curr_active_slave &&
time_after(slave_last_rx(bond, curr_active_slave),
curr_active_slave->last_link_up))
bond_validate_arp(bond, slave, tip, sip);
+ else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+ bond_time_in_interval(bond,
+ dev_trans_start(curr_arp_slave->dev), 1))
+ bond_validate_arp(bond, slave, sip, tip);
out_unlock:
if (arp != (struct arphdr *)skb->data)
@@ -2923,8 +2964,6 @@ static int bond_slave_netdev_event(unsigned long event,
struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
struct bonding *bond;
struct net_device *bond_dev;
- u32 old_speed;
- u8 old_duplex;
/* A netdev event can be generated while enslaving a device
* before netdev_rx_handler_register is called in which case
@@ -2945,17 +2984,9 @@ static int bond_slave_netdev_event(unsigned long event,
break;
case NETDEV_UP:
case NETDEV_CHANGE:
- old_speed = slave->speed;
- old_duplex = slave->duplex;
-
bond_update_speed_duplex(slave);
-
- if (BOND_MODE(bond) == BOND_MODE_8023AD) {
- if (old_speed != slave->speed)
- bond_3ad_adapter_speed_changed(slave);
- if (old_duplex != slave->duplex)
- bond_3ad_adapter_duplex_changed(slave);
- }
+ if (BOND_MODE(bond) == BOND_MODE_8023AD)
+ bond_3ad_adapter_speed_duplex_changed(slave);
/* Fallthrough */
case NETDEV_DOWN:
/* Refresh slave-array if applicable!
@@ -3075,16 +3106,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
int noff, proto = -1;
if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
- return skb_flow_dissect(skb, fk);
+ return skb_flow_dissect_flow_keys(skb, fk, 0);
- fk->ports = 0;
+ fk->ports.ports = 0;
noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
return false;
iph = ip_hdr(skb);
- fk->src = iph->saddr;
- fk->dst = iph->daddr;
+ iph_to_flow_copy_v4addrs(fk, iph);
noff += iph->ihl << 2;
if (!ip_is_fragment(iph))
proto = iph->protocol;
@@ -3092,15 +3122,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
return false;
iph6 = ipv6_hdr(skb);
- fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
- fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+ iph_to_flow_copy_v6addrs(fk, iph6);
noff += sizeof(*iph6);
proto = iph6->nexthdr;
} else {
return false;
}
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
- fk->ports = skb_flow_get_ports(skb, noff, proto);
+ fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
return true;
}
@@ -3118,6 +3147,10 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
struct flow_keys flow;
u32 hash;
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+ skb->l4_hash)
+ return skb->hash;
+
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
!bond_flow_dissect(bond, skb, &flow))
return bond_eth_hash(skb);
@@ -3126,8 +3159,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
hash = bond_eth_hash(skb);
else
- hash = (__force u32)flow.ports;
- hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+ hash = (__force u32)flow.ports.ports;
+ hash ^= (__force u32)flow_get_u32_dst(&flow) ^
+ (__force u32)flow_get_u32_src(&flow);
hash ^= (hash >> 16);
hash ^= (hash >> 8);
@@ -3759,7 +3793,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
struct slave *slave;
struct list_head *iter;
struct bond_up_slave *new_arr, *old_arr;
- int slaves_in_agg;
int agg_id = 0;
int ret = 0;
@@ -3790,7 +3823,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
}
goto out;
}
- slaves_in_agg = ad_info.ports;
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) {
@@ -4060,8 +4092,12 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_add_slave = bond_enslave,
.ndo_del_slave = bond_release,
.ndo_fix_features = bond_fix_features,
- .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink,
- .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink,
+ .ndo_bridge_setlink = switchdev_port_bridge_setlink,
+ .ndo_bridge_getlink = switchdev_port_bridge_getlink,
+ .ndo_bridge_dellink = switchdev_port_bridge_dellink,
+ .ndo_fdb_add = switchdev_port_fdb_add,
+ .ndo_fdb_del = switchdev_port_fdb_del,
+ .ndo_fdb_dump = switchdev_port_fdb_dump,
.ndo_features_check = passthru_features_check,
};
@@ -4097,9 +4133,8 @@ void bond_setup(struct net_device *bond_dev)
SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
/* Initialize the device options */
- bond_dev->tx_queue_len = 0;
bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
- bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;
+ bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
/* don't acquire bond device's netif_tx_lock when transmitting */
@@ -4161,6 +4196,8 @@ static int bond_check_params(struct bond_params *params)
struct bond_opt_value newval;
const struct bond_opt_value *valptr;
int arp_all_targets_value;
+ u16 ad_actor_sys_prio = 0;
+ u16 ad_user_port_key = 0;
/* Convert string parameters. */
if (mode) {
@@ -4455,6 +4492,24 @@ static int bond_check_params(struct bond_params *params)
fail_over_mac_value = BOND_FOM_NONE;
}
+ bond_opt_initstr(&newval, "default");
+ valptr = bond_opt_parse(
+ bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO),
+ &newval);
+ if (!valptr) {
+ pr_err("Error: No ad_actor_sys_prio default value");
+ return -EINVAL;
+ }
+ ad_actor_sys_prio = valptr->value;
+
+ valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
+ &newval);
+ if (!valptr) {
+ pr_err("Error: No ad_user_port_key default value");
+ return -EINVAL;
+ }
+ ad_user_port_key = valptr->value;
+
if (lp_interval == 0) {
pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
@@ -4483,6 +4538,9 @@ static int bond_check_params(struct bond_params *params)
params->lp_interval = lp_interval;
params->packets_per_slave = packets_per_slave;
params->tlb_dynamic_lb = 1; /* Default value */
+ params->ad_actor_sys_prio = ad_actor_sys_prio;
+ eth_zero_addr(params->ad_actor_system);
+ params->ad_user_port_key = ad_user_port_key;
if (packets_per_slave > 0) {
params->reciprocal_packets_per_slave =
reciprocal_value(packets_per_slave);