summaryrefslogtreecommitdiffstats
path: root/kernel/net/ipv4/fib_semantics.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/ipv4/fib_semantics.c')
-rw-r--r--kernel/net/ipv4/fib_semantics.c520
1 files changed, 396 insertions, 124 deletions
diff --git a/kernel/net/ipv4/fib_semantics.c b/kernel/net/ipv4/fib_semantics.c
index 8d695b665..d97268e8f 100644
--- a/kernel/net/ipv4/fib_semantics.c
+++ b/kernel/net/ipv4/fib_semantics.c
@@ -42,6 +42,7 @@
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>
+#include <net/lwtunnel.h>
#include "fib_lookup.h"
@@ -56,8 +57,7 @@ static unsigned int fib_info_cnt;
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-static DEFINE_SPINLOCK(fib_multipath_lock);
+u32 fib_multipath_secret __read_mostly;
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -208,6 +208,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
dev_put(nexthop_nh->nh_dev);
+ lwtstate_put(nexthop_nh->nh_lwtstate);
free_nh_exceptions(nexthop_nh);
rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
@@ -266,7 +267,8 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid ||
#endif
- ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
+ lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
+ ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
return -1;
onh++;
} endfor_nexthops(fi);
@@ -318,7 +320,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
nfi->fib_type == fi->fib_type &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
- ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
+ !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
}
@@ -366,6 +368,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
if (fi->fib_nhs) {
+ size_t nh_encapsize = 0;
/* Also handles the special case fib_nhs == 1 */
/* each nexthop is packed in an attribute */
@@ -374,8 +377,21 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
+ /* grab encap info */
+ for_nexthops(fi) {
+ if (nh->nh_lwtstate) {
+ /* RTA_ENCAP_TYPE */
+ nh_encapsize += lwtunnel_get_encap_size(
+ nh->nh_lwtstate);
+ /* RTA_ENCAP */
+ nh_encapsize += nla_total_size(2);
+ }
+ } endfor_nexthops(fi);
+
/* all nexthops are packed in a nested attribute */
- payload += nla_total_size(fi->fib_nhs * nhsize);
+ payload += nla_total_size((fi->fib_nhs * nhsize) +
+ nh_encapsize);
+
}
return payload;
@@ -421,13 +437,15 @@ static int fib_detect_death(struct fib_info *fi, int order,
if (n) {
state = n->nud_state;
neigh_release(n);
+ } else {
+ return 0;
}
if (state == NUD_REACHABLE)
return 0;
if ((state & NUD_VALID) && order != dflt)
return 0;
if ((state & NUD_VALID) ||
- (*last_idx < 0 && order > dflt)) {
+ (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
*last_resort = fi;
*last_idx = order;
}
@@ -452,6 +470,9 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ int ret;
+
change_nexthops(fi) {
int attrlen;
@@ -475,18 +496,130 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
if (nexthop_nh->nh_tclassid)
fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
+ nla = nla_find(attrs, attrlen, RTA_ENCAP);
+ if (nla) {
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+ struct nlattr *nla_entype;
+
+ nla_entype = nla_find(attrs, attrlen,
+ RTA_ENCAP_TYPE);
+ if (!nla_entype)
+ goto err_inval;
+ if (cfg->fc_oif)
+ dev = __dev_get_by_index(net, cfg->fc_oif);
+ ret = lwtunnel_build_state(dev, nla_get_u16(
+ nla_entype),
+ nla, AF_INET, cfg,
+ &lwtstate);
+ if (ret)
+ goto errout;
+ nexthop_nh->nh_lwtstate =
+ lwtstate_get(lwtstate);
+ }
}
rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
return 0;
+
+err_inval:
+ ret = -EINVAL;
+
+errout:
+ return ret;
}
-#endif
+static void fib_rebalance(struct fib_info *fi)
+{
+ int total;
+ int w;
+ struct in_device *in_dev;
+
+ if (fi->fib_nhs < 2)
+ return;
+
+ total = 0;
+ for_nexthops(fi) {
+ if (nh->nh_flags & RTNH_F_DEAD)
+ continue;
+
+ in_dev = __in_dev_get_rtnl(nh->nh_dev);
+
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nh->nh_flags & RTNH_F_LINKDOWN)
+ continue;
+
+ total += nh->nh_weight;
+ } endfor_nexthops(fi);
+
+ w = 0;
+ change_nexthops(fi) {
+ int upper_bound;
+
+ in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
+
+ if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
+ upper_bound = -1;
+ } else if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
+ upper_bound = -1;
+ } else {
+ w += nexthop_nh->nh_weight;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
+ total) - 1;
+ }
+
+ atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
+ } endfor_nexthops(fi);
+
+ net_get_random_once(&fib_multipath_secret,
+ sizeof(fib_multipath_secret));
+}
+
+static inline void fib_add_weight(struct fib_info *fi,
+ const struct fib_nh *nh)
+{
+ fi->fib_weight += nh->nh_weight;
+}
+
+#else /* CONFIG_IP_ROUTE_MULTIPATH */
+
+#define fib_rebalance(fi) do { } while (0)
+#define fib_add_weight(fi, nh) do { } while (0)
+
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
+static int fib_encap_match(struct net *net, u16 encap_type,
+ struct nlattr *encap,
+ int oif, const struct fib_nh *nh,
+ const struct fib_config *cfg)
+{
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+ int ret, result = 0;
+
+ if (encap_type == LWTUNNEL_ENCAP_NONE)
+ return 0;
+
+ if (oif)
+ dev = __dev_get_by_index(net, oif);
+ ret = lwtunnel_build_state(dev, encap_type, encap,
+ AF_INET, cfg, &lwtstate);
+ if (!ret) {
+ result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
+ lwtstate_free(lwtstate);
+ }
+
+ return result;
+}
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
struct rtnexthop *rtnh;
int remaining;
@@ -496,6 +629,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
return 1;
if (cfg->fc_oif || cfg->fc_gw) {
+ if (cfg->fc_encap) {
+ if (fib_encap_match(net, cfg->fc_encap_type,
+ cfg->fc_encap, cfg->fc_oif,
+ fi->fib_nh, cfg))
+ return 1;
+ }
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
@@ -585,7 +724,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
struct fib_nh *nh)
{
- int err;
+ int err = 0;
struct net *net;
struct net_device *dev;
@@ -594,16 +733,20 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
struct fib_result res;
if (nh->nh_flags & RTNH_F_ONLINK) {
+ unsigned int addr_type;
if (cfg->fc_scope >= RT_SCOPE_LINK)
return -EINVAL;
- if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
- return -EINVAL;
dev = __dev_get_by_index(net, nh->nh_oif);
if (!dev)
return -ENODEV;
if (!(dev->flags & IFF_UP))
return -ENETDOWN;
+ addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
+ if (addr_type != RTN_UNICAST)
+ return -EINVAL;
+ if (!netif_carrier_ok(dev))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
nh->nh_dev = dev;
dev_hold(dev);
nh->nh_scope = RT_SCOPE_LINK;
@@ -611,6 +754,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
}
rcu_read_lock();
{
+ struct fib_table *tbl = NULL;
struct flowi4 fl4 = {
.daddr = nh->nh_gw,
.flowi4_scope = cfg->fc_scope + 1,
@@ -621,7 +765,24 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
/* It is not necessary, but requires a bit of thinking */
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- err = fib_lookup(net, &fl4, &res);
+
+ if (cfg->fc_table)
+ tbl = fib_get_table(net, cfg->fc_table);
+
+ if (tbl)
+ err = fib_table_lookup(tbl, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE |
+ FIB_LOOKUP_NOREF);
+
+ /* on error or if no table given do full lookup. This
+ * is needed for example when nexthops are in the local
+ * table rather than the given table
+ */
+ if (!tbl || err) {
+ err = fib_lookup(net, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE);
+ }
+
if (err) {
rcu_read_unlock();
return err;
@@ -636,6 +797,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
if (!dev)
goto out;
dev_hold(dev);
+ if (!netif_carrier_ok(dev))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
} else {
struct in_device *in_dev;
@@ -654,6 +817,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
nh->nh_dev = in_dev->dev;
dev_hold(nh->nh_dev);
nh->nh_scope = RT_SCOPE_HOST;
+ if (!netif_carrier_ok(nh->nh_dev))
+ nh->nh_flags |= RTNH_F_LINKDOWN;
err = 0;
}
out:
@@ -713,8 +878,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *dest;
unsigned int new_hash;
- hlist_del(&fi->fib_hash);
-
new_hash = fib_info_hashfn(fi);
dest = &new_info_hash[new_hash];
hlist_add_head(&fi->fib_hash, dest);
@@ -731,8 +894,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *ldest;
unsigned int new_hash;
- hlist_del(&fi->fib_lhash);
-
new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
ldest = &new_laddrhash[new_hash];
hlist_add_head(&fi->fib_lhash, ldest);
@@ -757,6 +918,74 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
return nh->nh_saddr;
}
+static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
+{
+ if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+ fib_prefsrc != cfg->fc_dst) {
+ u32 tb_id = cfg->fc_table;
+ int rc;
+
+ if (tb_id == RT_TABLE_MAIN)
+ tb_id = RT_TABLE_LOCAL;
+
+ rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+ fib_prefsrc, tb_id);
+
+ if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
+ rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+ fib_prefsrc, RT_TABLE_LOCAL);
+ }
+
+ if (rc != RTN_LOCAL)
+ return false;
+ }
+ return true;
+}
+
+static int
+fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
+{
+ bool ecn_ca = false;
+ struct nlattr *nla;
+ int remaining;
+
+ if (!cfg->fc_mx)
+ return 0;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (type > RTAX_MAX)
+ return -EINVAL;
+
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ if (val == TCP_CA_UNSPEC)
+ return -EINVAL;
+ } else {
+ val = nla_get_u32(nla);
+ }
+ if (type == RTAX_ADVMSS && val > 65535 - 40)
+ val = 65535 - 40;
+ if (type == RTAX_MTU && val > 65535 - 15)
+ val = 65535 - 15;
+ if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+ return -EINVAL;
+ fi->fib_metrics[type - 1] = val;
+ }
+
+ if (ecn_ca)
+ fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+
+ return 0;
+}
+
struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
@@ -829,36 +1058,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
} endfor_nexthops(fi)
- if (cfg->fc_mx) {
- struct nlattr *nla;
- int remaining;
-
- nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
- int type = nla_type(nla);
-
- if (type) {
- u32 val;
-
- if (type > RTAX_MAX)
- goto err_inval;
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
-
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp);
- if (val == TCP_CA_UNSPEC)
- goto err_inval;
- } else {
- val = nla_get_u32(nla);
- }
- if (type == RTAX_ADVMSS && val > 65535 - 40)
- val = 65535 - 40;
- if (type == RTAX_MTU && val > 65535 - 15)
- val = 65535 - 15;
- fi->fib_metrics[type - 1] = val;
- }
- }
- }
+ err = fib_convert_metrics(fi, cfg);
+ if (err)
+ goto failure;
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -879,6 +1081,22 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
} else {
struct fib_nh *nh = fi->fib_nh;
+ if (cfg->fc_encap) {
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+
+ if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+ goto err_inval;
+ if (cfg->fc_oif)
+ dev = __dev_get_by_index(net, cfg->fc_oif);
+ err = lwtunnel_build_state(dev, cfg->fc_encap_type,
+ cfg->fc_encap, AF_INET, cfg,
+ &lwtstate);
+ if (err)
+ goto failure;
+
+ nh->nh_lwtstate = lwtstate_get(lwtstate);
+ }
nh->nh_oif = cfg->fc_oif;
nh->nh_gw = cfg->fc_gw;
nh->nh_flags = cfg->fc_flags;
@@ -924,24 +1142,29 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (!nh->nh_dev)
goto failure;
} else {
+ int linkdown = 0;
+
change_nexthops(fi) {
err = fib_check_nh(cfg, fi, nexthop_nh);
if (err != 0)
goto failure;
+ if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+ linkdown++;
} endfor_nexthops(fi)
+ if (linkdown == fi->fib_nhs)
+ fi->fib_flags |= RTNH_F_LINKDOWN;
}
- if (fi->fib_prefsrc) {
- if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
- fi->fib_prefsrc != cfg->fc_dst)
- if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
- goto err_inval;
- }
+ if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
+ goto err_inval;
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
+ fib_add_weight(fi, nexthop_nh);
} endfor_nexthops(fi)
+ fib_rebalance(fi);
+
link_it:
ofi = fib_find_info(fi);
if (ofi) {
@@ -1027,17 +1250,27 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
if (fi->fib_nhs == 1) {
+ struct in_device *in_dev;
+
if (fi->fib_nh->nh_gw &&
nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
goto nla_put_failure;
if (fi->fib_nh->nh_oif &&
nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
goto nla_put_failure;
+ if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
+ in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+ rtm->rtm_flags |= RTNH_F_DEAD;
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
if (fi->fib_nh[0].nh_tclassid &&
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
goto nla_put_failure;
#endif
+ if (fi->fib_nh->nh_lwtstate)
+ lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate);
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
@@ -1049,11 +1282,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
for_nexthops(fi) {
+ struct in_device *in_dev;
+
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+ if (nh->nh_flags & RTNH_F_LINKDOWN) {
+ in_dev = __in_dev_get_rtnl(nh->nh_dev);
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+ rtnh->rtnh_flags |= RTNH_F_DEAD;
+ }
rtnh->rtnh_hops = nh->nh_weight - 1;
rtnh->rtnh_ifindex = nh->nh_oif;
@@ -1065,6 +1306,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
goto nla_put_failure;
#endif
+ if (nh->nh_lwtstate)
+ lwtunnel_fill_encap(skb, nh->nh_lwtstate);
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
} endfor_nexthops(fi);
@@ -1107,7 +1350,13 @@ int fib_sync_down_addr(struct net *net, __be32 local)
return ret;
}
-int fib_sync_down_dev(struct net_device *dev, int force)
+/* Event force Flags Description
+ * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
+ * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
+ * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed
+ * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed
+ */
+int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
int ret = 0;
int scope = RT_SCOPE_NOWHERE;
@@ -1133,49 +1382,79 @@ int fib_sync_down_dev(struct net_device *dev, int force)
dead++;
else if (nexthop_nh->nh_dev == dev &&
nexthop_nh->nh_scope != scope) {
- nexthop_nh->nh_flags |= RTNH_F_DEAD;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- spin_lock_bh(&fib_multipath_lock);
- fi->fib_power -= nexthop_nh->nh_power;
- nexthop_nh->nh_power = 0;
- spin_unlock_bh(&fib_multipath_lock);
-#endif
+ switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ nexthop_nh->nh_flags |= RTNH_F_DEAD;
+ /* fall through */
+ case NETDEV_CHANGE:
+ nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
+ break;
+ }
dead++;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (force > 1 && nexthop_nh->nh_dev == dev) {
+ if (event == NETDEV_UNREGISTER &&
+ nexthop_nh->nh_dev == dev) {
dead = fi->fib_nhs;
break;
}
#endif
} endfor_nexthops(fi)
if (dead == fi->fib_nhs) {
- fi->fib_flags |= RTNH_F_DEAD;
+ switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ fi->fib_flags |= RTNH_F_DEAD;
+ /* fall through */
+ case NETDEV_CHANGE:
+ fi->fib_flags |= RTNH_F_LINKDOWN;
+ break;
+ }
ret++;
}
+
+ fib_rebalance(fi);
}
return ret;
}
/* Must be invoked inside of an RCU protected region. */
-void fib_select_default(struct fib_result *res)
+void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
struct fib_info *fi = NULL, *last_resort = NULL;
struct hlist_head *fa_head = res->fa_head;
struct fib_table *tb = res->table;
+ u8 slen = 32 - res->prefixlen;
int order = -1, last_idx = -1;
- struct fib_alias *fa;
+ struct fib_alias *fa, *fa1 = NULL;
+ u32 last_prio = res->fi->fib_priority;
+ u8 last_tos = 0;
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
+ if (fa->fa_slen != slen)
+ continue;
+ if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ continue;
+ if (fa->tb_id != tb->tb_id)
+ continue;
+ if (next_fi->fib_priority > last_prio &&
+ fa->fa_tos == last_tos) {
+ if (last_tos)
+ continue;
+ break;
+ }
+ if (next_fi->fib_flags & RTNH_F_DEAD)
+ continue;
+ last_tos = fa->fa_tos;
+ last_prio = next_fi->fib_priority;
+
if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
-
- if (next_fi->fib_priority > res->fi->fib_priority)
- break;
if (!next_fi->fib_nh[0].nh_gw ||
next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
continue;
@@ -1185,10 +1464,11 @@ void fib_select_default(struct fib_result *res)
if (!fi) {
if (next_fi != res->fi)
break;
+ fa1 = fa;
} else if (!fib_detect_death(fi, order, &last_resort,
- &last_idx, tb->tb_default)) {
+ &last_idx, fa1->fa_default)) {
fib_result_assign(res, fi);
- tb->tb_default = order;
+ fa1->fa_default = order;
goto out;
}
fi = next_fi;
@@ -1196,31 +1476,30 @@ void fib_select_default(struct fib_result *res)
}
if (order <= 0 || !fi) {
- tb->tb_default = -1;
+ if (fa1)
+ fa1->fa_default = -1;
goto out;
}
if (!fib_detect_death(fi, order, &last_resort, &last_idx,
- tb->tb_default)) {
+ fa1->fa_default)) {
fib_result_assign(res, fi);
- tb->tb_default = order;
+ fa1->fa_default = order;
goto out;
}
if (last_idx >= 0)
fib_result_assign(res, last_resort);
- tb->tb_default = last_idx;
+ fa1->fa_default = last_idx;
out:
return;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/*
* Dead device goes up. We wake up dead nexthops.
* It takes sense only on multipath routes.
*/
-int fib_sync_up(struct net_device *dev)
+int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
{
struct fib_info *prev_fi;
unsigned int hash;
@@ -1231,6 +1510,13 @@ int fib_sync_up(struct net_device *dev)
if (!(dev->flags & IFF_UP))
return 0;
+ if (nh_flags & RTNH_F_DEAD) {
+ unsigned int flags = dev_get_flags(dev);
+
+ if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+ nh_flags |= RTNH_F_LINKDOWN;
+ }
+
prev_fi = NULL;
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
@@ -1247,7 +1533,7 @@ int fib_sync_up(struct net_device *dev)
prev_fi = fi;
alive = 0;
change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
+ if (!(nexthop_nh->nh_flags & nh_flags)) {
alive++;
continue;
}
@@ -1258,71 +1544,57 @@ int fib_sync_up(struct net_device *dev)
!__in_dev_get_rtnl(dev))
continue;
alive++;
- spin_lock_bh(&fib_multipath_lock);
- nexthop_nh->nh_power = 0;
- nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
- spin_unlock_bh(&fib_multipath_lock);
+ nexthop_nh->nh_flags &= ~nh_flags;
} endfor_nexthops(fi)
if (alive > 0) {
- fi->fib_flags &= ~RTNH_F_DEAD;
+ fi->fib_flags &= ~nh_flags;
ret++;
}
+
+ fib_rebalance(fi);
}
return ret;
}
-/*
- * The algorithm is suboptimal, but it provides really
- * fair weighted route distribution.
- */
-void fib_select_multipath(struct fib_result *res)
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+void fib_select_multipath(struct fib_result *res, int hash)
{
struct fib_info *fi = res->fi;
- int w;
- spin_lock_bh(&fib_multipath_lock);
- if (fi->fib_power <= 0) {
- int power = 0;
- change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
- power += nexthop_nh->nh_weight;
- nexthop_nh->nh_power = nexthop_nh->nh_weight;
- }
- } endfor_nexthops(fi);
- fi->fib_power = power;
- if (power <= 0) {
- spin_unlock_bh(&fib_multipath_lock);
- /* Race condition: route has just become dead. */
- res->nh_sel = 0;
- return;
- }
- }
-
-
- /* w should be random number [0..fi->fib_power-1],
- * it is pretty bad approximation.
- */
-
- w = jiffies % fi->fib_power;
+ for_nexthops(fi) {
+ if (hash > atomic_read(&nh->nh_upper_bound))
+ continue;
- change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
- nexthop_nh->nh_power) {
- w -= nexthop_nh->nh_power;
- if (w <= 0) {
- nexthop_nh->nh_power--;
- fi->fib_power--;
- res->nh_sel = nhsel;
- spin_unlock_bh(&fib_multipath_lock);
- return;
- }
- }
+ res->nh_sel = nhsel;
+ return;
} endfor_nexthops(fi);
/* Race condition: route has just become dead. */
res->nh_sel = 0;
- spin_unlock_bh(&fib_multipath_lock);
}
#endif
+
+void fib_select_path(struct net *net, struct fib_result *res,
+ struct flowi4 *fl4, int mp_hash)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
+ if (mp_hash < 0)
+ mp_hash = get_hash_from_flowi4(fl4) >> 1;
+
+ fib_select_multipath(res, mp_hash);
+ }
+ else
+#endif
+ if (!res->prefixlen &&
+ res->table->tb_num_default > 1 &&
+ res->type == RTN_UNICAST && !fl4->flowi4_oif)
+ fib_select_default(fl4, res);
+
+ if (!fl4->saddr)
+ fl4->saddr = FIB_RES_PREFSRC(net, *res);
+}
+EXPORT_SYMBOL_GPL(fib_select_path);