summaryrefslogtreecommitdiffstats
path: root/kernel/net/ipv4/fib_frontend.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/ipv4/fib_frontend.c')
-rw-r--r--kernel/net/ipv4/fib_frontend.c130
1 files changed, 98 insertions, 32 deletions
diff --git a/kernel/net/ipv4/fib_frontend.c b/kernel/net/ipv4/fib_frontend.c
index 872494e6e..473447593 100644
--- a/kernel/net/ipv4/fib_frontend.c
+++ b/kernel/net/ipv4/fib_frontend.c
@@ -45,6 +45,8 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
+#include <net/l3mdev.h>
+#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -211,12 +213,12 @@ void fib_flush_external(struct net *net)
*/
static inline unsigned int __inet_dev_addr_type(struct net *net,
const struct net_device *dev,
- __be32 addr)
+ __be32 addr, u32 tb_id)
{
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res;
unsigned int ret = RTN_BROADCAST;
- struct fib_table *local_table;
+ struct fib_table *table;
if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
return RTN_BROADCAST;
@@ -225,10 +227,10 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
rcu_read_lock();
- local_table = fib_get_table(net, RT_TABLE_LOCAL);
- if (local_table) {
+ table = fib_get_table(net, tb_id);
+ if (table) {
ret = RTN_UNICAST;
- if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
+ if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
if (!dev || dev == res.fi->fib_dev)
ret = res.type;
}
@@ -238,19 +240,40 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
return ret;
}
+unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id)
+{
+ return __inet_dev_addr_type(net, NULL, addr, tb_id);
+}
+EXPORT_SYMBOL(inet_addr_type_table);
+
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
- return __inet_dev_addr_type(net, NULL, addr);
+ return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL);
}
EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
- return __inet_dev_addr_type(net, dev, addr);
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+
+ return __inet_dev_addr_type(net, dev, addr, rt_table);
}
EXPORT_SYMBOL(inet_dev_addr_type);
+/* inet_addr_type with dev == NULL but using the table from a dev
+ * if one is associated
+ */
+unsigned int inet_addr_type_dev_table(struct net *net,
+ const struct net_device *dev,
+ __be32 addr)
+{
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+
+ return __inet_dev_addr_type(net, NULL, addr, rt_table);
+}
+EXPORT_SYMBOL(inet_addr_type_dev_table);
+
__be32 fib_compute_spec_dst(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -280,7 +303,8 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_scope = scope;
fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
- if (!fib_lookup(net, &fl4, &res))
+ fl4.flowi4_tun_key.tun_id = 0;
+ if (!fib_lookup(net, &fl4, &res, 0))
return FIB_RES_PREFSRC(net, res);
} else {
scope = RT_SCOPE_LINK;
@@ -308,18 +332,24 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+ fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
+ if (!fl4.flowi4_iif)
+ fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+ fl4.flowi4_tun_key.tun_id = 0;
+ fl4.flowi4_flags = 0;
no_addr = idev->ifa_list == NULL;
fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
+ trace_fib_validate_source(dev, &fl4);
+
net = dev_net(dev);
- if (fib_lookup(net, &fl4, &res))
+ if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
if (res.type != RTN_UNICAST &&
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
@@ -337,6 +367,9 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (nh->nh_dev == dev) {
dev_match = true;
break;
+ } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ dev_match = true;
+ break;
}
}
#else
@@ -354,7 +387,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_oif = dev->ifindex;
ret = 0;
- if (fib_lookup(net, &fl4, &res) == 0) {
+ if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
if (res.type == RTN_UNICAST)
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
}
@@ -494,9 +527,12 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
addr = sk_extract_addr(&rt->rt_gateway);
if (rt->rt_gateway.sa_family == AF_INET && addr) {
+ unsigned int addr_type;
+
cfg->fc_gw = addr;
+ addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
if (rt->rt_flags & RTF_GATEWAY &&
- inet_addr_type(net, addr) == RTN_UNICAST)
+ addr_type == RTN_UNICAST)
cfg->fc_scope = RT_SCOPE_UNIVERSE;
}
@@ -591,6 +627,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_FLOW] = { .type = NLA_U32 },
+ [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [RTA_ENCAP] = { .type = NLA_NESTED },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -656,6 +694,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
case RTA_TABLE:
cfg->fc_table = nla_get_u32(attr);
break;
+ case RTA_ENCAP:
+ cfg->fc_encap = attr;
+ break;
+ case RTA_ENCAP_TYPE:
+ cfg->fc_encap_type = nla_get_u16(attr);
+ break;
}
}
@@ -760,6 +804,7 @@ out:
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
+ u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
struct fib_table *tb;
struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
@@ -774,11 +819,10 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
},
};
- if (type == RTN_UNICAST)
- tb = fib_new_table(net, RT_TABLE_MAIN);
- else
- tb = fib_new_table(net, RT_TABLE_LOCAL);
+ if (!tb_id)
+ tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;
+ tb = fib_new_table(net, tb_id);
if (!tb)
return;
@@ -823,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
- fib_magic(RTM_NEWROUTE,
- dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- prefix, ifa->ifa_prefixlen, prim);
+ if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, prim);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
@@ -870,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
}
} else if (!ipv4_is_zeronet(any) &&
(any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
- fib_magic(RTM_DELROUTE,
- dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
+ if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ any, ifa->ifa_prefixlen, prim);
subnet = 1;
}
@@ -960,11 +1006,14 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
}
if (!(ok & LOCAL_OK)) {
+ unsigned int addr_type;
+
fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
/* Check, that this local address finally disappeared. */
- if (gone &&
- inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+ addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
+ ifa->ifa_local);
+ if (gone && addr_type != RTN_LOCAL) {
/* And the last, but not the least thing.
* We must flush stray FIB entries.
*
@@ -1063,9 +1112,10 @@ static void nl_fib_lookup_exit(struct net *net)
net->ipv4.fibnl = NULL;
}
-static void fib_disable_ip(struct net_device *dev, int force)
+static void fib_disable_ip(struct net_device *dev, unsigned long event,
+ bool force)
{
- if (fib_sync_down_dev(dev, force))
+ if (fib_sync_down_dev(dev, event, force))
fib_flush(dev_net(dev));
rt_cache_flush(dev_net(dev));
arp_ifdown(dev);
@@ -1081,7 +1131,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
case NETDEV_UP:
fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- fib_sync_up(dev);
+ fib_sync_up(dev, RTNH_F_DEAD);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(dev_net(dev));
@@ -1093,7 +1143,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
* Disable IP.
*/
- fib_disable_ip(dev, 1);
+ fib_disable_ip(dev, event, true);
} else {
rt_cache_flush(dev_net(dev));
}
@@ -1105,11 +1155,13 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info;
struct in_device *in_dev;
struct net *net = dev_net(dev);
+ unsigned int flags;
if (event == NETDEV_UNREGISTER) {
- fib_disable_ip(dev, 2);
+ fib_disable_ip(dev, event, true);
rt_flush_dev(dev);
return NOTIFY_DONE;
}
@@ -1124,18 +1176,32 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_add_ifaddr(ifa);
} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- fib_sync_up(dev);
+ fib_sync_up(dev, RTNH_F_DEAD);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(net);
break;
case NETDEV_DOWN:
- fib_disable_ip(dev, 0);
+ fib_disable_ip(dev, event, false);
break;
- case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
+ flags = dev_get_flags(dev);
+ if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+ fib_sync_up(dev, RTNH_F_LINKDOWN);
+ else
+ fib_sync_down_dev(dev, event, false);
+ /* fall through */
+ case NETDEV_CHANGEMTU:
rt_cache_flush(net);
break;
+ case NETDEV_CHANGEUPPER:
+ info = ptr;
+ /* flush all routes if dev is linked to or unlinked from
+ * an L3 master device (e.g., VRF)
+ */
+ if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ fib_disable_ip(dev, NETDEV_DOWN, true);
+ break;
}
return NOTIFY_DONE;
}