summaryrefslogtreecommitdiffstats
path: root/kernel/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/ipv6')
-rw-r--r--kernel/net/ipv6/Kconfig30
-rw-r--r--kernel/net/ipv6/Makefile2
-rw-r--r--kernel/net/ipv6/addrconf.c512
-rw-r--r--kernel/net/ipv6/addrconf_core.c11
-rw-r--r--kernel/net/ipv6/addrlabel.c2
-rw-r--r--kernel/net/ipv6/af_inet6.c34
-rw-r--r--kernel/net/ipv6/ah6.c4
-rw-r--r--kernel/net/ipv6/datagram.c19
-rw-r--r--kernel/net/ipv6/esp6.c201
-rw-r--r--kernel/net/ipv6/exthdrs.c5
-rw-r--r--kernel/net/ipv6/fib6_rules.c25
-rw-r--r--kernel/net/ipv6/icmp.c44
-rw-r--r--kernel/net/ipv6/ila.c229
-rw-r--r--kernel/net/ipv6/inet6_connection_sock.c98
-rw-r--r--kernel/net/ipv6/inet6_hashtables.c19
-rw-r--r--kernel/net/ipv6/ip6_fib.c65
-rw-r--r--kernel/net/ipv6/ip6_flowlabel.c9
-rw-r--r--kernel/net/ipv6/ip6_gre.c106
-rw-r--r--kernel/net/ipv6/ip6_input.c20
-rw-r--r--kernel/net/ipv6/ip6_offload.c12
-rw-r--r--kernel/net/ipv6/ip6_output.c268
-rw-r--r--kernel/net/ipv6/ip6_tunnel.c149
-rw-r--r--kernel/net/ipv6/ip6_udp_tunnel.c13
-rw-r--r--kernel/net/ipv6/ip6_vti.c2
-rw-r--r--kernel/net/ipv6/ip6mr.c32
-rw-r--r--kernel/net/ipv6/ipv6_sockglue.c33
-rw-r--r--kernel/net/ipv6/mcast.c11
-rw-r--r--kernel/net/ipv6/mcast_snoop.c216
-rw-r--r--kernel/net/ipv6/mip6.c16
-rw-r--r--kernel/net/ipv6/ndisc.c94
-rw-r--r--kernel/net/ipv6/netfilter.c9
-rw-r--r--kernel/net/ipv6/netfilter/Kconfig17
-rw-r--r--kernel/net/ipv6/netfilter/Makefile3
-rw-r--r--kernel/net/ipv6/netfilter/ip6_tables.c162
-rw-r--r--kernel/net/ipv6/netfilter/ip6t_REJECT.c11
-rw-r--r--kernel/net/ipv6/netfilter/ip6t_SYNPROXY.c31
-rw-r--r--kernel/net/ipv6/netfilter/ip6t_rpfilter.c6
-rw-r--r--kernel/net/ipv6/netfilter/ip6table_filter.c6
-rw-r--r--kernel/net/ipv6/netfilter/ip6table_mangle.c18
-rw-r--r--kernel/net/ipv6/netfilter/ip6table_nat.c26
-rw-r--r--kernel/net/ipv6/netfilter/ip6table_raw.c6
-rw-r--r--kernel/net/ipv6/netfilter/ip6table_security.c7
-rw-r--r--kernel/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c20
-rw-r--r--kernel/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c12
-rw-r--r--kernel/net/ipv6/netfilter/nf_conntrack_reasm.c33
-rw-r--r--kernel/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c32
-rw-r--r--kernel/net/ipv6/netfilter/nf_dup_ipv6.c82
-rw-r--r--kernel/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c48
-rw-r--r--kernel/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c2
-rw-r--r--kernel/net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--kernel/net/ipv6/netfilter/nf_reject_ipv6.c6
-rw-r--r--kernel/net/ipv6/netfilter/nf_tables_ipv6.c10
-rw-r--r--kernel/net/ipv6/netfilter/nft_chain_nat_ipv6.c22
-rw-r--r--kernel/net/ipv6/netfilter/nft_chain_route_ipv6.c14
-rw-r--r--kernel/net/ipv6/netfilter/nft_dup_ipv6.c108
-rw-r--r--kernel/net/ipv6/netfilter/nft_redir_ipv6.c3
-rw-r--r--kernel/net/ipv6/netfilter/nft_reject_ipv6.c7
-rw-r--r--kernel/net/ipv6/output_core.c38
-rw-r--r--kernel/net/ipv6/raw.c31
-rw-r--r--kernel/net/ipv6/reassembly.c30
-rw-r--r--kernel/net/ipv6/route.c932
-rw-r--r--kernel/net/ipv6/sit.c28
-rw-r--r--kernel/net/ipv6/syncookies.c34
-rw-r--r--kernel/net/ipv6/sysctl_net_ipv6.c23
-rw-r--r--kernel/net/ipv6/tcp_ipv6.c291
-rw-r--r--kernel/net/ipv6/tunnel6.c12
-rw-r--r--kernel/net/ipv6/udp.c22
-rw-r--r--kernel/net/ipv6/xfrm6_input.c4
-rw-r--r--kernel/net/ipv6/xfrm6_mode_tunnel.c5
-rw-r--r--kernel/net/ipv6/xfrm6_output.c40
-rw-r--r--kernel/net/ipv6/xfrm6_policy.c92
71 files changed, 2985 insertions, 1581 deletions
diff --git a/kernel/net/ipv6/Kconfig b/kernel/net/ipv6/Kconfig
index 438a73aa7..983bb9997 100644
--- a/kernel/net/ipv6/Kconfig
+++ b/kernel/net/ipv6/Kconfig
@@ -5,16 +5,15 @@
# IPv6 as module will cause a CRASH if you try to unload it
menuconfig IPV6
tristate "The IPv6 protocol"
- default m
+ default y
---help---
- This is complemental support for the IP version 6.
- You will still be able to do traditional IPv4 networking as well.
+ Support for IP version 6 (IPv6).
For general information about IPv6, see
<https://en.wikipedia.org/wiki/IPv6>.
- For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
- For specific information about IPv6 under Linux, read the HOWTO at
- <http://www.bieringer.de/linux/IPv6/>.
+ For specific information about IPv6 under Linux, see
+ Documentation/networking/ipv6.txt and read the HOWTO at
+ <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
To compile this protocol support as a module, choose M here: the
module will be called ipv6.
@@ -93,6 +92,25 @@ config IPV6_MIP6
If unsure, say N.
+config IPV6_ILA
+ tristate "IPv6: Identifier Locator Addressing (ILA)"
+ select LWTUNNEL
+ ---help---
+ Support for IPv6 Identifier Locator Addressing (ILA).
+
+ ILA is a mechanism to do network virtualization without
+ encapsulation. The basic concept of ILA is that we split an
+ IPv6 address into a 64 bit locator and 64 bit identifier. The
+ identifier is the identity of an entity in communication
+ ("who") and the locator expresses the location of the
+ entity ("where").
+
+ ILA can be configured using the "encap ila" option with
+ "ip -6 route" command. ILA is described in
+ https://tools.ietf.org/html/draft-herbert-nvo3-ila-00.
+
+ If unsure, say N.
+
config INET6_XFRM_TUNNEL
tristate
select INET6_TUNNEL
diff --git a/kernel/net/ipv6/Makefile b/kernel/net/ipv6/Makefile
index 2e8c06108..2c900c7b7 100644
--- a/kernel/net/ipv6/Makefile
+++ b/kernel/net/ipv6/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
+obj-$(CONFIG_IPV6_ILA) += ila.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
@@ -48,4 +49,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
ifneq ($(CONFIG_IPV6),)
obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
+obj-y += mcast_snoop.o
endif
diff --git a/kernel/net/ipv6/addrconf.c b/kernel/net/ipv6/addrconf.c
index 37b70e82b..e8d3da081 100644
--- a/kernel/net/ipv6/addrconf.c
+++ b/kernel/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/l3mdev.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
@@ -195,6 +196,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_from_local = 0,
+ .accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -211,7 +213,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_ra_mtu = 1,
.stable_secret = {
.initialized = false,
- }
+ },
+ .use_oif_addrs_only = 0,
+ .ignore_routes_with_linkdown = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -236,6 +240,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_from_local = 0,
+ .accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -253,6 +258,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.stable_secret = {
.initialized = false,
},
+ .use_oif_addrs_only = 0,
+ .ignore_routes_with_linkdown = 0,
};
/* Check if a valid qdisc is available */
@@ -343,6 +350,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
setup_timer(&ndev->rs_timer, addrconf_rs_timer,
(unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+
+ if (ndev->cnf.stable_secret.initialized)
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ else
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64;
+
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -411,6 +424,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (err) {
ipv6_mc_destroy_dev(ndev);
del_timer(&ndev->regen_timer);
+ snmp6_unregister_dev(ndev);
goto err_release;
}
/* protected by rtnl_lock */
@@ -468,6 +482,9 @@ static int inet6_netconf_msgsize_devconf(int type)
if (type == -1 || type == NETCONFA_PROXY_NEIGH)
size += nla_total_size(4);
+ if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+ size += nla_total_size(4);
+
return size;
}
@@ -504,6 +521,11 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
goto nla_put_failure;
+ if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+ nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ devconf->ignore_routes_with_linkdown) < 0)
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -540,6 +562,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
[NETCONFA_IFINDEX] = { .len = sizeof(int) },
[NETCONFA_FORWARDING] = { .len = sizeof(int) },
[NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
+ [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
};
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
@@ -560,7 +583,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = EINVAL;
+ err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
goto errout;
@@ -762,6 +785,63 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
rt6_purge_dflt_routers(net);
return 1;
}
+
+static void addrconf_linkdown_change(struct net *net, __s32 newf)
+{
+ struct net_device *dev;
+ struct inet6_dev *idev;
+
+ for_each_netdev(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
+
+ idev->cnf.ignore_routes_with_linkdown = newf;
+ if (changed)
+ inet6_netconf_notify_devconf(dev_net(dev),
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ dev->ifindex,
+ &idev->cnf);
+ }
+ }
+}
+
+static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
+{
+ struct net *net;
+ int old;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ net = (struct net *)table->extra2;
+ old = *p;
+ *p = newf;
+
+ if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net,
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+ rtnl_unlock();
+ return 0;
+ }
+
+ if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) {
+ net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf;
+ addrconf_linkdown_change(net, newf);
+ if ((!newf) ^ (!old))
+ inet6_netconf_notify_devconf(net,
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
+ }
+ rtnl_unlock();
+
+ return 1;
+}
+
#endif
/* Nobody refers to this ifaddr, destroy it */
@@ -1358,15 +1438,96 @@ out:
return ret;
}
+static int __ipv6_dev_get_saddr(struct net *net,
+ struct ipv6_saddr_dst *dst,
+ struct inet6_dev *idev,
+ struct ipv6_saddr_score *scores,
+ int hiscore_idx)
+{
+ struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ int i;
+
+ /*
+ * - Tentative Address (RFC2462 section 5.4)
+ * - A tentative address is not considered
+ * "assigned to an interface" in the traditional
+ * sense, unless it is also flagged as optimistic.
+ * - Candidate Source Address (section 4)
+ * - In any case, anycast addresses, multicast
+ * addresses, and the unspecified address MUST
+ * NOT be included in a candidate set.
+ */
+ if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+ (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+ continue;
+
+ score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+ if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+ score->addr_type & IPV6_ADDR_MULTICAST)) {
+ net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+ idev->dev->name);
+ continue;
+ }
+
+ score->rule = -1;
+ bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+ for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+ int minihiscore, miniscore;
+
+ minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i);
+ miniscore = ipv6_get_saddr_eval(net, score, dst, i);
+
+ if (minihiscore > miniscore) {
+ if (i == IPV6_SADDR_RULE_SCOPE &&
+ score->scopedist > 0) {
+ /*
+ * special case:
+ * each remaining entry
+ * has too small (not enough)
+ * scope, because ifa entries
+ * are sorted by their scope
+ * values.
+ */
+ goto out;
+ }
+ break;
+ } else if (minihiscore < miniscore) {
+ if (hiscore->ifa)
+ in6_ifa_put(hiscore->ifa);
+
+ in6_ifa_hold(score->ifa);
+
+ swap(hiscore, score);
+ hiscore_idx = 1 - hiscore_idx;
+
+ /* restore our iterator */
+ score->ifa = hiscore->ifa;
+
+ break;
+ }
+ }
+ }
+out:
+ read_unlock_bh(&idev->lock);
+ return hiscore_idx;
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
- struct ipv6_saddr_score scores[2],
- *score = &scores[0], *hiscore = &scores[1];
+ struct ipv6_saddr_score scores[2], *hiscore;
struct ipv6_saddr_dst dst;
+ struct inet6_dev *idev;
struct net_device *dev;
int dst_type;
+ bool use_oif_addr = false;
+ int hiscore_idx = 0;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1375,105 +1536,50 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
dst.prefs = prefs;
- hiscore->rule = -1;
- hiscore->ifa = NULL;
+ scores[hiscore_idx].rule = -1;
+ scores[hiscore_idx].ifa = NULL;
rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- struct inet6_dev *idev;
-
- /* Candidate Source Address (section 4)
- * - multicast and link-local destination address,
- * the set of candidate source address MUST only
- * include addresses assigned to interfaces
- * belonging to the same link as the outgoing
- * interface.
- * (- For site-local destination addresses, the
- * set of candidate source addresses MUST only
- * include addresses assigned to interfaces
- * belonging to the same site as the outgoing
- * interface.)
- */
- if (((dst_type & IPV6_ADDR_MULTICAST) ||
- dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
- dst.ifindex && dev->ifindex != dst.ifindex)
- continue;
-
- idev = __in6_dev_get(dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
- int i;
-
- /*
- * - Tentative Address (RFC2462 section 5.4)
- * - A tentative address is not considered
- * "assigned to an interface" in the traditional
- * sense, unless it is also flagged as optimistic.
- * - Candidate Source Address (section 4)
- * - In any case, anycast addresses, multicast
- * addresses, and the unspecified address MUST
- * NOT be included in a candidate set.
- */
- if ((score->ifa->flags & IFA_F_TENTATIVE) &&
- (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
- continue;
-
- score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+ /* Candidate Source Address (section 4)
+ * - multicast and link-local destination address,
+ * the set of candidate source address MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same link as the outgoing
+ * interface.
+ * (- For site-local destination addresses, the
+ * set of candidate source addresses MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same site as the outgoing
+ * interface.)
+ * - "It is RECOMMENDED that the candidate source addresses
+ * be the set of unicast addresses assigned to the
+ * interface that will be used to send to the destination
+ * (the 'outgoing' interface)." (RFC 6724)
+ */
+ if (dst_dev) {
+ idev = __in6_dev_get(dst_dev);
+ if ((dst_type & IPV6_ADDR_MULTICAST) ||
+ dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
+ (idev && idev->cnf.use_oif_addrs_only)) {
+ use_oif_addr = true;
+ }
+ }
- if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
- score->addr_type & IPV6_ADDR_MULTICAST)) {
- net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
- dev->name);
+ if (use_oif_addr) {
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
continue;
- }
-
- score->rule = -1;
- bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
-
- for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
- int minihiscore, miniscore;
-
- minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
- miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
-
- if (minihiscore > miniscore) {
- if (i == IPV6_SADDR_RULE_SCOPE &&
- score->scopedist > 0) {
- /*
- * special case:
- * each remaining entry
- * has too small (not enough)
- * scope, because ifa entries
- * are sorted by their scope
- * values.
- */
- goto try_nextdev;
- }
- break;
- } else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
- swap(hiscore, score);
-
- /* restore our iterator */
- score->ifa = hiscore->ifa;
-
- break;
- }
- }
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
-try_nextdev:
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
+ hiscore = &scores[hiscore_idx];
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
@@ -1845,37 +1951,6 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
__ipv6_dev_ac_dec(ifp->idev, &addr);
}
-static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
-{
- if (dev->addr_len != ETH_ALEN)
- return -1;
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
-
- /*
- * The zSeries OSA network cards can be shared among various
- * OS instances, but the OSA cards have only one MAC address.
- * This leads to duplicate address conflicts in conjunction
- * with IPv6 if more than one instance uses the same card.
- *
- * The driver for these cards can deliver a unique 16-bit
- * identifier for each instance sharing the same card. It is
- * placed instead of 0xFFFE in the interface identifier. The
- * "u" bit of the interface identifier is not inverted in this
- * case. Hence the resulting interface identifier has local
- * scope according to RFC2373.
- */
- if (dev->dev_id) {
- eui[3] = (dev->dev_id >> 8) & 0xFF;
- eui[4] = dev->dev_id & 0xFF;
- } else {
- eui[3] = 0xFF;
- eui[4] = 0xFE;
- eui[0] ^= 2;
- }
- return 0;
-}
-
static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
{
if (dev->addr_len != IEEE802154_ADDR_LEN)
@@ -2079,7 +2154,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
unsigned long expires, u32 flags)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_PREFIX,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_expires = expires,
@@ -2112,8 +2187,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
+ u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
- table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+ table = fib6_get_table(dev_net(dev), tb_id);
if (!table)
return NULL;
@@ -2121,6 +2197,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
if (!fn)
goto out;
+
+ noflags |= RTF_CACHE;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
@@ -2142,7 +2220,7 @@ out:
static void addrconf_add_mroute(struct net_device *dev)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_LOCAL,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
@@ -2383,7 +2461,7 @@ ok:
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (in6_dev->cnf.optimistic_dad &&
!net->ipv6.devconf_all->forwarding && sllao)
- addr_flags = IFA_F_OPTIMISTIC;
+ addr_flags |= IFA_F_OPTIMISTIC;
#endif
/* Do not allow to create too much of autoconfigured
@@ -2960,6 +3038,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
{
struct in6_addr addr;
+ /* no link local addresses on L3 master devices */
+ if (netif_is_l3_master(idev->dev))
+ return;
+
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
@@ -3050,6 +3132,8 @@ static void addrconf_gre_config(struct net_device *dev)
}
addrconf_addr_gen(idev, true);
+ if (dev->flags & IFF_POINTOPOINT)
+ addrconf_add_mroute(dev);
}
#endif
@@ -3070,6 +3154,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
break;
+ case NETDEV_CHANGEMTU:
+ /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
+ if (dev->mtu < IPV6_MIN_MTU) {
+ addrconf_ifdown(dev, 1);
+ break;
+ }
+
+ if (idev) {
+ rt6_mtu_change(dev, dev->mtu);
+ idev->cnf.mtu6 = dev->mtu;
+ break;
+ }
+
+ /* allocate new idev */
+ idev = ipv6_add_dev(dev);
+ if (IS_ERR(idev))
+ break;
+
+ /* device is still not ready */
+ if (!(idev->if_flags & IF_READY))
+ break;
+
+ run_pending = 1;
+
+ /* fall through */
+
case NETDEV_UP:
case NETDEV_CHANGE:
if (dev->flags & IFF_SLAVE)
@@ -3093,7 +3203,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
idev->if_flags |= IF_READY;
run_pending = 1;
}
- } else {
+ } else if (event == NETDEV_CHANGE) {
if (!addrconf_qdisc_ok(dev)) {
/* device is still not ready. */
break;
@@ -3158,24 +3268,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
break;
- case NETDEV_CHANGEMTU:
- if (idev && dev->mtu >= IPV6_MIN_MTU) {
- rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
- break;
- }
-
- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
- idev = ipv6_add_dev(dev);
- if (!IS_ERR(idev))
- break;
- }
-
- /*
- * if MTU under IPV6_MIN_MTU.
- * Stop IPv6 on this interface.
- */
-
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
/*
@@ -3414,6 +3506,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
+ bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@ -3459,7 +3552,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
/* Because optimistic nodes can use this address,
* notify listeners. If DAD fails, RTM_DELADDR is sent.
*/
- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ notify = true;
}
}
@@ -3467,6 +3560,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
out:
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
+ if (notify)
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
}
static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -3556,7 +3651,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
+ ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
rtnl_unlock();
@@ -4558,6 +4653,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
@@ -4583,7 +4679,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
+ array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
/* we omit DEVCONF_STABLE_SECRET for now */
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
}
static inline size_t inet6_ifla6_size(void)
@@ -4603,6 +4701,7 @@ static inline size_t inet6_if_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
}
@@ -4622,18 +4721,24 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
}
static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
- int items, int bytes, size_t syncpoff)
+ int bytes, size_t syncpoff)
{
- int i;
- int pad = bytes - sizeof(u64) * items;
+ int i, c;
+ u64 buff[IPSTATS_MIB_MAX];
+ int pad = bytes - sizeof(u64) * IPSTATS_MIB_MAX;
+
BUG_ON(pad < 0);
- /* Use put_unaligned() because stats may not be aligned for u64. */
- put_unaligned(items, &stats[0]);
- for (i = 1; i < items; i++)
- put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+ memset(buff, 0, sizeof(buff));
+ buff[0] = IPSTATS_MIB_MAX;
- memset(&stats[items], 0, pad);
+ for_each_possible_cpu(c) {
+ for (i = 1; i < IPSTATS_MIB_MAX; i++)
+ buff[i] += snmp_get_cpu_field64(mib, c, i, syncpoff);
+ }
+
+ memcpy(stats, buff, IPSTATS_MIB_MAX * sizeof(u64));
+ memset(&stats[IPSTATS_MIB_MAX], 0, pad);
}
static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
@@ -4641,8 +4746,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
{
switch (attrtype) {
case IFLA_INET6_STATS:
- __snmp6_fill_stats64(stats, idev->stats.ipv6,
- IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
+ __snmp6_fill_stats64(stats, idev->stats.ipv6, bytes,
+ offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
__snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
@@ -4650,7 +4755,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
-static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
+ u32 ext_filter_mask)
{
struct nlattr *nla;
struct ifla_cacheinfo ci;
@@ -4670,6 +4776,9 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
/* XXX - MC not implemented */
+ if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
+ return 0;
+
nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
if (!nla)
goto nla_put_failure;
@@ -4697,7 +4806,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static size_t inet6_get_link_af_size(const struct net_device *dev)
+static size_t inet6_get_link_af_size(const struct net_device *dev,
+ u32 ext_filter_mask)
{
if (!__in6_dev_get(dev))
return 0;
@@ -4705,14 +4815,15 @@ static size_t inet6_get_link_af_size(const struct net_device *dev)
return inet6_ifla6_size();
}
-static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
+ u32 ext_filter_mask)
{
struct inet6_dev *idev = __in6_dev_get(dev);
if (!idev)
return -ENODATA;
- if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ if (inet6_fill_ifla6_attrs(skb, idev, ext_filter_mask) < 0)
return -EMSGSIZE;
return 0;
@@ -4859,13 +4970,15 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
(dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
+ nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+ nla_put_u8(skb, IFLA_OPERSTATE,
+ netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
goto nla_put_failure;
protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
if (!protoinfo)
goto nla_put_failure;
- if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ if (inet6_fill_ifla6_attrs(skb, idev, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, protoinfo);
@@ -5046,13 +5159,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
ifp->idev->dev, 0, 0);
- if (rt && ip6_del_rt(rt))
- dst_free(&rt->dst);
+ if (rt)
+ ip6_del_rt(rt);
}
dst_hold(&ifp->rt->dst);
- if (ip6_del_rt(ifp->rt))
- dst_free(&ifp->rt->dst);
+ ip6_del_rt(ifp->rt);
rt_genid_bump_ipv6(net);
break;
@@ -5260,13 +5372,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
goto out;
}
- if (!write) {
- err = snprintf(str, sizeof(str), "%pI6",
- &secret->secret);
- if (err >= sizeof(str)) {
- err = -EIO;
- goto out;
- }
+ err = snprintf(str, sizeof(str), "%pI6", &secret->secret);
+ if (err >= sizeof(str)) {
+ err = -EIO;
+ goto out;
}
err = proc_dostring(&lctl, write, buffer, lenp, ppos);
@@ -5304,6 +5413,34 @@ out:
return err;
}
+static
+int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
+ int write,
+ void __user *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ struct ctl_table lctl;
+ int ret;
+
+ /* ctl->data points to idev->cnf.ignore_routes_when_linkdown
+ * we should not modify it until we get the rtnl lock.
+ */
+ lctl = *ctl;
+ lctl.data = &val;
+
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write)
+ ret = addrconf_fixup_linkdown(ctl, valp, val);
+ if (ret)
+ *ppos = pos;
+ return ret;
+}
+
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
@@ -5454,6 +5591,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_hop_limit",
+ .data = &ipv6_devconf.accept_ra_min_hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
@@ -5583,6 +5727,20 @@ static struct addrconf_sysctl_table
.proc_handler = addrconf_sysctl_stable_secret,
},
{
+ .procname = "use_oif_addrs_only",
+ .data = &ipv6_devconf.use_oif_addrs_only,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ignore_routes_with_linkdown",
+ .data = &ipv6_devconf.ignore_routes_with_linkdown,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
+ },
+ {
/* sentinel */
}
},
diff --git a/kernel/net/ipv6/addrconf_core.c b/kernel/net/ipv6/addrconf_core.c
index ca09bf49a..bfa941fc1 100644
--- a/kernel/net/ipv6/addrconf_core.c
+++ b/kernel/net/ipv6/addrconf_core.c
@@ -107,7 +107,16 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
}
EXPORT_SYMBOL(inet6addr_notifier_call_chain);
-const struct ipv6_stub *ipv6_stub __read_mostly;
+static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
+ struct dst_entry **u2,
+ struct flowi6 *u3)
+{
+ return -EAFNOSUPPORT;
+}
+
+const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
+ .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+};
EXPORT_SYMBOL_GPL(ipv6_stub);
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
diff --git a/kernel/net/ipv6/addrlabel.c b/kernel/net/ipv6/addrlabel.c
index 882124ebb..a8f6986dc 100644
--- a/kernel/net/ipv6/addrlabel.c
+++ b/kernel/net/ipv6/addrlabel.c
@@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && ip6addrlbl_hold(p))
+ if (p && !ip6addrlbl_hold(p))
p = NULL;
lseq = ip6addrlbl_table.seq;
rcu_read_unlock();
diff --git a/kernel/net/ipv6/af_inet6.c b/kernel/net/ipv6/af_inet6.c
index eef63b394..9f5137cd6 100644
--- a/kernel/net/ipv6/af_inet6.c
+++ b/kernel/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -167,7 +170,7 @@ lookup_protocol:
WARN_ON(!answer_prot->slab);
err = -ENOBUFS;
- sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
+ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
@@ -197,6 +200,7 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
+ np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -342,7 +346,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (!(inet->freebind || inet->transparent) &&
+ if (!net->ipv6.sysctl.ip_nonlocal_bind &&
+ !(inet->freebind || inet->transparent) &&
!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
err = -EADDRNOTAVAIL;
@@ -362,7 +367,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
np->saddr = addr->sin6_addr;
/* Make sure we are allowed to bind here. */
- if (sk->sk_prot->get_port(sk, snum)) {
+ if ((snum || !inet->bind_address_no_port) &&
+ sk->sk_prot->get_port(sk, snum)) {
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
@@ -425,9 +431,11 @@ void inet6_destroy_sock(struct sock *sk)
/* Free tx options */
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -656,7 +664,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -665,7 +676,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
return PTR_ERR(dst);
}
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return 0;
@@ -678,8 +689,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
const struct ipv6_pinfo *np = inet6_sk(sk);
if (np->rxopt.all) {
- if ((opt->hop && (np->rxopt.bits.hopopts ||
- np->rxopt.bits.ohopopts)) ||
+ if (((opt->flags & IP6SKB_HOPBYHOP) &&
+ (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
(ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
@@ -765,9 +776,10 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
- net->ipv6.sysctl.auto_flowlabels = 0;
+ net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
+ net->ipv6.sysctl.flowlabel_state_ranges = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/kernel/net/ipv6/ah6.c b/kernel/net/ipv6/ah6.c
index ed7d4e3f9..0630a4d5d 100644
--- a/kernel/net/ipv6/ah6.c
+++ b/kernel/net/ipv6/ah6.c
@@ -577,8 +577,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
ahp->icv_trunc_len + seqhi_len);
- if (!work_iph)
+ if (!work_iph) {
+ err = -ENOMEM;
goto out;
+ }
auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
diff --git a/kernel/net/ipv6/datagram.c b/kernel/net/ipv6/datagram.c
index b10a88986..428162155 100644
--- a/kernel/net/ipv6/datagram.c
+++ b/kernel/net/ipv6/datagram.c
@@ -162,13 +162,18 @@ ipv4_connected:
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
@@ -199,7 +204,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
- ip6_set_txhash(sk);
+ sk_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
@@ -263,7 +268,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct ipv6hdr *iph;
struct sk_buff *skb;
@@ -568,8 +573,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
/* HbH is allowed only once */
- if (np->rxopt.bits.hopopts && opt->hop) {
- u8 *ptr = nh + opt->hop;
+ if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+ u8 *ptr = nh + sizeof(struct ipv6hdr);
put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
}
@@ -630,8 +635,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
- if (np->rxopt.bits.ohopopts && opt->hop) {
- u8 *ptr = nh + opt->hop;
+ if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+ u8 *ptr = nh + sizeof(struct ipv6hdr);
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.odstopts && opt->dst0) {
diff --git a/kernel/net/ipv6/esp6.c b/kernel/net/ipv6/esp6.c
index 7c07ce36a..060a60b2f 100644
--- a/kernel/net/ipv6/esp6.c
+++ b/kernel/net/ipv6/esp6.c
@@ -76,7 +76,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
len = ALIGN(len, crypto_tfm_ctx_alignment());
}
- len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+ len += sizeof(struct aead_request) + crypto_aead_reqsize(aead);
len = ALIGN(len, __alignof__(struct scatterlist));
len += sizeof(struct scatterlist) * nfrags;
@@ -96,17 +96,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
}
-static inline struct aead_givcrypt_request *esp_tmp_givreq(
- struct crypto_aead *aead, u8 *iv)
-{
- struct aead_givcrypt_request *req;
-
- req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
- crypto_tfm_ctx_alignment());
- aead_givcrypt_set_tfm(req, aead);
- return req;
-}
-
static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
{
struct aead_request *req;
@@ -125,14 +114,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static inline struct scatterlist *esp_givreq_sg(
- struct crypto_aead *aead, struct aead_givcrypt_request *req)
-{
- return (void *)ALIGN((unsigned long)(req + 1) +
- crypto_aead_reqsize(aead),
- __alignof__(struct scatterlist));
-}
-
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -141,32 +122,57 @@ static void esp_output_done(struct crypto_async_request *base, int err)
xfrm_output_resume(skb, err);
}
+/* Move ESP header back into place. */
+static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
+{
+ struct ip_esp_hdr *esph = (void *)(skb->data + offset);
+ void *tmp = ESP_SKB_CB(skb)->tmp;
+ __be32 *seqhi = esp_tmp_seqhi(tmp);
+
+ esph->seq_no = esph->spi;
+ esph->spi = *seqhi;
+}
+
+static void esp_output_restore_header(struct sk_buff *skb)
+{
+ esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+}
+
+static void esp_output_done_esn(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ esp_output_restore_header(skb);
+ esp_output_done(base, err);
+}
+
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
- struct aead_givcrypt_request *req;
+ struct aead_request *req;
struct scatterlist *sg;
- struct scatterlist *asg;
struct sk_buff *trailer;
void *tmp;
int blksize;
int clen;
int alen;
int plen;
+ int ivlen;
int tfclen;
int nfrags;
int assoclen;
- int sglists;
int seqhilen;
u8 *iv;
u8 *tail;
__be32 *seqhi;
+ __be64 seqno;
/* skb is pure payload to encrypt */
aead = x->data;
alen = crypto_aead_authsize(aead);
+ ivlen = crypto_aead_ivsize(aead);
tfclen = 0;
if (x->tfcpad) {
@@ -187,16 +193,14 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
nfrags = err;
assoclen = sizeof(*esph);
- sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp) {
err = -ENOMEM;
goto error;
@@ -204,9 +208,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_givreq(aead, iv);
- asg = esp_givreq_sg(aead, req);
- sg = asg + sglists;
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
/* Fill padding... */
tail = skb_tail_pointer(trailer);
@@ -227,37 +230,53 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esph = ip_esp_hdr(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
- esph->spi = x->id.spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+ *seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ }
+
+ esph->spi = x->id.spi;
+
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
- esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
- clen + alen);
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
- if ((x->props.flags & XFRM_STATE_ESN)) {
- sg_init_table(asg, 3);
- sg_set_buf(asg, &esph->spi, sizeof(__be32));
- *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- sg_set_buf(asg + 1, seqhi, seqhilen);
- sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
- } else
- sg_init_one(asg, esph, sizeof(*esph));
-
- aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
- aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
- aead_givcrypt_set_assoc(req, asg, assoclen);
- aead_givcrypt_set_giv(req, esph->enc_data,
- XFRM_SKB_CB(skb)->seq.output.low +
- ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+ aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+ aead_request_set_ad(req, assoclen);
+
+ seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
+ ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32));
+
+ memset(iv, 0, ivlen);
+ memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8),
+ min(ivlen, 8));
ESP_SKB_CB(skb)->tmp = tmp;
- err = crypto_aead_givencrypt(req);
- if (err == -EINPROGRESS)
+ err = crypto_aead_encrypt(req);
+
+ switch (err) {
+ case -EINPROGRESS:
goto error;
- if (err == -EBUSY)
+ case -EBUSY:
err = NET_XMIT_DROP;
+ break;
+
+ case 0:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ esp_output_restore_header(skb);
+ }
kfree(tmp);
@@ -318,25 +337,38 @@ static void esp_input_done(struct crypto_async_request *base, int err)
xfrm_input_resume(skb, esp_input_done2(skb, err));
}
+static void esp_input_restore_header(struct sk_buff *skb)
+{
+ esp_restore_header(skb, 0);
+ __skb_pull(skb, 4);
+}
+
+static void esp_input_done_esn(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+
+ esp_input_restore_header(skb);
+ esp_input_done(base, err);
+}
+
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
- int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+ int ivlen = crypto_aead_ivsize(aead);
+ int elen = skb->len - sizeof(*esph) - ivlen;
int nfrags;
int assoclen;
- int sglists;
int seqhilen;
int ret = 0;
void *tmp;
__be32 *seqhi;
u8 *iv;
struct scatterlist *sg;
- struct scatterlist *asg;
- if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) {
+ if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) {
ret = -EINVAL;
goto out;
}
@@ -355,16 +387,14 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
ret = -ENOMEM;
assoclen = sizeof(*esph);
- sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
goto out;
@@ -372,36 +402,39 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
- asg = esp_req_sg(aead, req);
- sg = asg + sglists;
+ sg = esp_req_sg(aead, req);
skb->ip_summed = CHECKSUM_NONE;
esph = (struct ip_esp_hdr *)skb->data;
- /* Get ivec. This can be wrong, check against another impls. */
- iv = esph->enc_data;
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+ aead_request_set_callback(req, 0, esp_input_done, skb);
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
if ((x->props.flags & XFRM_STATE_ESN)) {
- sg_init_table(asg, 3);
- sg_set_buf(asg, &esph->spi, sizeof(__be32));
- *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
- sg_set_buf(asg + 1, seqhi, seqhilen);
- sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
- } else
- sg_init_one(asg, esph, sizeof(*esph));
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi);
+ aead_request_set_callback(req, 0, esp_input_done_esn, skb);
+ }
- aead_request_set_callback(req, 0, esp_input_done, skb);
- aead_request_set_crypt(req, sg, sg, elen, iv);
- aead_request_set_assoc(req, asg, assoclen);
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
+ aead_request_set_ad(req, assoclen);
ret = crypto_aead_decrypt(req);
if (ret == -EINPROGRESS)
goto out;
+ if ((x->props.flags & XFRM_STATE_ESN))
+ esp_input_restore_header(skb);
+
ret = esp_input_done2(skb, ret);
out:
@@ -461,10 +494,16 @@ static void esp6_destroy(struct xfrm_state *x)
static int esp_init_aead(struct xfrm_state *x)
{
+ char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+ err = -ENAMETOOLONG;
+ if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+ x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ goto error;
+
+ aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -503,15 +542,19 @@ static int esp_init_authenc(struct xfrm_state *x)
if ((x->props.flags & XFRM_STATE_ESN)) {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
- "authencesn(%s,%s)",
+ "%s%sauthencesn(%s,%s)%s",
+ x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ x->ealg->alg_name,
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
goto error;
} else {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
- "authenc(%s,%s)",
+ "%s%sauthenc(%s,%s)%s",
+ x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
- x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+ x->ealg->alg_name,
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
goto error;
}
diff --git a/kernel/net/ipv6/exthdrs.c b/kernel/net/ipv6/exthdrs.c
index a7bbbe455..ea7c4d64a 100644
--- a/kernel/net/ipv6/exthdrs.c
+++ b/kernel/net/ipv6/exthdrs.c
@@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
return -1;
}
- opt->hop = sizeof(struct ipv6hdr);
+ opt->flags |= IP6SKB_HOPBYHOP;
if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return ERR_PTR(-ENOBUFS);
memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
diff --git a/kernel/net/ipv6/fib6_rules.c b/kernel/net/ipv6/fib6_rules.c
index 2367a16ea..ed33abf57 100644
--- a/kernel/net/ipv6/fib6_rules.c
+++ b/kernel/net/ipv6/fib6_rules.c
@@ -32,6 +32,7 @@ struct fib6_rule {
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
+ struct rt6_info *rt;
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.flags = FIB_LOOKUP_NOREF,
@@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (arg.result)
- return arg.result;
+ rt = arg.result;
- dst_hold(&net->ipv6.ip6_null_entry->dst);
- return &net->ipv6.ip6_null_entry->dst;
+ if (!rt) {
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return &net->ipv6.ip6_null_entry->dst;
+ }
+
+ if (rt->rt6i_flags & RTF_REJECT &&
+ rt->dst.error == -EAGAIN) {
+ ip6_rt_put(rt);
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+
+ return &rt->dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -258,11 +269,6 @@ nla_put_failure:
return -ENOBUFS;
}
-static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
-{
- return 0x3FFF;
-}
-
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
@@ -279,7 +285,6 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.configure = fib6_rule_configure,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
- .default_pref = fib6_rule_default_pref,
.nlmsg_payload = fib6_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
diff --git a/kernel/net/ipv6/icmp.c b/kernel/net/ipv6/icmp.c
index 2c2b5d51f..0a37ddc7a 100644
--- a/kernel/net/ipv6/icmp.c
+++ b/kernel/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -207,7 +208,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct inet_peer *peer;
peer = inet_getpeer_v6(net->ipv6.peers,
- &rt->rt6i_dst.addr, 1);
+ &fl6->daddr, 1);
res = inet_peer_xrlim_allow(peer, tmo);
if (peer)
inet_putpeer(peer);
@@ -329,7 +330,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
struct flowi6 fl2;
int err;
- err = ip6_dst_lookup(sk, &dst, fl6);
+ err = ip6_dst_lookup(net, sk, &dst, fl6);
if (err)
return ERR_PTR(err);
@@ -337,7 +338,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
* We won't send icmp if the destination is known
* anycast.
*/
- if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+ if (ipv6_anycast_destination(dst, &fl6->daddr)) {
net_dbg_ratelimited("icmp6_send: acast source\n");
dst_release(dst);
return ERR_PTR(-EINVAL);
@@ -361,7 +362,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- err = ip6_dst_lookup(sk, &dst2, &fl2);
+ err = ip6_dst_lookup(net, sk, &dst2, &fl2);
if (err)
goto relookup_failed;
@@ -452,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* and anycast addresses will be checked later.
*/
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
- net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
+ net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
+ &hdr->saddr, &hdr->daddr);
return;
}
@@ -460,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* Never answer to a ICMP packet.
*/
if (is_ineligible(skb)) {
- net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
+ net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
+ &hdr->saddr, &hdr->daddr);
return;
}
@@ -496,6 +499,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
+
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
@@ -509,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
len = skb->len - msg.offset;
len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
if (len < 0) {
- net_dbg_ratelimited("icmp: len problem\n");
+ net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
+ &hdr->saddr, &hdr->daddr);
goto out_dst_release;
}
@@ -564,7 +571,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (!ipv6_unicast_destination(skb) &&
!(net->ipv6.sysctl.anycast_src_echo_reply &&
- ipv6_anycast_destination(skb)))
+ ipv6_anycast_destination(skb_dst(skb), saddr)))
saddr = NULL;
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
@@ -575,7 +582,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -591,7 +598,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- err = ip6_dst_lookup(sk, &dst, &fl6);
+ err = ip6_dst_lookup(net, sk, &dst, &fl6);
if (err)
goto out;
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
@@ -781,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (type & ICMPV6_INFOMSG_MASK)
break;
- net_dbg_ratelimited("icmpv6: msg of unknown type\n");
+ net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
+ saddr, daddr);
/*
* error of unknown type.
@@ -826,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}
-/*
- * Special lock-class for __icmpv6_sk:
- */
-static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
-
static int __net_init icmpv6_sk_init(struct net *net)
{
struct sock *sk;
@@ -852,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net)
net->ipv6.icmp_sk[i] = sk;
- /*
- * Split off their lock-class, because sk->sk_dst_lock
- * gets used from softirqs, which is safe for
- * __icmpv6_sk (because those never get directly used
- * via userspace syscalls), but unsafe for normal sockets.
- */
- lockdep_set_class(&sk->sk_dst_lock,
- &icmpv6_socket_sk_dst_lock_key);
-
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
diff --git a/kernel/net/ipv6/ila.c b/kernel/net/ipv6/ila.c
new file mode 100644
index 000000000..1a6852e1a
--- /dev/null
+++ b/kernel/net/ipv6/ila.c
@@ -0,0 +1,229 @@
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/ip6_fib.h>
+#include <net/lwtunnel.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+
+struct ila_params {
+ __be64 locator;
+ __be64 locator_match;
+ __wsum csum_diff;
+};
+
+static inline struct ila_params *ila_params_lwtunnel(
+ struct lwtunnel_state *lwstate)
+{
+ return (struct ila_params *)lwstate->data;
+}
+
+static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
+{
+ __be32 diff[] = {
+ ~from[0], ~from[1], to[0], to[1],
+ };
+
+ return csum_partial(diff, sizeof(diff), 0);
+}
+
+static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ if (*(__be64 *)&ip6h->daddr == p->locator_match)
+ return p->csum_diff;
+ else
+ return compute_csum_diff8((__be32 *)&ip6h->daddr,
+ (__be32 *)&p->locator);
+}
+
+static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+ __wsum diff;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ size_t nhoff = sizeof(struct ipv6hdr);
+
+ /* First update checksum */
+ switch (ip6h->nexthdr) {
+ case NEXTHDR_TCP:
+ if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
+ struct tcphdr *th = (struct tcphdr *)
+ (skb_network_header(skb) + nhoff);
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&th->check, skb,
+ diff, true);
+ }
+ break;
+ case NEXTHDR_UDP:
+ if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
+ struct udphdr *uh = (struct udphdr *)
+ (skb_network_header(skb) + nhoff);
+
+ if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&uh->check, skb,
+ diff, true);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+ }
+ break;
+ case NEXTHDR_ICMP:
+ if (likely(pskb_may_pull(skb,
+ nhoff + sizeof(struct icmp6hdr)))) {
+ struct icmp6hdr *ih = (struct icmp6hdr *)
+ (skb_network_header(skb) + nhoff);
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+ diff, true);
+ }
+ break;
+ }
+
+ /* Now change destination address */
+ *(__be64 *)&ip6h->daddr = p->locator;
+}
+
+static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+
+ return dst->lwtstate->orig_output(net, sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int ila_input(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+
+ return dst->lwtstate->orig_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+ [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+};
+
+static int ila_build_state(struct net_device *dev, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts)
+{
+ struct ila_params *p;
+ struct nlattr *tb[ILA_ATTR_MAX + 1];
+ size_t encap_len = sizeof(*p);
+ struct lwtunnel_state *newts;
+ const struct fib6_config *cfg6 = cfg;
+ int ret;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
+ ila_nl_policy);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[ILA_ATTR_LOCATOR])
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(encap_len);
+ if (!newts)
+ return -ENOMEM;
+
+ newts->len = encap_len;
+ p = ila_params_lwtunnel(newts);
+
+ p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
+
+ if (cfg6->fc_dst_len > sizeof(__be64)) {
+ /* Precompute checksum difference for translation since we
+ * know both the old locator and the new one.
+ */
+ p->locator_match = *(__be64 *)&cfg6->fc_dst;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match, (__be32 *)&p->locator);
+ }
+
+ newts->type = LWTUNNEL_ENCAP_ILA;
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
+ LWTUNNEL_STATE_INPUT_REDIRECT;
+
+ *ts = newts;
+
+ return 0;
+}
+
+static int ila_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ila_params *p = ila_params_lwtunnel(lwtstate);
+
+ if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ /* No encapsulation overhead */
+ return 0;
+}
+
+static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ila_params *a_p = ila_params_lwtunnel(a);
+ struct ila_params *b_p = ila_params_lwtunnel(b);
+
+ return (a_p->locator != b_p->locator);
+}
+
+static const struct lwtunnel_encap_ops ila_encap_ops = {
+ .build_state = ila_build_state,
+ .output = ila_output,
+ .input = ila_input,
+ .fill_encap = ila_fill_encap_info,
+ .get_encap_size = ila_encap_nlsize,
+ .cmp_encap = ila_encap_cmp,
+};
+
+static int __init ila_init(void)
+{
+ return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
+}
+
+static void __exit ila_fini(void)
+{
+ lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
diff --git a/kernel/net/ipv6/inet6_connection_sock.c b/kernel/net/ipv6/inet6_connection_sock.c
index 6927f3fb5..a7ca2cde2 100644
--- a/kernel/net/ipv6/inet6_connection_sock.c
+++ b/kernel/net/ipv6/inet6_connection_sock.c
@@ -65,19 +65,22 @@ int inet6_csk_bind_conflict(const struct sock *sk,
}
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
-struct dst_entry *inet6_csk_route_req(struct sock *sk,
+struct dst_entry *inet6_csk_route_req(const struct sock *sk,
struct flowi6 *fl6,
- const struct request_sock *req)
+ const struct request_sock *req,
+ u8 proto)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
memset(fl6, 0, sizeof(*fl6));
- fl6->flowi6_proto = IPPROTO_TCP;
+ fl6->flowi6_proto = proto;
fl6->daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = ireq->ir_mark;
@@ -91,73 +94,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
return dst;
}
-
-/*
- * request_sock (formerly open request) hash tables.
- */
-static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
- const u32 rnd, const u32 synq_hsize)
-{
- u32 c;
-
- c = jhash_3words((__force u32)raddr->s6_addr32[0],
- (__force u32)raddr->s6_addr32[1],
- (__force u32)raddr->s6_addr32[2],
- rnd);
-
- c = jhash_2words((__force u32)raddr->s6_addr32[3],
- (__force u32)rport,
- c);
-
- return c & (synq_hsize - 1);
-}
-
-struct request_sock *inet6_csk_search_req(struct sock *sk,
- const __be16 rport,
- const struct in6_addr *raddr,
- const struct in6_addr *laddr,
- const int iif)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- struct request_sock *req;
- u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
- lopt->nr_table_entries);
-
- spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
- for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
- const struct inet_request_sock *ireq = inet_rsk(req);
-
- if (ireq->ir_rmt_port == rport &&
- req->rsk_ops->family == AF_INET6 &&
- ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
- ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
- (!ireq->ir_iif || ireq->ir_iif == iif)) {
- atomic_inc(&req->rsk_refcnt);
- WARN_ON(req->sk != NULL);
- break;
- }
- }
- spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
- return req;
-}
-EXPORT_SYMBOL_GPL(inet6_csk_search_req);
-
-void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
- struct request_sock *req,
- const unsigned long timeout)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
- inet_rsk(req)->ir_rmt_port,
- lopt->hash_rnd, lopt->nr_table_entries);
-
- reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
- inet_csk_reqsk_queue_added(sk, timeout);
-}
-EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+EXPORT_SYMBOL(inet6_csk_route_req);
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
@@ -174,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
-void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
-{
- __ip6_dst_store(sk, dst, daddr, saddr);
-}
-
-static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
{
return __sk_dst_check(sk, cookie);
@@ -207,14 +136,16 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!IS_ERR(dst))
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return dst;
}
@@ -240,7 +171,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/kernel/net/ipv6/inet6_hashtables.c b/kernel/net/ipv6/inet6_hashtables.c
index 871641bc1..21ace5a2b 100644
--- a/kernel/net/ipv6/inet6_hashtables.c
+++ b/kernel/net/ipv6/inet6_hashtables.c
@@ -114,6 +114,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
}
return score;
}
@@ -207,7 +209,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL;
- int twrefcnt = 0;
spin_lock(lock);
@@ -234,21 +235,17 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
- twrefcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
- if (twrefcnt)
- inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) {
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw);
-
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
}
return 0;
@@ -257,7 +254,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static inline u32 inet6_sk_port_offset(const struct sock *sk)
+static u32 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -269,7 +266,11 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
+ u32 port_offset = 0;
+
+ if (!inet_sk(sk)->inet_num)
+ port_offset = inet6_sk_port_offset(sk);
+ return __inet_hash_connect(death_row, sk, port_offset,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/kernel/net/ipv6/ip6_fib.c b/kernel/net/ipv6/ip6_fib.c
index bde57b113..0c7e276c2 100644
--- a/kernel/net/ipv6/ip6_fib.c
+++ b/kernel/net/ipv6/ip6_fib.c
@@ -32,6 +32,7 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
+#include <net/lwtunnel.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -154,10 +155,39 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
}
+static void rt6_rcu_free(struct rt6_info *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+{
+ int cpu;
+
+ if (!non_pcpu_rt->rt6i_pcpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ rt6_rcu_free(pcpu_rt);
+ *ppcpu_rt = NULL;
+ }
+ }
+
+ non_pcpu_rt->rt6i_pcpu = NULL;
+}
+
static void rt6_release(struct rt6_info *rt)
{
- if (atomic_dec_and_test(&rt->rt6i_ref))
- dst_free(&rt->dst);
+ if (atomic_dec_and_test(&rt->rt6i_ref)) {
+ rt6_free_pcpu(rt);
+ rt6_rcu_free(rt);
+ }
}
static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -234,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
return NULL;
}
+EXPORT_SYMBOL_GPL(fib6_get_table);
static void __net_init fib6_tables_init(struct net *net)
{
@@ -255,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
- return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ struct rt6_info *rt;
+
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ if (rt->rt6i_flags & RTF_REJECT &&
+ rt->dst.error == -EAGAIN) {
+ ip6_rt_put(rt);
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+
+ return &rt->dst;
}
static void __net_init fib6_tables_init(struct net *net)
@@ -738,6 +779,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
rt6_clean_expires(iter);
else
rt6_set_expires(iter, rt->dst.expires);
+ iter->rt6i_pmtu = rt->rt6i_pmtu;
return -EEXIST;
}
/* If we have the same destination and the same metric,
@@ -820,7 +862,7 @@ add:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -846,7 +888,7 @@ add:
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
@@ -907,6 +949,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
int replace_required = 0;
int sernum = fib6_new_sernum(info->nl_net);
+ if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) &&
+ !atomic_read(&rt->dst.__refcnt)))
+ return -EINVAL;
+
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
allow_create = 0;
@@ -999,6 +1045,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
fib6_prune_clones(info->nl_net, pn);
+ rt->dst.flags &= ~DST_NOCACHE;
}
out:
@@ -1023,7 +1070,8 @@ out:
atomic_inc(&pn->leaf->rt6i_ref);
}
#endif
- dst_free(&rt->dst);
+ if (!(rt->dst.flags & DST_NOCACHE))
+ dst_free(&rt->dst);
}
return err;
@@ -1034,7 +1082,8 @@ out:
st_failure:
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
fib6_repair_tree(info->nl_net, fn);
- dst_free(&rt->dst);
+ if (!(rt->dst.flags & DST_NOCACHE))
+ dst_free(&rt->dst);
return err;
#endif
}
@@ -1384,7 +1433,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
fib6_purge_rt(rt, fn, net);
- inet6_rt_notify(RTM_DELROUTE, rt, info);
+ inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
}
diff --git a/kernel/net/ipv6/ip6_flowlabel.c b/kernel/net/ipv6/ip6_flowlabel.c
index d49112501..dc2db4f7b 100644
--- a/kernel/net/ipv6/ip6_flowlabel.c
+++ b/kernel/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
}
spin_lock_bh(&ip6_sk_fl_lock);
for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference(*sflp)) != NULL;
+ (sfl = rcu_dereference_protected(*sflp,
+ lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
sflp = &sfl->next) {
if (sfl->fl->label == freq.flr_label) {
if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = rcu_dereference(sfl->next);
+ *sflp = sfl->next;
spin_unlock_bh(&ip6_sk_fl_lock);
fl_release(sfl->fl);
kfree_rcu(sfl, rcu);
@@ -595,6 +596,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
+ if (net->ipv6.sysctl.flowlabel_state_ranges &&
+ (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
+ return -ERANGE;
+
fl = fl_create(net, sk, &freq, optval, optlen, &err);
if (!fl)
return err;
diff --git a/kernel/net/ipv6/ip6_gre.c b/kernel/net/ipv6/ip6_gre.c
index 69f4f689f..e5ea177d3 100644
--- a/kernel/net/ipv6/ip6_gre.c
+++ b/kernel/net/ipv6/ip6_gre.c
@@ -404,13 +404,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
case ICMPV6_DEST_UNREACH:
- net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
break;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
- net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
}
break;
case ICMPV6_PARAMPROB:
@@ -421,12 +421,12 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (teli && teli == be32_to_cpu(info) - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
- net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
}
} else {
- net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
@@ -634,20 +634,20 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_check(tunnel);
+ dst = ip6_tnl_dst_get(tunnel);
if (!dst) {
- ndst = ip6_route_output(net, NULL, fl6);
+ dst = ip6_route_output(net, NULL, fl6);
- if (ndst->error)
+ if (dst->error)
goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
goto tx_err_link_failure;
}
- dst = ndst;
+ ndst = dst;
}
tdev = dst->dev;
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb = new_skb;
}
- if (fl6->flowi6_mark) {
- skb_dst_set(skb, dst);
- ndst = NULL;
- } else {
- skb_dst_set_noref(skb, dst);
- }
+ if (!fl6->flowi6_mark && ndst)
+ ip6_tnl_dst_set(tunnel, ndst);
+ skb_dst_set(skb, dst);
proto = NEXTHDR_GRE;
if (encap_limit >= 0) {
@@ -729,7 +726,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
*/
ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
- ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_set_inner_protocol(skb, protocol);
ip6tunnel_xmit(NULL, skb, dev);
- if (ndst)
- ip6_tnl_dst_store(tunnel, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -1183,7 +1178,8 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
ip6_flow_hdr(ipv6h, 0,
ip6_make_flowlabel(dev_net(dev), skb,
- t->fl.u.ip6.flowlabel, false));
+ t->fl.u.ip6.flowlabel, true,
+ &t->fl.u.ip6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
@@ -1222,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = {
static void ip6gre_dev_free(struct net_device *dev)
{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ ip6_tnl_dst_destroy(t);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1244,9 +1243,10 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
netif_keep_dst(dev);
}
-static int ip6gre_tunnel_init(struct net_device *dev)
+static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
+ int ret;
tunnel = netdev_priv(dev);
@@ -1254,16 +1254,37 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ ret = ip6_tnl_dst_init(tunnel);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+ int ret;
+
+ ret = ip6gre_tunnel_init_common(dev);
+ if (ret)
+ return ret;
+
+ tunnel = netdev_priv(dev);
+
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
if (ipv6_addr_any(&tunnel->parms.raddr))
dev->header_ops = &ip6gre_header_ops;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
return 0;
}
@@ -1459,19 +1480,16 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
+ int ret;
- tunnel = netdev_priv(dev);
+ ret = ip6gre_tunnel_init_common(dev);
+ if (ret)
+ return ret;
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
return 0;
}
@@ -1553,13 +1571,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return -EEXIST;
} else {
t = nt;
-
- ip6gre_tunnel_unlink(ign, t);
- ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6gre_tunnel_link(ign, t);
- netdev_state_change(dev);
}
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
return 0;
}
diff --git a/kernel/net/ipv6/ip6_input.c b/kernel/net/ipv6/ip6_input.c
index 57990c929..9075acf08 100644
--- a/kernel/net/ipv6/ip6_input.c
+++ b/kernel/net/ipv6/ip6_input.c
@@ -45,8 +45,9 @@
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/inet_ecn.h>
+#include <net/dst_metadata.h>
-int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
@@ -55,7 +56,7 @@ int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
if (ipprot && ipprot->early_demux)
ipprot->early_demux(skb);
}
- if (!skb_dst(skb))
+ if (!skb_valid_dst(skb))
ip6_route_input(skb);
return dst_input(skb);
@@ -98,7 +99,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
* arrived via the sending interface (ethX), because of the
* nature of scoping architecture. --yoshfuji
*/
- IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
+ IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
@@ -108,7 +109,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
- IP6_ADD_STATS_BH(dev_net(dev), idev,
+ IP6_ADD_STATS_BH(net, idev,
IPSTATS_MIB_NOECTPKTS +
(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
@@ -182,8 +183,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
- dev, NULL,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ net, NULL, skb, dev, NULL,
ip6_rcv_finish);
err:
IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -198,9 +199,8 @@ drop:
*/
-static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
+static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
@@ -277,8 +277,8 @@ discard:
int ip6_input(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb,
- skb->dev, NULL,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_input_finish);
}
diff --git a/kernel/net/ipv6/ip6_offload.c b/kernel/net/ipv6/ip6_offload.c
index 08b62047c..eeca943f1 100644
--- a/kernel/net/ipv6/ip6_offload.c
+++ b/kernel/net/ipv6/ip6_offload.c
@@ -264,6 +264,9 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
int err = -ENOSYS;
+ if (skb->encapsulation)
+ skb_set_inner_network_header(skb, nhoff);
+
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
rcu_read_lock();
@@ -280,6 +283,13 @@ out_unlock:
return err;
}
+static int sit_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_SIT;
+ return ipv6_gro_complete(skb, nhoff);
+}
+
static struct packet_offload ipv6_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.callbacks = {
@@ -292,6 +302,8 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
static const struct net_offload sit_offload = {
.callbacks = {
.gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = sit_gro_complete,
},
};
diff --git a/kernel/net/ipv6/ip6_output.c b/kernel/net/ipv6/ip6_output.c
index bc09cb97b..31144c486 100644
--- a/kernel/net/ipv6/ip6_output.c
+++ b/kernel/net/ipv6/ip6_output.c
@@ -55,8 +55,9 @@
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
+#include <net/l3mdev.h>
-static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
@@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(dev_net(dev), skb) &&
+ ((mroute6_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
*/
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- sk, newskb, NULL, newskb->dev,
+ net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
- IP6_INC_STATS(dev_net(dev), idev,
+ IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
}
- IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
- skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
IPV6_ADDR_SCOPE_NODELOCAL &&
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -116,48 +116,49 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
}
rcu_read_unlock_bh();
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
-static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
- return ip6_fragment(sk, skb, ip6_finish_output2);
+ return ip6_fragment(net, sk, skb, ip6_finish_output2);
else
- return ip6_finish_output2(sk, skb);
+ return ip6_finish_output2(net, sk, skb);
}
-int ip6_output(struct sock *sk, struct sk_buff *skb)
+int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
if (unlikely(idev->cnf.disable_ipv6)) {
- IP6_INC_STATS(dev_net(dev), idev,
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
- NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
/*
- * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * Note : socket lock is not held for SYNACK packets, but might be modified
+ * by calls to skb_set_owner_w() and ipv6_local_error(),
+ * which are using proper atomic operations or spinlocks.
*/
-
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
struct ipv6_txoptions *opt, int tclass)
{
struct net *net = sock_net(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr;
@@ -186,7 +187,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
}
consume_skb(skb);
skb = skb2;
- skb_set_owner_w(skb, sk);
+ /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
+ * it is safe to call in our context (socket lock not held)
+ */
+ skb_set_owner_w(skb, (struct sock *)sk);
}
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
@@ -207,7 +211,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel));
+ np->autoflowlabel, fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -224,12 +228,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, dst->dev, dst_output_sk);
+ /* hooks should never assume socket lock is held.
+ * we promote our socket to non const
+ */
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, (struct sock *)sk, skb, NULL, dst->dev,
+ dst_output);
}
skb->dev = dst->dev;
- ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
+ /* ipv6_local_error() does not require socket lock,
+ * we promote our socket to non const
+ */
+ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
+
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
@@ -317,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
return 0;
}
-static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6_forward_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
skb_sender_cpu_clear(skb);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -376,6 +389,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
+ if (unlikely(skb->sk))
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -459,7 +475,7 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
@@ -512,8 +528,8 @@ int ip6_forward(struct sk_buff *skb)
IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
- skb->dev, dst->dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
error:
@@ -540,8 +556,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-int ip6_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -551,10 +567,9 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
int hroom, troom;
- __be32 frag_id = 0;
+ __be32 frag_id;
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
- struct net *net = dev_net(skb_dst(skb)->dev);
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
@@ -564,40 +579,50 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (unlikely(!skb->ignore_df && skb->len > mtu) ||
- (IP6CB(skb)->frag_max_size &&
- IP6CB(skb)->frag_max_size > mtu)) {
- if (skb->sk && dst_allfrag(skb_dst(skb)))
- sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+ if (unlikely(!skb->ignore_df && skb->len > mtu))
+ goto fail_toobig;
- skb->dev = skb_dst(skb)->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
- return -EMSGSIZE;
+ if (IP6CB(skb)->frag_max_size) {
+ if (IP6CB(skb)->frag_max_size > mtu)
+ goto fail_toobig;
+
+ /* don't send fragments larger than what we received */
+ mtu = IP6CB(skb)->frag_max_size;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
}
if (np && np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
+ if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+ goto fail_toobig;
mtu -= hlen + sizeof(struct frag_hdr);
+ frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto fail;
+
+ hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
- skb_cloned(skb))
+ skb_cloned(skb) ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
goto slow_path;
skb_walk_frags(skb, frag) {
/* Correct geometry. */
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
- skb_headroom(frag) < hlen)
+ skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
goto slow_path_clean;
/* Partially cloned skb? */
@@ -614,8 +639,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
err = 0;
offset = 0;
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
@@ -623,8 +646,11 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
if (!tmp_hdr) {
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto fail;
}
+ frag = skb_shinfo(skb)->frag_list;
+ skb_frag_list_init(skb);
__skb_pull(skb, hlen);
fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
@@ -632,11 +658,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
- ipv6_select_ident(net, fh, rt);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
- frag_id = fh->identification;
+ fh->identification = frag_id;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
@@ -670,7 +695,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
ip6_copy_metadata(frag, skb);
}
- err = output(sk, skb);
+ err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
@@ -710,10 +735,6 @@ slow_path_clean:
}
slow_path:
- if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
- skb_checksum_help(skb))
- goto fail;
-
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
@@ -722,7 +743,6 @@ slow_path:
*/
*prevhdr = NEXTHDR_FRAGMENT;
- hroom = LL_RESERVED_SPACE(rt->dst.dev);
troom = rt->dst.dev->needed_tailroom;
/*
@@ -778,11 +798,7 @@ slow_path:
*/
fh->nexthdr = nexthdr;
fh->reserved = 0;
- if (!frag_id) {
- ipv6_select_ident(net, fh, rt);
- frag_id = fh->identification;
- } else
- fh->identification = frag_id;
+ fh->identification = frag_id;
/*
* Copy a block of the IP datagram.
@@ -803,7 +819,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- err = output(sk, frag);
+ err = output(net, sk, frag);
if (err)
goto fail;
@@ -815,6 +831,14 @@ slow_path:
consume_skb(skb);
return err;
+fail_toobig:
+ if (skb->sk && dst_allfrag(skb_dst(skb)))
+ sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
+
+ skb->dev = skb_dst(skb)->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ err = -EMSGSIZE;
+
fail:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
@@ -867,7 +891,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+ (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
dst_release(dst);
dst = NULL;
}
@@ -876,15 +901,15 @@ out:
return dst;
}
-static int ip6_dst_lookup_tail(struct sock *sk,
+static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6)
{
- struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
struct neighbour *n;
struct rt6_info *rt;
#endif
int err;
+ int flags = 0;
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
@@ -916,10 +941,13 @@ static int ip6_dst_lookup_tail(struct sock *sk,
dst_release(*dst);
*dst = NULL;
}
+
+ if (fl6->flowi6_oif)
+ flags |= RT6_LOOKUP_F_IFACE;
}
if (!*dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output_flags(net, sk, fl6, flags);
err = (*dst)->error;
if (err)
@@ -936,7 +964,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev,
+ rt6_nexthop(rt, &fl6->daddr));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -988,10 +1017,11 @@ out_err_release:
*
* It returns zero on success, or a standard errno code on error.
*/
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
+int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+ struct flowi6 *fl6)
{
*dst = NULL;
- return ip6_dst_lookup_tail(sk, dst, fl6);
+ return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
@@ -1006,17 +1036,19 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
-struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
int err;
- err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
@@ -1044,7 +1076,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
dst = ip6_sk_dst_check(sk, dst, fl6);
- err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
@@ -1060,11 +1092,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
int odd, struct sk_buff *skb),
void *from, int length, int hh_len, int fragheaderlen,
int transhdrlen, int mtu, unsigned int flags,
- struct rt6_info *rt)
+ const struct flowi6 *fl6)
{
struct sk_buff *skb;
- struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1106,8 +1137,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
sizeof(struct frag_hdr)) & ~7;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(sock_net(sk), &fhdr, rt);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+ skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
+ &fl6->daddr,
+ &fl6->saddr);
append:
return skb_append_datato_frags(sk, skb, getfrag, from,
@@ -1242,6 +1274,7 @@ static int __ip6_append_data(struct sock *sk,
struct rt6_info *rt = (struct rt6_info *)cork->dst;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
+ unsigned int maxnonfragsize, headersize;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1259,38 +1292,43 @@ static int __ip6_append_data(struct sock *sk,
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
sizeof(struct frag_hdr);
- if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
- unsigned int maxnonfragsize, headersize;
-
- headersize = sizeof(struct ipv6hdr) +
- (opt ? opt->opt_flen + opt->opt_nflen : 0) +
- (dst_allfrag(&rt->dst) ?
- sizeof(struct frag_hdr) : 0) +
- rt->rt6i_nfheader_len;
-
- if (ip6_sk_ignore_df(sk))
- maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
- else
- maxnonfragsize = mtu;
+ headersize = sizeof(struct ipv6hdr) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+ (dst_allfrag(&rt->dst) ?
+ sizeof(struct frag_hdr) : 0) +
+ rt->rt6i_nfheader_len;
+
+ if (cork->length + length > mtu - headersize && dontfrag &&
+ (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
+ sizeof(struct ipv6hdr));
+ goto emsgsize;
+ }
- /* dontfrag active */
- if ((cork->length + length > mtu - headersize) && dontfrag &&
- (sk->sk_protocol == IPPROTO_UDP ||
- sk->sk_protocol == IPPROTO_RAW)) {
- ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
- sizeof(struct ipv6hdr));
- goto emsgsize;
- }
+ if (ip6_sk_ignore_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
- if (cork->length + length > maxnonfragsize - headersize) {
+ if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
- ipv6_local_error(sk, EMSGSIZE, fl6,
- mtu - headersize +
- sizeof(struct ipv6hdr));
- return -EMSGSIZE;
- }
+ ipv6_local_error(sk, EMSGSIZE, fl6,
+ mtu - headersize +
+ sizeof(struct ipv6hdr));
+ return -EMSGSIZE;
}
+ /* CHECKSUM_PARTIAL only with no extension headers and when
+ * we are not going to fragment
+ */
+ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
+ headersize == sizeof(struct ipv6hdr) &&
+ length < mtu - headersize &&
+ !(flags & MSG_MORE) &&
+ rt->dst.dev->features & NETIF_F_V6_CSUM)
+ csummode = CHECKSUM_PARTIAL;
+
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
sock_tx_timestamp(sk, &tx_flags);
if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
@@ -1298,16 +1336,6 @@ emsgsize:
tskey = sk->sk_tskey++;
}
- /* If this is the first and only packet and device
- * supports checksum offloading, let's use it.
- * Use transhdrlen, same as IPv4, because partial
- * sums only work when transhdrlen is set.
- */
- if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
- length + fragheaderlen < mtu &&
- rt->dst.dev->features & NETIF_F_V6_CSUM &&
- !exthdrlen)
- csummode = CHECKSUM_PARTIAL;
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1329,10 +1357,10 @@ emsgsize:
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) &&
- (sk->sk_type == SOCK_DGRAM)) {
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
+ transhdrlen, mtu, flags, fl6);
if (err)
goto error;
return 0;
@@ -1641,7 +1669,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel));
+ np->autoflowlabel, fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1670,7 +1698,7 @@ int ip6_send_skb(struct sk_buff *skb)
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
int err;
- err = ip6_local_out(skb);
+ err = ip6_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
err = net_xmit_errno(err);
diff --git a/kernel/net/ipv6/ip6_tunnel.c b/kernel/net/ipv6/ip6_tunnel.c
index 5cafd92c2..137fca42a 100644
--- a/kernel/net/ipv6/ip6_tunnel.c
+++ b/kernel/net/ipv6/ip6_tunnel.c
@@ -126,36 +126,92 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
* Locking : hash tables are protected by RCU and RTNL
*/
-struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
+ struct dst_entry *dst)
{
- struct dst_entry *dst = t->dst_cache;
+ write_seqlock_bh(&idst->lock);
+ dst_release(rcu_dereference_protected(
+ idst->dst,
+ lockdep_is_held(&idst->lock.lock)));
+ if (dst) {
+ dst_hold(dst);
+ idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
+ } else {
+ idst->cookie = 0;
+ }
+ rcu_assign_pointer(idst->dst, dst);
+ write_sequnlock_bh(&idst->lock);
+}
+
+struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
+{
+ struct ip6_tnl_dst *idst;
+ struct dst_entry *dst;
+ unsigned int seq;
+ u32 cookie;
- if (dst && dst->obsolete &&
- !dst->ops->check(dst, t->dst_cookie)) {
- t->dst_cache = NULL;
+ idst = raw_cpu_ptr(t->dst_cache);
+
+ rcu_read_lock();
+ do {
+ seq = read_seqbegin(&idst->lock);
+ dst = rcu_dereference(idst->dst);
+ cookie = idst->cookie;
+ } while (read_seqretry(&idst->lock, seq));
+
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
+ rcu_read_unlock();
+
+ if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
+ ip6_tnl_per_cpu_dst_set(idst, NULL);
dst_release(dst);
- return NULL;
+ dst = NULL;
}
-
return dst;
}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
- dst_release(t->dst_cache);
- t->dst_cache = NULL;
+ int i;
+
+ for_each_possible_cpu(i)
+ ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
-void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
+{
+ ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
+
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
+
+void ip6_tnl_dst_destroy(struct ip6_tnl *t)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
- t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
- dst_release(t->dst_cache);
- t->dst_cache = dst;
+ if (!t->dst_cache)
+ return;
+
+ ip6_tnl_dst_reset(t);
+ free_percpu(t->dst_cache);
}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
+
+int ip6_tnl_dst_init(struct ip6_tnl *t)
+{
+ int i;
+
+ t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
+ if (!t->dst_cache)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i)
+ seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -271,6 +327,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
static void ip6_dev_free(struct net_device *dev)
{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ ip6_tnl_dst_destroy(t);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -510,14 +569,14 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
case ICMPV6_DEST_UNREACH:
- net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
rel_msg = 1;
break;
case ICMPV6_TIME_EXCEED:
if ((*code) == ICMPV6_EXC_HOPLIMIT) {
- net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
rel_msg = 1;
}
break;
@@ -529,13 +588,13 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
if (teli && teli == *info - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
- net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
rel_msg = 1;
}
} else {
- net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
@@ -1010,23 +1069,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
} else if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_check(t);
+ dst = ip6_tnl_dst_get(t);
if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
goto tx_err_link_failure;
if (!dst) {
- ndst = ip6_route_output(net, NULL, fl6);
+ dst = ip6_route_output(net, NULL, fl6);
- if (ndst->error)
+ if (dst->error)
goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
goto tx_err_link_failure;
}
- dst = ndst;
+ ndst = dst;
}
tdev = dst->dev;
@@ -1072,12 +1131,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
consume_skb(skb);
skb = new_skb;
}
- if (fl6->flowi6_mark) {
- skb_dst_set(skb, dst);
- ndst = NULL;
- } else {
- skb_dst_set_noref(skb, dst);
- }
+
+ if (!fl6->flowi6_mark && ndst)
+ ip6_tnl_dst_set(t, ndst);
+ skb_dst_set(skb, dst);
+
skb->transport_header = skb->network_header;
proto = fl6->flowi6_proto;
@@ -1095,20 +1153,18 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
- ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
ip6tunnel_xmit(NULL, skb, dev);
- if (ndst)
- ip6_tnl_dst_store(t, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -1573,12 +1629,21 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ int ret;
t->dev = dev;
t->net = dev_net(dev);
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+
+ ret = ip6_tnl_dst_init(t);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
return 0;
}
diff --git a/kernel/net/ipv6/ip6_udp_tunnel.c b/kernel/net/ipv6/ip6_udp_tunnel.c
index bba8903e8..14dacf1df 100644
--- a/kernel/net/ipv6/ip6_udp_tunnel.c
+++ b/kernel/net/ipv6/ip6_udp_tunnel.c
@@ -19,11 +19,18 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
int err;
struct socket *sock = NULL;
- err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+ err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock);
if (err < 0)
goto error;
- sk_change_net(sock->sk, net);
+ if (cfg->ipv6_v6only) {
+ int val = 1;
+
+ err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
+ (char *) &val, sizeof(val));
+ if (err < 0)
+ goto error;
+ }
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
@@ -55,7 +62,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
*sockp = NULL;
return err;
diff --git a/kernel/net/ipv6/ip6_vti.c b/kernel/net/ipv6/ip6_vti.c
index 0224c032d..0a8610b33 100644
--- a/kernel/net/ipv6/ip6_vti.c
+++ b/kernel/net/ipv6/ip6_vti.c
@@ -482,7 +482,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
return -EMSGSIZE;
}
- err = dst_output(skb);
+ err = dst_output(t->net, skb->sk, skb);
if (net_xmit_eval(err) == 0) {
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
diff --git a/kernel/net/ipv6/ip6mr.c b/kernel/net/ipv6/ip6mr.c
index 5f36266b1..a10e77103 100644
--- a/kernel/net/ipv6/ip6mr.c
+++ b/kernel/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -217,7 +217,6 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
.match = ip6mr_rule_match,
.configure = ip6mr_rule_configure,
.compare = ip6mr_rule_compare,
- .default_pref = fib_default_rule_pref,
.fill = ip6mr_rule_fill,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = ip6mr_rule_policy,
@@ -335,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr6_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -766,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -1543,7 +1538,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1553,8 +1548,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
* Shut down all active vif entries
*/
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
- mif6_delete(mrt, i, &list);
+ if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ continue;
+ mif6_delete(mrt, i, &list);
}
unregister_netdevice_many(&list);
@@ -1563,7 +1559,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
*/
for (i = 0; i < MFC6_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
write_lock_bh(&mrt_lock);
list_del(&c->list);
@@ -1626,7 +1622,7 @@ int ip6mr_sk_done(struct sock *sk)
net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
err = 0;
break;
}
@@ -1986,13 +1982,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
}
#endif
-static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTOCTETS, skb->len);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
/*
@@ -2064,8 +2060,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
- skb->dev, dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, dev,
ip6mr_forward2_finish);
out_free:
diff --git a/kernel/net/ipv6/ipv6_sockglue.c b/kernel/net/ipv6/ipv6_sockglue.c
index 63e695691..4449ad1f8 100644
--- a/kernel/net/ipv6/ipv6_sockglue.c
+++ b/kernel/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
sk_dst_reset(sk);
return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
@@ -486,6 +493,7 @@ sticky_done:
break;
memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;
lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
diff --git a/kernel/net/ipv6/mcast.c b/kernel/net/ipv6/mcast.c
index 083b2927f..5ee56d0a8 100644
--- a/kernel/net/ipv6/mcast.c
+++ b/kernel/net/ipv6/mcast.c
@@ -1645,13 +1645,12 @@ static void mld_sendpack(struct sk_buff *skb)
payload_len = skb->len;
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net->ipv6.igmp_sk, skb, NULL, skb->dev,
- dst_output_sk);
+ net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
+ dst_output);
out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
} else {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
}
@@ -2008,13 +2007,13 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
}
skb_dst_set(skb, dst);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, skb->dev, dst_output_sk);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, skb->dev,
+ dst_output);
out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
} else
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/kernel/net/ipv6/mcast_snoop.c b/kernel/net/ipv6/mcast_snoop.c
new file mode 100644
index 000000000..9405b04ee
--- /dev/null
+++ b/kernel/net/ipv6/mcast_snoop.c
@@ -0,0 +1,216 @@
+/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki.
+ */
+
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+
+static int ipv6_mc_check_ip6hdr(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ip6h;
+ unsigned int len;
+ unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->version != 6)
+ return -EINVAL;
+
+ len = offset + ntohs(ip6h->payload_len);
+ if (skb->len < len || len <= offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipv6_mc_check_exthdrs(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ip6h;
+ int offset;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+ return -ENOMSG;
+
+ nexthdr = ip6h->nexthdr;
+ offset = skb_network_offset(skb) + sizeof(*ip6h);
+ offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+
+ if (offset < 0)
+ return -EINVAL;
+
+ if (nexthdr != IPPROTO_ICMPV6)
+ return -ENOMSG;
+
+ skb_set_transport_header(skb, offset);
+
+ return 0;
+}
+
+static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct mld2_report);
+
+ return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+}
+
+static int ipv6_mc_check_mld_query(struct sk_buff *skb)
+{
+ struct mld_msg *mld;
+ unsigned int len = skb_transport_offset(skb);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ len += sizeof(struct mld_msg);
+ if (skb->len < len)
+ return -EINVAL;
+
+ /* MLDv1? */
+ if (skb->len != len) {
+ /* or MLDv2? */
+ len += sizeof(struct mld2_query) - sizeof(struct mld_msg);
+ if (skb->len < len || !pskb_may_pull(skb, len))
+ return -EINVAL;
+ }
+
+ mld = (struct mld_msg *)skb_transport_header(skb);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
+ * all-nodes destination address (ff02::1) for general queries
+ */
+ if (ipv6_addr_any(&mld->mld_mca) &&
+ !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
+{
+ struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb);
+
+ switch (mld->mld_type) {
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_MGM_REPORT:
+ /* fall through */
+ return 0;
+ case ICMPV6_MLD2_REPORT:
+ return ipv6_mc_check_mld_reportv2(skb);
+ case ICMPV6_MGM_QUERY:
+ return ipv6_mc_check_mld_query(skb);
+ default:
+ return -ENOMSG;
+ }
+}
+
+static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
+{
+ return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
+}
+
+static int __ipv6_mc_check_mld(struct sk_buff *skb,
+ struct sk_buff **skb_trimmed)
+
+{
+ struct sk_buff *skb_chk = NULL;
+ unsigned int transport_len;
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
+ int ret = -EINVAL;
+
+ transport_len = ntohs(ipv6_hdr(skb)->payload_len);
+ transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
+
+ skb_chk = skb_checksum_trimmed(skb, transport_len,
+ ipv6_mc_validate_checksum);
+ if (!skb_chk)
+ goto err;
+
+ if (!pskb_may_pull(skb_chk, len))
+ goto err;
+
+ ret = ipv6_mc_check_mld_msg(skb_chk);
+ if (ret)
+ goto err;
+
+ if (skb_trimmed)
+ *skb_trimmed = skb_chk;
+ /* free now unneeded clone */
+ else if (skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ ret = 0;
+
+err:
+ if (ret && skb_chk && skb_chk != skb)
+ kfree_skb(skb_chk);
+
+ return ret;
+}
+
+/**
+ * ipv6_mc_check_mld - checks whether this is a sane MLD packet
+ * @skb: the skb to validate
+ * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
+ *
+ * Checks whether an IPv6 packet is a valid MLD packet. If so sets
+ * skb transport header accordingly and returns zero.
+ *
+ * -EINVAL: A broken packet was detected, i.e. it violates some internet
+ * standard
+ * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
+ * -ENOMEM: A memory allocation failure happened.
+ *
+ * Optionally, an skb pointer might be provided via skb_trimmed (or set it
+ * to NULL): After parsing an MLD packet successfully it will point to
+ * an skb which has its tail aligned to the IP packet end. This might
+ * either be the originally provided skb or a trimmed, cloned version if
+ * the skb frame had data beyond the IP packet. A cloned skb allows us
+ * to leave the original skb and its full frame unchanged (which might be
+ * desirable for layer 2 frame jugglers).
+ *
+ * Caller needs to set the skb network header and free any returned skb if it
+ * differs from the provided skb.
+ */
+int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+{
+ int ret;
+
+ ret = ipv6_mc_check_ip6hdr(skb);
+ if (ret < 0)
+ return ret;
+
+ ret = ipv6_mc_check_exthdrs(skb);
+ if (ret < 0)
+ return ret;
+
+ return __ipv6_mc_check_mld(skb, skb_trimmed);
+}
+EXPORT_SYMBOL(ipv6_mc_check_mld);
diff --git a/kernel/net/ipv6/mip6.c b/kernel/net/ipv6/mip6.c
index b9779d441..60c79a08e 100644
--- a/kernel/net/ipv6/mip6.c
+++ b/kernel/net/ipv6/mip6.c
@@ -118,7 +118,7 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
struct mip6_report_rate_limiter {
spinlock_t lock;
- struct timeval stamp;
+ ktime_t stamp;
int iif;
struct in6_addr src;
struct in6_addr dst;
@@ -184,20 +184,18 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
-static inline int mip6_report_rl_allow(struct timeval *stamp,
+static inline int mip6_report_rl_allow(ktime_t stamp,
const struct in6_addr *dst,
const struct in6_addr *src, int iif)
{
int allow = 0;
spin_lock_bh(&mip6_report_rl.lock);
- if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
- mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+ if (!ktime_equal(mip6_report_rl.stamp, stamp) ||
mip6_report_rl.iif != iif ||
!ipv6_addr_equal(&mip6_report_rl.src, src) ||
!ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
- mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
- mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+ mip6_report_rl.stamp = stamp;
mip6_report_rl.iif = iif;
mip6_report_rl.src = *src;
mip6_report_rl.dst = *dst;
@@ -216,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_destopt_hao *hao = NULL;
struct xfrm_selector sel;
int offset;
- struct timeval stamp;
+ ktime_t stamp;
int err = 0;
if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
@@ -230,9 +228,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
(skb_network_header(skb) + offset);
}
- skb_get_timestamp(skb, &stamp);
+ stamp = skb_get_ktime(skb);
- if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+ if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr,
hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
opt->iif))
goto out;
diff --git a/kernel/net/ipv6/ndisc.c b/kernel/net/ipv6/ndisc.c
index 96f153c08..84afb9a77 100644
--- a/kernel/net/ipv6/ndisc.c
+++ b/kernel/net/ipv6/ndisc.c
@@ -67,6 +67,7 @@
#include <net/flow.h>
#include <net/ip6_checksum.h>
#include <net/inet_common.h>
+#include <net/l3mdev.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
@@ -147,6 +148,7 @@ struct neigh_table nd_tbl = {
.gc_thresh2 = 512,
.gc_thresh3 = 1024,
};
+EXPORT_SYMBOL_GPL(nd_tbl);
static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
{
@@ -441,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb,
if (!dst) {
struct flowi6 fl6;
+ int oif = l3mdev_fib_oif(skb->dev);
- icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
+ if (oif != skb->dev->ifindex)
+ fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
@@ -463,9 +468,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, dst->dev,
- dst_output_sk);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, dst->dev,
+ dst_output);
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
@@ -474,8 +479,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
-void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *daddr,
+void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
bool router, bool solicited, bool override, bool inc_opt)
{
@@ -541,7 +545,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr,
+ ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
/*inc_opt=*/ true);
@@ -551,8 +555,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
in6_dev_put(idev);
}
-void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *solicit,
+void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr)
{
struct sk_buff *skb;
@@ -675,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, neigh, target, target, saddr);
+ ndisc_send_ns(dev, target, target, saddr);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
+ ndisc_send_ns(dev, target, &mcaddr, saddr);
}
}
@@ -764,7 +767,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
if (ifp) {
-
+have_ifp:
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
/*
@@ -790,6 +793,18 @@ static void ndisc_recv_ns(struct sk_buff *skb)
} else {
struct net *net = dev_net(dev);
+ /* perhaps an address on the master device */
+ if (netif_is_l3_slave(dev)) {
+ struct net_device *mdev;
+
+ mdev = netdev_master_upper_dev_get_rcu(dev);
+ if (mdev) {
+ ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
+ if (ifp)
+ goto have_ifp;
+ }
+ }
+
idev = in6_dev_get(dev);
if (!idev) {
/* XXX: count this drop? */
@@ -824,7 +839,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
is_router = idev->cnf.forwarding;
if (dad) {
- ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
+ ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
!!is_router, false, (ifp != NULL), true);
goto out;
}
@@ -845,8 +860,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE);
if (neigh || !dev->header_ops) {
- ndisc_send_na(dev, neigh, saddr, &msg->target,
- !!is_router,
+ ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
@@ -1074,6 +1088,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
struct ndisc_options ndopts;
int optlen;
unsigned int pref = 0;
+ __u32 old_if_flags;
+ bool send_ifinfo_notify = false;
__u8 *opt = (__u8 *)(ra_msg + 1);
@@ -1144,6 +1160,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
* Remember the managed/otherconf flags from most recently
* received RA message (RFC 2462) -- yoshfuji
*/
+ old_if_flags = in6_dev->if_flags;
in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
IF_RA_OTHERCONF)) |
(ra_msg->icmph.icmp6_addrconf_managed ?
@@ -1151,6 +1168,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
(ra_msg->icmph.icmp6_addrconf_other ?
IF_RA_OTHERCONF : 0);
+ if (old_if_flags != in6_dev->if_flags)
+ send_ifinfo_notify = true;
+
if (!in6_dev->cnf.accept_ra_defrtr) {
ND_PRINTK(2, info,
"RA: %s, defrtr is false for dev: %s\n",
@@ -1163,7 +1183,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
*/
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- NULL, 0)) {
+ in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
skb->dev->name);
@@ -1225,18 +1245,16 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt)
rt6_set_expires(rt, jiffies + (HZ * lifetime));
- if (ra_msg->icmph.icmp6_hop_limit) {
- /* Only set hop_limit on the interface if it is higher than
- * the current hop_limit.
- */
- if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) {
+ if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
+ ra_msg->icmph.icmp6_hop_limit) {
+ if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (rt)
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
} else {
- ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n");
+ ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
}
- if (rt)
- dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
- ra_msg->icmph.icmp6_hop_limit);
}
skip_defrtr:
@@ -1254,7 +1272,7 @@ skip_defrtr:
rtime = HZ/10;
NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
in6_dev->tstamp = jiffies;
- inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+ send_ifinfo_notify = true;
}
rtime = ntohl(ra_msg->reachable_time);
@@ -1271,11 +1289,17 @@ skip_defrtr:
GC_STALETIME, 3 * rtime);
in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
in6_dev->tstamp = jiffies;
- inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+ send_ifinfo_notify = true;
}
}
}
+ /*
+ * Send a notify if RA changed managed/otherconf flags or timer settings
+ */
+ if (send_ifinfo_notify)
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+
skip_linkparms:
/*
@@ -1313,7 +1337,7 @@ skip_linkparms:
#ifdef CONFIG_IPV6_ROUTE_INFO
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- NULL, 0)) {
+ in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: router info ignored.\n",
skb->dev->name);
@@ -1472,6 +1496,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct flowi6 fl6;
int rd_len;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ int oif = l3mdev_fib_oif(dev);
bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1488,7 +1513,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
- &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
+ &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
+
+ if (oif != skb->dev->ifindex)
+ fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
@@ -1506,7 +1534,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
+ peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
if (peer)
inet_putpeer(peer);
@@ -1650,6 +1678,7 @@ int ndisc_rcv(struct sk_buff *skb)
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
@@ -1664,6 +1693,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
ndisc_send_unsol_na(dev);
in6_dev_put(idev);
break;
+ case NETDEV_CHANGE:
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ neigh_changeaddr(&nd_tbl, dev);
+ break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
fib6_run_gc(0, net, false);
diff --git a/kernel/net/ipv6/netfilter.c b/kernel/net/ipv6/netfilter.c
index d958718b5..d11c46833 100644
--- a/kernel/net/ipv6/netfilter.c
+++ b/kernel/net/ipv6/netfilter.c
@@ -18,9 +18,8 @@
#include <net/ip6_checksum.h>
#include <net/netfilter/nf_queue.h>
-int ip6_route_me_harder(struct sk_buff *skb)
+int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
const struct ipv6hdr *iph = ipv6_hdr(skb);
unsigned int hh_len;
struct dst_entry *dst;
@@ -93,7 +92,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
}
}
-static int nf_ip6_reroute(struct sk_buff *skb,
+static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(skb);
+ return ip6_route_me_harder(net, skb);
}
return 0;
}
@@ -191,6 +190,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
static const struct nf_ipv6_ops ipv6ops = {
.chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment
};
static const struct nf_afinfo nf_ip6_afinfo = {
diff --git a/kernel/net/ipv6/netfilter/Kconfig b/kernel/net/ipv6/netfilter/Kconfig
index ca6998345..e10a04c9c 100644
--- a/kernel/net/ipv6/netfilter/Kconfig
+++ b/kernel/net/ipv6/netfilter/Kconfig
@@ -47,9 +47,23 @@ config NFT_REJECT_IPV6
default NFT_REJECT
tristate
+config NFT_DUP_IPV6
+ tristate "IPv6 nf_tables packet duplication support"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
+ select NF_DUP_IPV6
+ help
+ This module enables IPv6 packet duplication support for nf_tables.
+
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_DUP_IPV6
+ tristate "Netfilter IPv6 packet duplication to alternate destination"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
+ help
+ This option enables the nf_dup_ipv6 core, which duplicates an IPv6
+ packet to be rerouted to another destination.
+
config NF_REJECT_IPV6
tristate "IPv6 packet rejection"
default m if NETFILTER_ADVANCED=n
@@ -186,7 +200,8 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
+ depends on NETFILTER_ADVANCED
+ depends on IP6_NF_MANGLE || IP6_NF_RAW
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/kernel/net/ipv6/netfilter/Makefile b/kernel/net/ipv6/netfilter/Makefile
index c36e0a549..b4f7d0b4e 100644
--- a/kernel/net/ipv6/netfilter/Makefile
+++ b/kernel/net/ipv6/netfilter/Makefile
@@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
# reject
obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
+obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
+
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
@@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
+obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/kernel/net/ipv6/netfilter/ip6_tables.c b/kernel/net/ipv6/netfilter/ip6_tables.c
index 62f5b0d0b..99425cf28 100644
--- a/kernel/net/ipv6/netfilter/ip6_tables.c
+++ b/kernel/net/ipv6/netfilter/ip6_tables.c
@@ -117,7 +117,7 @@ ip6_packet_match(const struct sk_buff *skb,
if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, ip6info->iniface,
- ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
+ ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
return false;
}
@@ -126,14 +126,14 @@ ip6_packet_match(const struct sk_buff *skb,
if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, ip6info->outiface,
- ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
+ ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
return false;
}
/* ... might want to do something with class and flowlabel here ... */
/* look for the desired protocol header */
- if((ip6info->flags & IP6T_F_PROTO)) {
+ if (ip6info->flags & IP6T_F_PROTO) {
int protohdr;
unsigned short _frag_off;
@@ -151,9 +151,9 @@ ip6_packet_match(const struct sk_buff *skb,
ip6info->proto);
if (ip6info->proto == protohdr) {
- if(ip6info->invflags & IP6T_INV_PROTO) {
+ if (ip6info->invflags & IP6T_INV_PROTO)
return false;
- }
+
return true;
}
@@ -275,7 +275,8 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
return 0;
}
-static void trace_packet(const struct sk_buff *skb,
+static void trace_packet(struct net *net,
+ const struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -283,15 +284,12 @@ static void trace_packet(const struct sk_buff *skb,
const struct xt_table_info *private,
const struct ip6t_entry *e)
{
- const void *table_base;
const struct ip6t_entry *root;
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
- struct net *net = dev_net(in ? in : out);
- table_base = private->entries[smp_processor_id()];
- root = get_entry(table_base, private->hook_entry[hook]);
+ root = get_entry(private->entries, private->hook_entry[hook]);
hookname = chainname = hooknames[hook];
comment = comments[NF_IP6_TRACE_COMMENT_RULE];
@@ -307,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb,
}
#endif
-static inline __pure struct ip6t_entry *
+static inline struct ip6t_entry *
ip6t_next_entry(const struct ip6t_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -316,22 +314,23 @@ ip6t_next_entry(const struct ip6t_entry *entry)
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ip6t_do_table(struct sk_buff *skb,
- unsigned int hook,
const struct nf_hook_state *state,
struct xt_table *table)
{
+ unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
const char *indev, *outdev;
const void *table_base;
struct ip6t_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -341,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb,
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
acpar.hotdrop = false;
+ acpar.net = state->net;
acpar.in = state->in;
acpar.out = state->out;
acpar.family = NFPROTO_IPV6;
@@ -357,16 +357,25 @@ ip6t_do_table(struct sk_buff *skb,
*/
smp_read_barrier_depends();
cpu = smp_processor_id();
- table_base = private->entries[cpu];
+ table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
+ struct xt_counters *counter;
IP_NF_ASSERT(e);
acpar.thoff = 0;
@@ -384,7 +393,8 @@ ip6t_do_table(struct sk_buff *skb,
goto no_match;
}
- ADD_COUNTER(e->counters, skb->len, 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, skb->len, 1);
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -392,8 +402,8 @@ ip6t_do_table(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, state->in, state->out,
- table->name, private, e);
+ trace_packet(state->net, skb, hook, state->in,
+ state->out, table->name, private, e);
#endif
/* Standard target? */
if (!t->u.kernel.target->target) {
@@ -406,20 +416,16 @@ ip6t_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr)
+ if (stackidx == 0)
e = get_entry(table_base,
private->underflow[hook]);
else
- e = ip6t_next_entry(jumpstack[--*stackptr]);
+ e = ip6t_next_entry(jumpstack[--stackidx]);
continue;
}
if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
}
e = get_entry(table_base, v);
@@ -437,10 +443,8 @@ ip6t_do_table(struct sk_buff *skb,
break;
} while (!acpar.hotdrop);
- *stackptr = origptr;
-
- xt_write_recseq_end(addend);
- local_bh_enable();
+ xt_write_recseq_end(addend);
+ local_bh_enable();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -557,7 +561,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
- next:
+next:
duprintf("Finished chain %u\n", hook);
}
return 1;
@@ -679,6 +683,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -714,6 +722,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -797,13 +808,15 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
newinfo) */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
- const struct ip6t_replace *repl)
+ const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
unsigned int i;
@@ -879,12 +892,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -900,14 +907,16 @@ get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -952,11 +961,7 @@ copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
@@ -1064,16 +1069,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct ip6t_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(AF_INET6, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1085,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info,
#endif
static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+ const int *len, int compat)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -1147,7 +1152,7 @@ static int get_info(struct net *net, void __user *user,
static int
get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
- const int *len)
+ const int *len)
{
int ret;
struct ip6t_get_entries get;
@@ -1194,7 +1199,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
- const void *loc_cpu_old_entry;
struct ip6t_entry *iter;
ret = 0;
@@ -1237,8 +1241,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(oldinfo);
@@ -1284,8 +1287,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1316,7 +1318,7 @@ static int
do_add_counters(struct net *net, const void __user *user, unsigned int len,
int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1326,7 +1328,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- const void *loc_cpu_entry;
struct ip6t_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1374,7 +1375,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
goto free;
}
-
local_bh_disable();
private = t->private;
if (private->number != num_counters) {
@@ -1383,16 +1383,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
addend = xt_write_recseq_begin();
- loc_cpu_entry = private->entries[curcpu];
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
-
unlock_up_free:
local_bh_enable();
xt_table_unlock(t);
@@ -1459,7 +1458,6 @@ static int
compat_find_calc_match(struct xt_entry_match *m,
const char *name,
const struct ip6t_ip6 *ipv6,
- unsigned int hookmask,
int *size)
{
struct xt_match *match;
@@ -1528,8 +1526,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ipv6, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1623,6 +1620,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -1647,6 +1647,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -1731,7 +1734,7 @@ translate_compat_table(struct net *net,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1783,11 +1786,6 @@ translate_compat_table(struct net *net,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1834,8 +1832,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1906,7 +1903,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *pos;
unsigned int size;
int ret = 0;
- const void *loc_cpu_entry;
unsigned int i = 0;
struct ip6t_entry *iter;
@@ -1914,14 +1910,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -2096,8 +2087,7 @@ struct xt_table *ip6t_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu, but dont care about preemption */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2127,7 +2117,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
if (private->number > private->initial_entries)
diff --git a/kernel/net/ipv6/netfilter/ip6t_REJECT.c b/kernel/net/ipv6/netfilter/ip6t_REJECT.c
index 12331efd4..db29bbf41 100644
--- a/kernel/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/kernel/net/ipv6/netfilter/ip6t_REJECT.c
@@ -35,14 +35,12 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
-
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_reject_info *reject = par->targinfo;
- struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
+ struct net *net = par->net;
- pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
@@ -65,8 +63,11 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum);
break;
- default:
- net_info_ratelimited("case %u not handled yet\n", reject->with);
+ case IP6T_ICMP6_POLICY_FAIL:
+ nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum);
+ break;
+ case IP6T_ICMP6_REJECT_ROUTE:
+ nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum);
break;
}
diff --git a/kernel/net/ipv6/netfilter/ip6t_SYNPROXY.c b/kernel/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 6edb7b106..3deed5860 100644
--- a/kernel/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/kernel/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -37,12 +37,13 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
}
static void
-synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+synproxy_send_tcp(const struct synproxy_net *snet,
+ const struct sk_buff *skb, struct sk_buff *nskb,
struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
struct ipv6hdr *niph, struct tcphdr *nth,
unsigned int tcp_hdr_size)
{
- struct net *net = nf_ct_net((struct nf_conn *)nfct);
+ struct net *net = nf_ct_net(snet->tmpl);
struct dst_entry *dst;
struct flowi6 fl6;
@@ -75,7 +76,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
nf_conntrack_get(nfct);
}
- ip6_local_out(nskb);
+ ip6_local_out(net, nskb->sk, nskb);
return;
free_nskb:
@@ -83,7 +84,8 @@ free_nskb:
}
static void
-synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+synproxy_send_client_synack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts)
{
struct sk_buff *nskb;
@@ -119,7 +121,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
niph, nth, tcp_hdr_size);
}
@@ -163,7 +165,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
niph, nth, tcp_hdr_size);
}
@@ -203,7 +205,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
}
static void
@@ -235,13 +237,14 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
nth->ack_seq = th->ack_seq;
tcp_flag_word(nth) = TCP_FLAG_ACK;
nth->doff = tcp_hdr_size / 4;
- nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
nth->check = 0;
nth->urg_ptr = 0;
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
}
static bool
@@ -272,7 +275,7 @@ static unsigned int
synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_synproxy_info *info = par->targinfo;
- struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_net *snet = synproxy_pernet(par->net);
struct synproxy_options opts = {};
struct tcphdr *th, _th;
@@ -301,7 +304,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
XT_SYNPROXY_OPT_SACK_PERM |
XT_SYNPROXY_OPT_ECN);
- synproxy_send_client_synack(skb, th, &opts);
+ synproxy_send_client_synack(snet, skb, th, &opts);
return NF_DROP;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
@@ -313,11 +316,11 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
+static unsigned int ipv6_synproxy_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *nhs)
{
- struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
+ struct synproxy_net *snet = synproxy_pernet(nhs->net);
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
struct nf_conn_synproxy *synproxy;
@@ -455,14 +458,12 @@ static struct xt_target synproxy_tg6_reg __read_mostly = {
static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
{
.hook = ipv6_synproxy_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
},
{
.hook = ipv6_synproxy_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
diff --git a/kernel/net/ipv6/netfilter/ip6t_rpfilter.c b/kernel/net/ipv6/netfilter/ip6t_rpfilter.c
index 790e0c6b1..1ee1b25df 100644
--- a/kernel/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/kernel/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -26,7 +26,7 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr)
return addr_type & IPV6_ADDR_UNICAST;
}
-static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
+static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
const struct net_device *dev, u8 flags)
{
struct rt6_info *rt;
@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
lookup_flags |= RT6_LOOKUP_F_IFACE;
}
- rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
+ rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
if (rt->dst.error)
goto out;
@@ -93,7 +93,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (unlikely(saddrtype == IPV6_ADDR_ANY))
return true ^ invert; /* not routable: forward path will drop it */
- return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
+ return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert;
}
static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/kernel/net/ipv6/netfilter/ip6table_filter.c b/kernel/net/ipv6/netfilter/ip6table_filter.c
index 5c33d8abc..8b277b983 100644
--- a/kernel/net/ipv6/netfilter/ip6table_filter.c
+++ b/kernel/net/ipv6/netfilter/ip6table_filter.c
@@ -32,12 +32,10 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/kernel/net/ipv6/netfilter/ip6table_mangle.c b/kernel/net/ipv6/netfilter/ip6table_mangle.c
index b551f5b79..abe278b07 100644
--- a/kernel/net/ipv6/netfilter/ip6table_mangle.c
+++ b/kernel/net/ipv6/netfilter/ip6table_mangle.c
@@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state,
- dev_net(state->out)->ipv6.ip6table_mangle);
+ ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
if (ret != NF_DROP && ret != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -66,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(skb);
+ err = ip6_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -76,17 +75,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_mangle_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (ops->hooknum == NF_INET_LOCAL_OUT)
+ if (state->hook == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, state);
- if (ops->hooknum == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, ops->hooknum, state,
- dev_net(state->out)->ipv6.ip6table_mangle);
+ if (state->hook == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, state,
+ state->net->ipv6.ip6table_mangle);
/* INPUT/FORWARD */
- return ip6t_do_table(skb, ops->hooknum, state,
- dev_net(state->in)->ipv6.ip6table_mangle);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/kernel/net/ipv6/netfilter/ip6table_nat.c b/kernel/net/ipv6/netfilter/ip6table_nat.c
index c3a7f7af0..de2a10a56 100644
--- a/kernel/net/ipv6/netfilter/ip6table_nat.c
+++ b/kernel/net/ipv6/netfilter/ip6table_nat.c
@@ -30,49 +30,46 @@ static const struct xt_table nf_nat_ipv6_table = {
.af = NFPROTO_IPV6,
};
-static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
-
- return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
}
-static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain);
}
-static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain);
}
-static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_out(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain);
}
-static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
}
static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
@@ -80,7 +77,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* After packet filtering, change source */
{
.hook = ip6table_nat_out,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
@@ -88,7 +84,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_local_fn,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
@@ -96,7 +91,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* After packet filtering, change source */
{
.hook = ip6table_nat_fn,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/kernel/net/ipv6/netfilter/ip6table_raw.c b/kernel/net/ipv6/netfilter/ip6table_raw.c
index 0b33caad2..902196356 100644
--- a/kernel/net/ipv6/netfilter/ip6table_raw.c
+++ b/kernel/net/ipv6/netfilter/ip6table_raw.c
@@ -19,12 +19,10 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/kernel/net/ipv6/netfilter/ip6table_security.c b/kernel/net/ipv6/netfilter/ip6table_security.c
index fcef83c25..0d856fedf 100644
--- a/kernel/net/ipv6/netfilter/ip6table_security.c
+++ b/kernel/net/ipv6/netfilter/ip6table_security.c
@@ -36,13 +36,10 @@ static const struct xt_table security_table = {
};
static unsigned int
-ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return ip6t_do_table(skb, ops->hooknum, state,
- net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/kernel/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/kernel/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4ba0c34c6..1aa584876 100644
--- a/kernel/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
+static unsigned int ipv6_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
return helper->help(skb, protoff, ct, ctinfo);
}
-static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
+static unsigned int ipv6_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -165,14 +165,14 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
+static unsigned int ipv6_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb);
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
-static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
+static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -181,48 +181,42 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb);
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
{
.hook = ipv6_conntrack_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_conntrack_local,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_helper,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
},
{
.hook = ipv6_helper,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_LAST-1,
@@ -251,7 +245,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
return -EINVAL;
- h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (!h) {
pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
&tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
diff --git a/kernel/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/kernel/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 90388d606..660bc10c7 100644
--- a/kernel/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/kernel/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -36,6 +36,7 @@ static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
+ struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct icmp6hdr *hp;
@@ -56,12 +57,12 @@ static const u_int8_t invmap[] = {
[ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
[ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
[ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
- [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1
+ [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1
};
static const u_int8_t noct_valid_new[] = {
[ICMPV6_MGM_QUERY - 130] = 1,
- [ICMPV6_MGM_REPORT -130] = 1,
+ [ICMPV6_MGM_REPORT - 130] = 1,
[ICMPV6_MGM_REDUCTION - 130] = 1,
[NDISC_ROUTER_SOLICITATION - 130] = 1,
[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
@@ -150,7 +151,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_l4proto *inproto;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
@@ -159,7 +160,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
skb_network_offset(skb)
+ sizeof(struct ipv6hdr)
+ sizeof(struct icmp6hdr),
- PF_INET6, &origtuple)) {
+ PF_INET6, net, &origtuple)) {
pr_debug("icmpv6_error: Can't get tuple\n");
return -NF_ACCEPT;
}
@@ -177,7 +178,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(net, zone, &intuple);
+ h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+ &intuple);
if (!h) {
pr_debug("icmpv6_error: no match\n");
return -NF_ACCEPT;
diff --git a/kernel/net/ipv6/netfilter/nf_conntrack_reasm.c b/kernel/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6f187c8d8..bab4441ed 100644
--- a/kernel/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/kernel/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -59,7 +59,7 @@ struct nf_ct_frag6_skb_cb
struct sk_buff *orig;
};
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
+#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
static struct inet_frags nf_frags;
@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst, u8 ecn)
+ struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
local_bh_disable();
@@ -348,7 +349,7 @@ found:
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- add_frag_mem_limit(&fq->q, skb->truesize);
+ add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -430,7 +431,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- add_frag_mem_limit(&fq->q, clone->truesize);
+ add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -445,7 +446,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- for (fp=head->next; fp; fp = fp->next) {
+ for (fp = head->next; fp; fp = fp->next) {
head->data_len += fp->len;
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
@@ -454,7 +455,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- sub_frag_mem_limit(&fq->q, head->truesize);
+ sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1;
head->next = NULL;
@@ -563,12 +564,10 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
return 0;
}
-struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
+struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
- struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
- : dev_net(skb->dev);
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
@@ -603,7 +602,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(clone);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
@@ -633,6 +632,7 @@ ret_orig:
kfree_skb(clone);
return skb;
}
+EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
@@ -645,15 +645,22 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb)
s = s2;
}
}
+EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res)
+ inet_frags_uninit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
diff --git a/kernel/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/kernel/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index a45db0b47..4fdbed5eb 100644
--- a/kernel/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/kernel/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -33,26 +33,25 @@
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
- u16 zone = NF_CT_DEFAULT_ZONE;
-
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- if (skb->nfct)
- zone = nf_ct_zone((struct nf_conn *)skb->nfct);
-#endif
+ if (skb->nfct) {
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
- return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+ }
#endif
+ if (nf_bridge_in_prerouting(skb))
+ return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
+
if (hooknum == NF_INET_PRE_ROUTING)
- return IP6_DEFRAG_CONNTRACK_IN + zone;
+ return IP6_DEFRAG_CONNTRACK_IN + zone_id;
else
- return IP6_DEFRAG_CONNTRACK_OUT + zone;
-
+ return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
}
-static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
+static unsigned int ipv6_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -64,7 +63,8 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
+ reasm = nf_ct_frag6_gather(state->net, skb,
+ nf_ct6_defrag_user(state->hook, skb));
/* queued */
if (reasm == NULL)
return NF_STOLEN;
@@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
nf_ct_frag6_consume_orig(reasm);
- NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm,
+ NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm,
state->in, state->out,
state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
@@ -85,14 +85,12 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
static struct nf_hook_ops ipv6_defrag_ops[] = {
{
.hook = ipv6_defrag,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
},
{
.hook = ipv6_defrag,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
diff --git a/kernel/net/ipv6/netfilter/nf_dup_ipv6.c b/kernel/net/ipv6/netfilter/nf_dup_ipv6.c
new file mode 100644
index 000000000..6989c70ae
--- /dev/null
+++ b/kernel/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -0,0 +1,82 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
+ const struct in6_addr *gw, int oif)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ if (oif != -1)
+ fl6.flowi6_oif = oif;
+
+ fl6.daddr = *gw;
+ fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) |
+ (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]);
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ return false;
+ }
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+ skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
+
+ return true;
+}
+
+void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
+ const struct in6_addr *gw, int oif)
+{
+ if (this_cpu_read(nf_skb_duplicated))
+ return;
+ skb = pskb_copy(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &nf_ct_untracked_get()->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+#endif
+ if (hooknum == NF_INET_PRE_ROUTING ||
+ hooknum == NF_INET_LOCAL_IN) {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ --iph->hop_limit;
+ }
+ if (nf_dup_ipv6_route(net, skb, gw, oif)) {
+ __this_cpu_write(nf_skb_duplicated, true);
+ ip6_local_out(net, skb->sk, skb);
+ __this_cpu_write(nf_skb_duplicated, false);
+ } else {
+ kfree_skb(skb);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv6);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication");
+MODULE_LICENSE("GPL");
diff --git a/kernel/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/kernel/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index e76900e0a..238e70c3f 100644
--- a/kernel/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -124,7 +124,7 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
newip = &t->dst.u3.in6;
}
inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
- newip->s6_addr32, 1);
+ newip->s6_addr32, true);
}
static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
@@ -155,7 +155,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
}
} else
inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), 1);
+ htons(oldlen), htons(datalen), true);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
unsigned int
-nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -272,7 +272,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
__be16 frag_off;
int hdrlen;
u8 nexthdr;
@@ -303,7 +303,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- ops->hooknum,
+ state->hook,
hdrlen))
return NF_DROP;
else
@@ -317,21 +317,21 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = do_chain(ops, skb, state, ct);
+ ret = do_chain(priv, skb, state, ct);
if (ret != NF_ACCEPT)
return ret;
- if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
break;
- ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ ret = nf_nat_alloc_null_binding(ct, state->hook);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
break;
@@ -340,11 +340,11 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, state->hook, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -353,9 +353,9 @@ oif_changed:
EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
unsigned int
-nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
unsigned int
-nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -403,7 +403,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
unsigned int
-nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -430,14 +430,14 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(skb);
+ err = ip6_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -446,7 +446,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/kernel/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/kernel/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 774560966..31ba7ca19 100644
--- a/kernel/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -34,7 +34,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY));
- if (ipv6_dev_get_saddr(dev_net(out), out,
+ if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
&ipv6_hdr(skb)->daddr, 0, &src) < 0)
return NF_DROP;
diff --git a/kernel/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/kernel/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 2205e8eee..57593b00c 100644
--- a/kernel/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/kernel/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -73,7 +73,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
- tuple->src.u.icmp.id, 0);
+ tuple->src.u.icmp.id, false);
hdr->icmp6_identifier = tuple->src.u.icmp.id;
}
return true;
diff --git a/kernel/net/ipv6/netfilter/nf_reject_ipv6.c b/kernel/net/ipv6/netfilter/nf_reject_ipv6.c
index 94b4c6dfb..e0f922b77 100644
--- a/kernel/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -26,7 +26,7 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
int tcphoff;
proto = oip6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data),
+ tcphoff = ipv6_skip_exthdr(oldskb, ((u8 *)(oip6h + 1) - oldskb->data),
&proto, &frag_off);
if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
@@ -206,7 +206,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
dev_queue_xmit(nskb);
} else
#endif
- ip6_local_out(nskb);
+ ip6_local_out(net, nskb->sk, nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset6);
@@ -224,7 +224,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
return true;
proto = ip6h->nexthdr;
- thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+ thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo);
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
diff --git a/kernel/net/ipv6/netfilter/nf_tables_ipv6.c b/kernel/net/ipv6/netfilter/nf_tables_ipv6.c
index c8148ba76..120ea9131 100644
--- a/kernel/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -16,20 +16,20 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_ipv6.h>
-static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+static unsigned int nft_do_chain_ipv6(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;
/* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0)
+ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
return NF_DROP;
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+static unsigned int nft_ipv6_output(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
- return nft_do_chain_ipv6(ops, skb, state);
+ return nft_do_chain_ipv6(priv, skb, state);
}
struct nft_af_info nft_af_ipv6 __read_mostly = {
diff --git a/kernel/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/kernel/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 951bb458b..443cd306c 100644
--- a/kernel/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -24,44 +24,44 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ipv6.h>
-static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, ops, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb, state);
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_out(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
}
static const struct nf_chain_type nft_chain_nat_ipv6 = {
diff --git a/kernel/net/ipv6/netfilter/nft_chain_route_ipv6.c b/kernel/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 0dafdaac5..71d995ff3 100644
--- a/kernel/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -22,7 +22,7 @@
#include <net/netfilter/nf_tables_ipv6.h>
#include <net/route.h>
-static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+static unsigned int nf_route_table_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
u32 mark, flowlabel;
/* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0)
+ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
return NF_DROP;
/* save source/dest address, mark, hoplimit, flowlabel, priority */
@@ -45,14 +45,14 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u32 *)ipv6_hdr(skb));
- ret = nft_do_chain(&pkt, ops);
+ ret = nft_do_chain(&pkt, priv);
if (ret != NF_DROP && ret != NF_QUEUE &&
(memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
- return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+ return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
return ret;
}
@@ -61,11 +61,11 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
+ .owner = THIS_MODULE,
.hook_mask = (1 << NF_INET_LOCAL_OUT),
.hooks = {
- [NF_INET_LOCAL_OUT] = nf_route_table_hook,
- },
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
};
static int __init nft_chain_route_init(void)
diff --git a/kernel/net/ipv6/netfilter/nft_dup_ipv6.c b/kernel/net/ipv6/netfilter/nft_dup_ipv6.c
new file mode 100644
index 000000000..8bfd470cb
--- /dev/null
+++ b/kernel/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+
+struct nft_dup_ipv6 {
+ enum nft_registers sreg_addr:8;
+ enum nft_registers sreg_dev:8;
+};
+
+static void nft_dup_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+ struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
+}
+
+static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+ int err;
+
+ if (tb[NFTA_DUP_SREG_ADDR] == NULL)
+ return -EINVAL;
+
+ priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
+ err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DUP_SREG_DEV] != NULL) {
+ priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+ return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ }
+ return 0;
+}
+
+static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+ nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_dup_ipv6_type;
+static const struct nft_expr_ops nft_dup_ipv6_ops = {
+ .type = &nft_dup_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)),
+ .eval = nft_dup_ipv6_eval,
+ .init = nft_dup_ipv6_init,
+ .dump = nft_dup_ipv6_dump,
+};
+
+static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 },
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static struct nft_expr_type nft_dup_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "dup",
+ .ops = &nft_dup_ipv6_ops,
+ .policy = nft_dup_ipv6_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_dup_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_dup_ipv6_type);
+}
+
+static void __exit nft_dup_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_ipv6_type);
+}
+
+module_init(nft_dup_ipv6_module_init);
+module_exit(nft_dup_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
diff --git a/kernel/net/ipv6/netfilter/nft_redir_ipv6.c b/kernel/net/ipv6/netfilter/nft_redir_ipv6.c
index effd393bd..aca44e89a 100644
--- a/kernel/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -35,8 +35,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
range.flags |= priv->flags;
- regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range,
- pkt->ops->hooknum);
+ regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook);
}
static struct nft_expr_type nft_redir_ipv6_type;
diff --git a/kernel/net/ipv6/netfilter/nft_reject_ipv6.c b/kernel/net/ipv6/netfilter/nft_reject_ipv6.c
index d0d1540ec..533cd5719 100644
--- a/kernel/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/kernel/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -24,15 +24,14 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach6(net, pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
+ pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
break;
default:
break;
diff --git a/kernel/net/ipv6/output_core.c b/kernel/net/ipv6/output_core.c
index 85892af57..462f2a76b 100644
--- a/kernel/net/ipv6/output_core.c
+++ b/kernel/net/ipv6/output_core.c
@@ -8,9 +8,11 @@
#include <net/ip6_fib.h>
#include <net/addrconf.h>
#include <net/secure_seq.h>
+#include <linux/netfilter.h>
static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
- struct in6_addr *dst, struct in6_addr *src)
+ const struct in6_addr *dst,
+ const struct in6_addr *src)
{
u32 hash, id;
@@ -60,17 +62,17 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
-void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr,
- struct rt6_info *rt)
+__be32 ipv6_select_ident(struct net *net,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
static u32 ip6_idents_hashrnd __read_mostly;
u32 id;
net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
- id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr,
- &rt->rt6i_src.addr);
- fhdr->identification = htonl(id);
+ id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+ return htonl(id);
}
EXPORT_SYMBOL(ipv6_select_ident);
@@ -136,7 +138,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
EXPORT_SYMBOL(ip6_dst_hoplimit);
#endif
-static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int len;
@@ -146,30 +148,20 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(len);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
- return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
-}
-
-int __ip6_local_out(struct sk_buff *skb)
-{
- return __ip6_local_out_sk(skb->sk, skb);
+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
}
EXPORT_SYMBOL_GPL(__ip6_local_out);
-int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int err;
- err = __ip6_local_out_sk(sk, skb);
+ err = __ip6_local_out(net, sk, skb);
if (likely(err == 1))
- err = dst_output_sk(sk, skb);
+ err = dst_output(net, sk, skb);
return err;
}
-EXPORT_SYMBOL_GPL(ip6_local_out_sk);
-
-int ip6_local_out(struct sk_buff *skb)
-{
- return ip6_local_out_sk(skb->sk, skb);
-}
EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/kernel/net/ipv6/raw.c b/kernel/net/ipv6/raw.c
index 8072bd413..99140986e 100644
--- a/kernel/net/ipv6/raw.c
+++ b/kernel/net/ipv6/raw.c
@@ -295,7 +295,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* unspecified and mapped address have a v4 equivalent.
*/
v4addr = LOOPBACK4_IPV6;
- if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+ if (!(addr_type & IPV6_ADDR_MULTICAST) &&
+ !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) {
err = -EADDRNOTAVAIL;
if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
@@ -613,6 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
unsigned int flags)
{
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
int err;
@@ -651,9 +653,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (err)
goto error_fault;
- IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, rt->dst.dev, dst_output_sk);
+ IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+ NULL, rt->dst.dev, dst_output);
if (err > 0)
err = net_xmit_errno(err);
if (err)
@@ -665,7 +667,7 @@ error_fault:
err = -EFAULT;
kfree_skb(skb);
error:
- IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
if (err == -ENOBUFS && !np->recverr)
err = 0;
return err;
@@ -731,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
@@ -837,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -865,6 +870,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_oif = np->ucast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ if (inet->hdrincl)
+ fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
+
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -901,6 +909,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
@@ -1324,13 +1333,7 @@ static struct inet_protosw rawv6_protosw = {
int __init rawv6_init(void)
{
- int ret;
-
- ret = inet6_register_protosw(&rawv6_protosw);
- if (ret)
- goto out;
-out:
- return ret;
+ return inet6_register_protosw(&rawv6_protosw);
}
void rawv6_exit(void)
diff --git a/kernel/net/ipv6/reassembly.c b/kernel/net/ipv6/reassembly.c
index 8ffa2c8cc..45f5ae51d 100644
--- a/kernel/net/ipv6/reassembly.c
+++ b/kernel/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
return fq->id == arg->id &&
fq->user == arg->user &&
ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst);
+ ipv6_addr_equal(&fq->daddr, arg->dst) &&
+ (arg->iif == fq->iif ||
+ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)));
}
EXPORT_SYMBOL(ip6_frag_match);
@@ -144,7 +147,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
- if (fq->q.flags & INET_FRAG_EVICTED)
+ if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock;
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
static struct frag_queue *
fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, u8 ecn)
+ const struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
hash = inet6_hash_frag(id, src, dst);
@@ -330,7 +334,7 @@ found:
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
- add_frag_mem_limit(&fq->q, skb->truesize);
+ add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -443,7 +447,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(&fq->q, clone->truesize);
+ add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -481,7 +485,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
- sub_frag_mem_limit(&fq->q, sum_truesize);
+ sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;
@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq) {
int ret;
@@ -706,13 +710,19 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
+ int res;
+
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->ipv6.frags);
-
- return ip6_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res)
+ return res;
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res)
+ inet_frags_uninit_net(&net->ipv6.frags);
+ return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/kernel/net/ipv6/route.c b/kernel/net/ipv6/route.c
index f371fefa7..3f164d3aa 100644
--- a/kernel/net/ipv6/route.c
+++ b/kernel/net/ipv6/route.c
@@ -54,10 +54,14 @@
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
+#include <net/lwtunnel.h>
+#include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -72,8 +76,7 @@ enum rt6_nud_state {
RT6_NUD_SUCCEED = 1
};
-static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
- const struct in6_addr *dest);
+static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
@@ -84,14 +87,15 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -104,65 +108,83 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *gwaddr, int ifindex);
#endif
-static void rt6_bind_peer(struct rt6_info *rt, int create)
+struct uncached_list {
+ spinlock_t lock;
+ struct list_head head;
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
+
+static void rt6_uncached_list_add(struct rt6_info *rt)
{
- struct inet_peer_base *base;
- struct inet_peer *peer;
+ struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
- base = inetpeer_base_ptr(rt->_rt6i_peer);
- if (!base)
- return;
+ rt->dst.flags |= DST_NOCACHE;
+ rt->rt6i_uncached_list = ul;
- peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
- if (peer) {
- if (!rt6_set_peer(rt, peer))
- inet_putpeer(peer);
- }
+ spin_lock_bh(&ul->lock);
+ list_add_tail(&rt->rt6i_uncached, &ul->head);
+ spin_unlock_bh(&ul->lock);
}
-static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
+static void rt6_uncached_list_del(struct rt6_info *rt)
{
- if (rt6_has_peer(rt))
- return rt6_peer_ptr(rt);
+ if (!list_empty(&rt->rt6i_uncached)) {
+ struct uncached_list *ul = rt->rt6i_uncached_list;
- rt6_bind_peer(rt, create);
- return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
+ spin_lock_bh(&ul->lock);
+ list_del(&rt->rt6i_uncached);
+ spin_unlock_bh(&ul->lock);
+ }
}
-static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
+static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
- return __rt6_get_peer(rt, 1);
-}
+ struct net_device *loopback_dev = net->loopback_dev;
+ int cpu;
-static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
-{
- struct rt6_info *rt = (struct rt6_info *) dst;
- struct inet_peer *peer;
- u32 *p = NULL;
-
- if (!(rt->dst.flags & DST_HOST))
- return dst_cow_metrics_generic(dst, old);
+ if (dev == loopback_dev)
+ return;
- peer = rt6_get_peer_create(rt);
- if (peer) {
- u32 *old_p = __DST_METRICS_PTR(old);
- unsigned long prev, new;
+ for_each_possible_cpu(cpu) {
+ struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
+ struct rt6_info *rt;
- p = peer->metrics;
- if (inet_metrics_new(peer) ||
- (old & DST_METRICS_FORCE_OVERWRITE))
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+ spin_lock_bh(&ul->lock);
+ list_for_each_entry(rt, &ul->head, rt6i_uncached) {
+ struct inet6_dev *rt_idev = rt->rt6i_idev;
+ struct net_device *rt_dev = rt->dst.dev;
- new = (unsigned long) p;
- prev = cmpxchg(&dst->_metrics, old, new);
+ if (rt_idev->dev == dev) {
+ rt->rt6i_idev = in6_dev_get(loopback_dev);
+ in6_dev_put(rt_idev);
+ }
- if (prev != old) {
- p = __DST_METRICS_PTR(prev);
- if (prev & DST_METRICS_READ_ONLY)
- p = NULL;
+ if (rt_dev == dev) {
+ rt->dst.dev = loopback_dev;
+ dev_hold(rt->dst.dev);
+ dev_put(rt_dev);
+ }
}
+ spin_unlock_bh(&ul->lock);
}
- return p;
+}
+
+static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
+{
+ return dst_metrics_write_ptr(rt->dst.from);
+}
+
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+ struct rt6_info *rt = (struct rt6_info *)dst;
+
+ if (rt->rt6i_flags & RTF_PCPU)
+ return rt6_pcpu_cow_metrics(rt);
+ else if (rt->rt6i_flags & RTF_CACHE)
+ return NULL;
+ else
+ return dst_cow_metrics_generic(dst, old);
}
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@@ -227,12 +249,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
{
}
-static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
- unsigned long old)
-{
- return NULL;
-}
-
static struct dst_ops ip6_dst_blackhole_ops = {
.family = AF_INET6,
.destroy = ip6_dst_destroy,
@@ -241,7 +257,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.redirect = ip6_rt_blackhole_redirect,
- .cow_metrics = ip6_rt_blackhole_cow_metrics,
+ .cow_metrics = dst_cow_metrics_generic,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -288,7 +304,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard_sk,
+ .output = dst_discard_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -298,34 +314,67 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
#endif
+static void rt6_info_init(struct rt6_info *rt)
+{
+ struct dst_entry *dst = &rt->dst;
+
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ INIT_LIST_HEAD(&rt->rt6i_uncached);
+}
+
/* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct net *net,
- struct net_device *dev,
- int flags,
- struct fib6_table *table)
+static struct rt6_info *__ip6_dst_alloc(struct net *net,
+ struct net_device *dev,
+ int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags);
+ if (rt)
+ rt6_info_init(rt);
+
+ return rt;
+}
+
+static struct rt6_info *ip6_dst_alloc(struct net *net,
+ struct net_device *dev,
+ int flags)
+{
+ struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
+
if (rt) {
- struct dst_entry *dst = &rt->dst;
+ rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
+ if (rt->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **p;
- memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
- INIT_LIST_HEAD(&rt->rt6i_siblings);
+ p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ /* no one shares rt */
+ *p = NULL;
+ }
+ } else {
+ dst_destroy((struct dst_entry *)rt);
+ return NULL;
+ }
}
+
return rt;
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
- struct inet6_dev *idev = rt->rt6i_idev;
struct dst_entry *from = dst->from;
+ struct inet6_dev *idev;
- if (!(rt->dst.flags & DST_HOST))
- dst_destroy_metrics_generic(dst);
+ dst_destroy_metrics_generic(dst);
+ free_percpu(rt->rt6i_pcpu);
+ rt6_uncached_list_del(rt);
+ idev = rt->rt6i_idev;
if (idev) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
@@ -333,11 +382,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
dst->from = NULL;
dst_release(from);
-
- if (rt6_has_peer(rt)) {
- struct inet_peer *peer = rt6_peer_ptr(rt);
- inet_putpeer(peer);
- }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -360,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
}
}
+static bool __rt6_check_expired(const struct rt6_info *rt)
+{
+ if (rt->rt6i_flags & RTF_EXPIRES)
+ return time_after(jiffies, rt->dst.expires);
+ else
+ return false;
+}
+
static bool rt6_check_expired(const struct rt6_info *rt)
{
if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -378,31 +430,7 @@ static bool rt6_check_expired(const struct rt6_info *rt)
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
const struct flowi6 *fl6)
{
- unsigned int val = fl6->flowi6_proto;
-
- val ^= ipv6_addr_hash(&fl6->daddr);
- val ^= ipv6_addr_hash(&fl6->saddr);
-
- /* Work only if this not encapsulated */
- switch (fl6->flowi6_proto) {
- case IPPROTO_UDP:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- val ^= (__force u16)fl6->fl6_sport;
- val ^= (__force u16)fl6->fl6_dport;
- break;
-
- case IPPROTO_ICMPV6:
- val ^= (__force u16)fl6->fl6_icmp_type;
- val ^= (__force u16)fl6->fl6_icmp_code;
- break;
- }
- /* RFC6438 recommands to use flowlabel */
- val ^= (__force u32)fl6->flowlabel;
-
- /* Perhaps, we need to tune, this function? */
- val = val ^ (val >> 7) ^ (val >> 12);
- return val % candidate_count;
+ return get_hash_from_flowi6(fl6) % candidate_count;
}
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
@@ -455,10 +483,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (dev->flags & IFF_LOOPBACK) {
if (!sprt->rt6i_idev ||
sprt->rt6i_idev->dev->ifindex != oif) {
- if (flags & RT6_LOOKUP_F_IFACE && oif)
+ if (flags & RT6_LOOKUP_F_IFACE)
continue;
- if (local && (!oif ||
- local->rt6i_idev->dev->ifindex == oif))
+ if (local &&
+ local->rt6i_idev->dev->ifindex == oif)
continue;
}
local = sprt;
@@ -495,13 +523,14 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
dev_put(work->dev);
kfree(work);
}
static void rt6_probe(struct rt6_info *rt)
{
+ struct __rt6_probe_work *work;
struct neighbour *neigh;
/*
* Okay, this does not seem to be appropriate
@@ -516,34 +545,33 @@ static void rt6_probe(struct rt6_info *rt)
rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (neigh) {
- write_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
goto out;
- }
-
- if (!neigh ||
- time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct __rt6_probe_work *work;
+ work = NULL;
+ write_lock(&neigh->lock);
+ if (!(neigh->nud_state & NUD_VALID) &&
+ time_after(jiffies,
+ neigh->updated +
+ rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work)
+ __neigh_set_probe_once(neigh);
+ }
+ write_unlock(&neigh->lock);
+ } else {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ }
- if (neigh && work)
- __neigh_set_probe_once(neigh);
-
- if (neigh)
- write_unlock(&neigh->lock);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
- if (work) {
- INIT_WORK(&work->work, rt6_probe_deferred);
- work->target = rt->rt6i_gateway;
- dev_hold(rt->dst.dev);
- work->dev = rt->dst.dev;
- schedule_work(&work->work);
- }
- } else {
out:
- write_unlock(&neigh->lock);
- }
rcu_read_unlock_bh();
}
#else
@@ -622,6 +650,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
{
int m;
bool match_do_rr = false;
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct net_device *dev = rt->dst.dev;
+
+ if (dev && !netif_carrier_ok(dev) &&
+ idev->cnf.ignore_routes_with_linkdown)
+ goto out;
if (rt6_check_expired(rt))
goto out;
@@ -652,15 +686,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
u32 metric, int oif, int strict,
bool *do_rr)
{
- struct rt6_info *rt, *match;
+ struct rt6_info *rt, *match, *cont;
int mpri = -1;
match = NULL;
- for (rt = rr_head; rt && rt->rt6i_metric == metric;
- rt = rt->dst.rt6_next)
+ cont = NULL;
+ for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_metric != metric) {
+ cont = rt;
+ break;
+ }
+
match = find_match(rt, oif, strict, &mpri, match, do_rr);
- for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
- rt = rt->dst.rt6_next)
+ }
+
+ for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_metric != metric) {
+ cont = rt;
+ break;
+ }
+
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ }
+
+ if (match || !cont)
+ return match;
+
+ for (rt = cont; rt; rt = rt->dst.rt6_next)
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -694,6 +746,11 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
return match ? match : net->ipv6.ip6_null_entry;
}
+static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
+}
+
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
const struct in6_addr *gwaddr)
@@ -872,9 +929,9 @@ int ip6_ins_rt(struct rt6_info *rt)
return __ip6_ins_rt(rt, &info, &mxc);
}
-static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct rt6_info *rt;
@@ -882,15 +939,25 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
* Clone the route.
*/
- rt = ip6_rt_copy(ort, daddr);
+ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
+ ort = (struct rt6_info *)ort->dst.from;
- if (rt) {
+ rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
+
+ if (!rt)
+ return NULL;
+
+ ip6_rt_copy_init(rt, ort);
+ rt->rt6i_flags |= RTF_CACHE;
+ rt->rt6i_metric = 0;
+ rt->dst.flags |= DST_HOST;
+ rt->rt6i_dst.addr = *daddr;
+ rt->rt6i_dst.plen = 128;
+
+ if (!rt6_is_gw_or_nonexthop(ort)) {
if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
-
- rt->rt6i_flags |= RTF_CACHE;
-
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
rt->rt6i_src.addr = *saddr;
@@ -902,35 +969,93 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
return rt;
}
-static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
- const struct in6_addr *daddr)
+static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
- struct rt6_info *rt = ip6_rt_copy(ort, daddr);
+ struct rt6_info *pcpu_rt;
- if (rt)
- rt->rt6i_flags |= RTF_CACHE;
- return rt;
+ pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
+ rt->dst.dev, rt->dst.flags);
+
+ if (!pcpu_rt)
+ return NULL;
+ ip6_rt_copy_init(pcpu_rt, rt);
+ pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
+ pcpu_rt->rt6i_flags |= RTF_PCPU;
+ return pcpu_rt;
+}
+
+/* It should be called with read_lock_bh(&tb6_lock) acquired */
+static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+{
+ struct rt6_info *pcpu_rt, **p;
+
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ pcpu_rt = *p;
+
+ if (pcpu_rt) {
+ dst_hold(&pcpu_rt->dst);
+ rt6_dst_from_metrics_check(pcpu_rt);
+ }
+ return pcpu_rt;
+}
+
+static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
+{
+ struct fib6_table *table = rt->rt6i_table;
+ struct rt6_info *pcpu_rt, *prev, **p;
+
+ pcpu_rt = ip6_rt_pcpu_alloc(rt);
+ if (!pcpu_rt) {
+ struct net *net = dev_net(rt->dst.dev);
+
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return net->ipv6.ip6_null_entry;
+ }
+
+ read_lock_bh(&table->tb6_lock);
+ if (rt->rt6i_pcpu) {
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ if (prev) {
+ /* If someone did it before us, return prev instead */
+ dst_destroy(&pcpu_rt->dst);
+ pcpu_rt = prev;
+ }
+ } else {
+ /* rt has been removed from the fib6 tree
+ * before we have a chance to acquire the read_lock.
+ * In this case, don't brother to create a pcpu rt
+ * since rt is going away anyway. The next
+ * dst_check() will trigger a re-lookup.
+ */
+ dst_destroy(&pcpu_rt->dst);
+ pcpu_rt = rt;
+ }
+ dst_hold(&pcpu_rt->dst);
+ rt6_dst_from_metrics_check(pcpu_rt);
+ read_unlock_bh(&table->tb6_lock);
+ return pcpu_rt;
}
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt, *nrt;
+ struct rt6_info *rt;
int strict = 0;
- int attempts = 3;
- int err;
strict |= flags & RT6_LOOKUP_F_IFACE;
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
-redo_fib6_lookup_lock:
read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ oif = 0;
+
redo_rt6_select:
rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
@@ -944,51 +1069,65 @@ redo_rt6_select:
strict &= ~RT6_LOOKUP_F_REACHABLE;
fn = saved_fn;
goto redo_rt6_select;
- } else {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- goto out2;
}
}
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- if (rt->rt6i_flags & RTF_CACHE)
- goto out2;
+ if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
+ dst_use(&rt->dst, jiffies);
+ read_unlock_bh(&table->tb6_lock);
- if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
- nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
- else if (!(rt->dst.flags & DST_HOST))
- nrt = rt6_alloc_clone(rt, &fl6->daddr);
- else
- goto out2;
+ rt6_dst_from_metrics_check(rt);
+ return rt;
+ } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
+ !(rt->rt6i_flags & RTF_GATEWAY))) {
+ /* Create a RTF_CACHE clone which will not be
+ * owned by the fib6 tree. It is for the special case where
+ * the daddr in the skb during the neighbor look-up is different
+ * from the fl6->daddr used to look-up route here.
+ */
- ip6_rt_put(rt);
- rt = nrt ? : net->ipv6.ip6_null_entry;
+ struct rt6_info *uncached_rt;
- dst_hold(&rt->dst);
- if (nrt) {
- err = ip6_ins_rt(nrt);
- if (!err)
- goto out2;
- }
+ dst_use(&rt->dst, jiffies);
+ read_unlock_bh(&table->tb6_lock);
- if (--attempts <= 0)
- goto out2;
+ uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
+ dst_release(&rt->dst);
- /*
- * Race condition! In the gap, when table->tb6_lock was
- * released someone could insert this route. Relookup.
- */
- ip6_rt_put(rt);
- goto redo_fib6_lookup_lock;
+ if (uncached_rt)
+ rt6_uncached_list_add(uncached_rt);
+ else
+ uncached_rt = net->ipv6.ip6_null_entry;
-out2:
- rt->dst.lastuse = jiffies;
- rt->dst.__use++;
+ dst_hold(&uncached_rt->dst);
+ return uncached_rt;
- return rt;
+ } else {
+ /* Get a percpu copy */
+
+ struct rt6_info *pcpu_rt;
+
+ rt->dst.lastuse = jiffies;
+ rt->dst.__use++;
+ pcpu_rt = rt6_get_pcpu_route(rt);
+
+ if (pcpu_rt) {
+ read_unlock_bh(&table->tb6_lock);
+ } else {
+ /* We have to do the read_unlock first
+ * because rt6_make_pcpu_route() may trigger
+ * ip6_dst_gc() which will take the write_lock.
+ */
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ dst_release(&rt->dst);
+ }
+
+ return pcpu_rt;
+
+ }
}
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
@@ -1012,8 +1151,9 @@ void ip6_route_input(struct sk_buff *skb)
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = skb->dev->ifindex,
+ .flowi6_iif = l3mdev_fib_oif(skb->dev),
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
@@ -1021,6 +1161,10 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_proto = iph->nexthdr,
};
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
+ fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
@@ -1030,24 +1174,31 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6, int flags)
{
- int flags = 0;
+ struct dst_entry *dst;
+ bool any_src;
+
+ dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ if (dst)
+ return dst;
fl6->flowi6_iif = LOOPBACK_IFINDEX;
- if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
+ any_src = ipv6_addr_any(&fl6->saddr);
+ if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
+ (fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl6->saddr))
+ if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL(ip6_route_output);
+EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
@@ -1056,25 +1207,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
- new = &rt->dst;
-
- memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
- rt6_init_peer(rt, net->ipv6.peers);
+ rt6_info_init(rt);
+ new = &rt->dst;
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard_sk;
+ new->output = dst_discard_out;
- if (dst_metrics_read_only(&ort->dst))
- new->_metrics = ort->dst._metrics;
- else
- dst_copy_metrics(new, &ort->dst);
+ dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->rt6i_gateway = ort->rt6i_gateway;
- rt->rt6i_flags = ort->rt6i_flags;
+ rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1093,6 +1239,34 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
* Destination cache support functions
*/
+static void rt6_dst_from_metrics_check(struct rt6_info *rt)
+{
+ if (rt->dst.from &&
+ dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+}
+
+static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
+{
+ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ return NULL;
+
+ if (rt6_check_expired(rt))
+ return NULL;
+
+ return &rt->dst;
+}
+
+static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
+{
+ if (!__rt6_check_expired(rt) &&
+ rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+ return &rt->dst;
+ else
+ return NULL;
+}
+
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rt6_info *rt;
@@ -1103,13 +1277,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
- return NULL;
- if (rt6_check_expired(rt))
- return NULL;
+ rt6_dst_from_metrics_check(rt);
- return dst;
+ if (rt->rt6i_flags & RTF_PCPU ||
+ (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
+ return rt6_dst_from_check(rt, cookie);
+ else
+ return rt6_check(rt, cookie);
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -1140,32 +1315,76 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
dst_hold(&rt->dst);
- if (ip6_del_rt(rt))
- dst_free(&rt->dst);
+ ip6_del_rt(rt);
} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
}
}
}
-static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
+{
+ struct net *net = dev_net(rt->dst.dev);
+
+ rt->rt6i_flags |= RTF_MODIFIED;
+ rt->rt6i_pmtu = mtu;
+ rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
+}
+
+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+{
+ return !(rt->rt6i_flags & RTF_CACHE) &&
+ (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+}
+
+static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+ const struct ipv6hdr *iph, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info *)dst;
- dst_confirm(dst);
- if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
- struct net *net = dev_net(dst->dev);
+ if (rt6->rt6i_flags & RTF_LOCAL)
+ return;
- rt6->rt6i_flags |= RTF_MODIFIED;
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
+ dst_confirm(dst);
+ mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu >= dst_mtu(dst))
+ return;
- dst_metric_set(dst, RTAX_MTU, mtu);
- rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
+ if (!rt6_cache_allowed_for_pmtu(rt6)) {
+ rt6_do_update_pmtu(rt6, mtu);
+ } else {
+ const struct in6_addr *daddr, *saddr;
+ struct rt6_info *nrt6;
+
+ if (iph) {
+ daddr = &iph->daddr;
+ saddr = &iph->saddr;
+ } else if (sk) {
+ daddr = &sk->sk_v6_daddr;
+ saddr = &inet6_sk(sk)->saddr;
+ } else {
+ return;
+ }
+ nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
+ if (nrt6) {
+ rt6_do_update_pmtu(nrt6, mtu);
+
+ /* ip6_ins_rt(nrt6) will bump the
+ * rt6->rt6i_node->fn_sernum
+ * which will fail the next rt6_check() and
+ * invalidate the sk->sk_dst_cache.
+ */
+ ip6_ins_rt(nrt6);
+ }
}
}
+static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu)
+{
+ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+}
+
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
int oif, u32 mark)
{
@@ -1182,7 +1401,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
+ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -1341,12 +1560,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+ unsigned int mtu = rt->rt6i_pmtu;
struct inet6_dev *idev;
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ goto out;
+
mtu = IPV6_MIN_MTU;
rcu_read_lock();
@@ -1373,7 +1597,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev))
return ERR_PTR(-ENODEV);
- rt = ip6_dst_alloc(net, dev, 0, NULL);
+ rt = ip6_dst_alloc(net, dev, 0);
if (unlikely(!rt)) {
in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM);
@@ -1472,6 +1696,7 @@ out:
static int ip6_convert_metrics(struct mx6_config *mxc,
const struct fib6_config *cfg)
{
+ bool ecn_ca = false;
struct nlattr *nla;
int remaining;
u32 *mp;
@@ -1485,51 +1710,57 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (unlikely(type > RTAX_MAX))
+ goto err;
- if (type) {
- u32 val;
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
- if (unlikely(type > RTAX_MAX))
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ if (val == TCP_CA_UNSPEC)
goto err;
- if (type == RTAX_CC_ALGO) {
- char tmp[TCP_CA_NAME_MAX];
+ } else {
+ val = nla_get_u32(nla);
+ }
+ if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+ goto err;
- nla_strlcpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(tmp);
- if (val == TCP_CA_UNSPEC)
- goto err;
- } else {
- val = nla_get_u32(nla);
- }
+ mp[type - 1] = val;
+ __set_bit(type - 1, mxc->mx_valid);
+ }
- mp[type - 1] = val;
- __set_bit(type - 1, mxc->mx_valid);
- }
+ if (ecn_ca) {
+ __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
+ mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
}
mxc->mx = mp;
-
return 0;
err:
kfree(mp);
return -EINVAL;
}
-int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
- int err;
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
int addr_type;
+ int err = -EINVAL;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
- return -EINVAL;
+ goto out;
#ifndef CONFIG_IPV6_SUBTREES
if (cfg->fc_src_len)
- return -EINVAL;
+ goto out;
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
@@ -1559,7 +1790,8 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL,
+ (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
if (!rt) {
err = -ENOMEM;
@@ -1587,12 +1819,29 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
rt->dst.output = ip6_output;
+ if (cfg->fc_encap) {
+ struct lwtunnel_state *lwtstate;
+
+ err = lwtunnel_build_state(dev, cfg->fc_encap_type,
+ cfg->fc_encap, AF_INET6, cfg,
+ &lwtstate);
+ if (err)
+ goto out;
+ rt->dst.lwtstate = lwtstate_get(lwtstate);
+ if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_output = rt->dst.output;
+ rt->dst.output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_input = rt->dst.input;
+ rt->dst.input = lwtunnel_input;
+ }
+ }
+
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128) {
+ if (rt->rt6i_dst.plen == 128)
rt->dst.flags |= DST_HOST;
- dst_metrics_set_force_overwrite(&rt->dst);
- }
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1626,7 +1875,7 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard_sk;
+ rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -1635,9 +1884,11 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
rt->dst.input = ip6_pkt_prohibit;
break;
case RTN_THROW:
+ case RTN_UNREACHABLE:
default:
rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
- : -ENETUNREACH;
+ : (cfg->fc_type == RTN_UNREACHABLE)
+ ? -EHOSTUNREACH : -ENETUNREACH;
rt->dst.output = ip6_pkt_discard_out;
rt->dst.input = ip6_pkt_discard;
break;
@@ -1650,9 +1901,21 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
int gwa_type;
gw_addr = &cfg->fc_gateway;
- rt->rt6i_gateway = *gw_addr;
gwa_type = ipv6_addr_type(gw_addr);
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ err = -EINVAL;
+ if (ipv6_chk_addr_and_flags(net, gw_addr,
+ gwa_type & IPV6_ADDR_LINKLOCAL ?
+ dev : NULL, 0, 0))
+ goto out;
+
+ rt->rt6i_gateway = *gw_addr;
+
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
struct rt6_info *grt;
@@ -1663,7 +1926,6 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
(SIT, PtP, NBMA NOARP links) it is handy to allow
some exceptions. --ANK
*/
- err = -EINVAL;
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
@@ -1718,9 +1980,7 @@ install_route:
cfg->fc_nlinfo.nl_net = dev_net(dev);
- *rt_ret = rt;
-
- return 0;
+ return rt;
out:
if (dev)
dev_put(dev);
@@ -1729,20 +1989,21 @@ out:
if (rt)
dst_free(&rt->dst);
- *rt_ret = NULL;
-
- return err;
+ return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg)
{
struct mx6_config mxc = { .mx = NULL, };
- struct rt6_info *rt = NULL;
+ struct rt6_info *rt;
int err;
- err = ip6_route_info_create(cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto out;
+ }
err = ip6_convert_metrics(&mxc, cfg);
if (err)
@@ -1766,7 +2027,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
struct fib6_table *table;
struct net *net = dev_net(rt->dst.dev);
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt == net->ipv6.ip6_null_entry ||
+ rt->dst.flags & DST_NOCACHE) {
err = -ENOENT;
goto out;
}
@@ -1808,6 +2070,9 @@ static int ip6_route_del(struct fib6_config *cfg)
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if ((rt->rt6i_flags & RTF_CACHE) &&
+ !(cfg->fc_flags & RTF_CACHE))
+ continue;
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -1830,7 +2095,6 @@ static int ip6_route_del(struct fib6_config *cfg)
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
struct netevent_redirect netevent;
struct rt6_info *rt, *nrt = NULL;
struct ndisc_options ndopts;
@@ -1891,7 +2155,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
rt = (struct rt6_info *) dst;
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
}
@@ -1917,7 +2181,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NEIGH_UPDATE_F_ISROUTER))
);
- nrt = ip6_rt_copy(rt, &msg->dest);
+ nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
if (!nrt)
goto out;
@@ -1949,42 +2213,36 @@ out:
* Misc support functions
*/
-static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
- const struct in6_addr *dest)
+static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
- struct net *net = dev_net(ort->dst.dev);
- struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
- ort->rt6i_table);
+ BUG_ON(from->dst.from);
- if (rt) {
- rt->dst.input = ort->dst.input;
- rt->dst.output = ort->dst.output;
- rt->dst.flags |= DST_HOST;
-
- rt->rt6i_dst.addr = *dest;
- rt->rt6i_dst.plen = 128;
- dst_copy_metrics(&rt->dst, &ort->dst);
- rt->dst.error = ort->dst.error;
- rt->rt6i_idev = ort->rt6i_idev;
- if (rt->rt6i_idev)
- in6_dev_hold(rt->rt6i_idev);
- rt->dst.lastuse = jiffies;
-
- if (ort->rt6i_flags & RTF_GATEWAY)
- rt->rt6i_gateway = ort->rt6i_gateway;
- else
- rt->rt6i_gateway = *dest;
- rt->rt6i_flags = ort->rt6i_flags;
- rt6_set_from(rt, ort);
- rt->rt6i_metric = 0;
+ rt->rt6i_flags &= ~RTF_EXPIRES;
+ dst_hold(&from->dst);
+ rt->dst.from = &from->dst;
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
+}
+static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
+{
+ rt->dst.input = ort->dst.input;
+ rt->dst.output = ort->dst.output;
+ rt->rt6i_dst = ort->rt6i_dst;
+ rt->dst.error = ort->dst.error;
+ rt->rt6i_idev = ort->rt6i_idev;
+ if (rt->rt6i_idev)
+ in6_dev_hold(rt->rt6i_idev);
+ rt->dst.lastuse = jiffies;
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ rt->rt6i_flags = ort->rt6i_flags;
+ rt6_set_from(rt, ort);
+ rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
- memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
+ rt->rt6i_src = ort->rt6i_src;
#endif
- memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
- rt->rt6i_table = ort->rt6i_table;
- }
- return rt;
+ rt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ rt->rt6i_table = ort->rt6i_table;
+ rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -2026,7 +2284,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_INFO,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = ifindex,
.fc_dst_len = prefixlen,
@@ -2037,6 +2294,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
+ cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -2077,7 +2335,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_DFLT,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2124,7 +2382,8 @@ static void rtmsg_to_fib6_config(struct net *net,
{
memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN;
cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
cfg->fc_metric = rtmsg->rtmsg_metric;
cfg->fc_expires = rtmsg->rtmsg_info;
@@ -2208,7 +2467,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2219,7 +2478,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@ -2233,9 +2492,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
bool anycast)
{
+ u32 tb_id;
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT, NULL);
+ DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -2255,7 +2515,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
- rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+ rt->rt6i_table = fib6_get_table(net, tb_id);
+ rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
@@ -2359,6 +2621,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
fib6_clean_all(net, fib6_ifdown, &adn);
icmp6_clean_all(fib6_ifdown, &adn);
+ if (dev)
+ rt6_uncached_list_flush_dev(net, dev);
}
struct rt6_mtu_change_arg {
@@ -2396,11 +2660,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
PMTU discouvery.
*/
if (rt->dst.dev == arg->dev &&
- !dst_metric_locked(&rt->dst, RTAX_MTU) &&
- (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
- dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ !dst_metric_locked(&rt->dst, RTAX_MTU)) {
+ if (rt->rt6i_flags & RTF_CACHE) {
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
+ rt->rt6i_pmtu = arg->mtu;
+ } else if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ }
}
return 0;
}
@@ -2423,6 +2696,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_PREF] = { .type = NLA_U8 },
+ [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [RTA_ENCAP] = { .type = NLA_NESTED },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2457,6 +2732,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_type == RTN_LOCAL)
cfg->fc_flags |= RTF_LOCAL;
+ if (rtm->rtm_flags & RTM_F_CLONED)
+ cfg->fc_flags |= RTF_CACHE;
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2514,6 +2792,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= RTF_PREF(pref);
}
+ if (tb[RTA_ENCAP])
+ cfg->fc_encap = tb[RTA_ENCAP];
+
+ if (tb[RTA_ENCAP_TYPE])
+ cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+
err = 0;
errout:
return err;
@@ -2605,11 +2889,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
+ r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+ if (nla)
+ r_cfg.fc_encap_type = nla_get_u16(nla);
}
- err = ip6_route_info_create(&r_cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(&r_cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto cleanup;
+ }
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
@@ -2658,8 +2949,7 @@ cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
if (nh->rt6_info)
dst_free(&nh->rt6_info->dst);
- if (nh->mxc.mx)
- kfree(nh->mxc.mx);
+ kfree(nh->mxc.mx);
list_del(&nh->next);
kfree(nh);
}
@@ -2734,7 +3024,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return ip6_route_add(&cfg);
}
-static inline size_t rt6_nlmsg_size(void)
+static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
@@ -2748,7 +3038,8 @@ static inline size_t rt6_nlmsg_size(void)
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
- + nla_total_size(1); /* RTA_PREF */
+ + nla_total_size(1) /* RTA_PREF */
+ + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
static int rt6_fill_node(struct net *net,
@@ -2757,6 +3048,7 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 portid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
+ u32 metrics[RTAX_MAX];
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
@@ -2808,6 +3100,11 @@ static int rt6_fill_node(struct net *net,
else
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
+ if (!netif_carrier_ok(rt->dst.dev)) {
+ rtm->rtm_flags |= RTNH_F_LINKDOWN;
+ if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
+ rtm->rtm_flags |= RTNH_F_DEAD;
+ }
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
if (rt->rt6i_flags & RTF_DYNAMIC)
@@ -2870,7 +3167,10 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
}
- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+ memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
+ if (rt->rt6i_pmtu)
+ metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
+ if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
if (rt->rt6i_flags & RTF_GATEWAY) {
@@ -2892,6 +3192,8 @@ static int rt6_fill_node(struct net *net,
if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
goto nla_put_failure;
+ lwtunnel_fill_encap(skb, rt->dst.lwtstate);
+
nlmsg_end(skb, nlh);
return 0;
@@ -2977,6 +3279,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
} else {
fl6.flowi6_oif = oif;
+ if (netif_index_is_l3_master(net, oif)) {
+ fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
+ FLOWI_FLAG_SKIP_NH_OIF;
+ }
+
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
@@ -3008,7 +3315,8 @@ errout:
return err;
}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+ unsigned int nlm_flags)
{
struct sk_buff *skb;
struct net *net = info->nl_net;
@@ -3018,12 +3326,12 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
- event, info->portid, seq, 0, 0, 0);
+ event, info->portid, seq, 0, 0, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -3365,6 +3673,7 @@ static struct notifier_block ip6_route_dev_notifier = {
int __init ip6_route_init(void)
{
int ret;
+ int cpu;
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
@@ -3424,6 +3733,13 @@ int __init ip6_route_init(void)
if (ret)
goto out_register_late_subsys;
+ for_each_possible_cpu(cpu) {
+ struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
+
+ INIT_LIST_HEAD(&ul->head);
+ spin_lock_init(&ul->lock);
+ }
+
out:
return ret;
diff --git a/kernel/net/ipv6/sit.c b/kernel/net/ipv6/sit.c
index ac35a2859..dcccae861 100644
--- a/kernel/net/ipv6/sit.c
+++ b/kernel/net/ipv6/sit.c
@@ -742,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
return 1;
@@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev)
return 0;
}
-static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
+static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
-
iph->version = 4;
iph->protocol = IPPROTO_IPV6;
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
- if (!tunnel->dst_cache) {
- free_percpu(dev->tstats);
- return -ENOMEM;
- }
-
dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
- return 0;
}
static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net)
*/
sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
- err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
- if (err)
- goto err_dev_free;
-
- ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
goto err_reg_dev;
+ ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
+ ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+
t = netdev_priv(sitn->fb_tunnel_dev);
strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
return 0;
err_reg_dev:
- dev_put(sitn->fb_tunnel_dev);
-err_dev_free:
ipip6_dev_free(sitn->fb_tunnel_dev);
err_alloc_dev:
return err;
diff --git a/kernel/net/ipv6/syncookies.c b/kernel/net/ipv6/syncookies.c
index 21bc2eb53..eaf7ac496 100644
--- a/kernel/net/ipv6/syncookies.c
+++ b/kernel/net/ipv6/syncookies.c
@@ -41,23 +41,6 @@ static __u16 const msstab[] = {
9000 - 60,
};
-static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct sock *child;
-
- child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
- if (child) {
- atomic_set(&req->rsk_refcnt, 1);
- inet_csk_reqsk_queue_add(sk, req, child);
- } else {
- reqsk_free(req);
- }
- return child;
-}
-
static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv6_cookie_scratch);
@@ -131,14 +114,11 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
}
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
-__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- tcp_synq_overflow(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return __cookie_v6_init_sequence(iph, th, mssp);
}
@@ -190,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
if (!req)
goto out;
@@ -227,7 +207,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+ treq->snt_synack.v64 = 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
@@ -242,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = ireq->ir_mark;
@@ -255,16 +235,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out_free;
}
- req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss,
- &req->rcv_wnd, &req->window_clamp,
+ &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
- ret = get_cookie_sock(sk, skb, req, dst);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst);
out:
return ret;
out_free:
diff --git a/kernel/net/ipv6/sysctl_net_ipv6.c b/kernel/net/ipv6/sysctl_net_ipv6.c
index abcc79f64..45243bbe5 100644
--- a/kernel/net/ipv6/sysctl_net_ipv6.c
+++ b/kernel/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,9 @@
#include <net/inet_frag.h>
static int one = 1;
+static int auto_flowlabels_min;
+static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+
static struct ctl_table ipv6_table_template[] = {
{
@@ -45,7 +48,9 @@ static struct ctl_table ipv6_table_template[] = {
.data = &init_net.ipv6.sysctl.auto_flowlabels,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &auto_flowlabels_min,
+ .extra2 = &auto_flowlabels_max
},
{
.procname = "fwmark_reflect",
@@ -68,6 +73,20 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "flowlabel_state_ranges",
+ .data = &init_net.ipv6.sysctl.flowlabel_state_ranges,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "ip_nonlocal_bind",
+ .data = &init_net.ipv6.sysctl.ip_nonlocal_bind,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -109,6 +128,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
+ ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
+ ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/kernel/net/ipv6/tcp_ipv6.c b/kernel/net/ipv6/tcp_ipv6.c
index e541d68db..b8d405623 100644
--- a/kernel/net/ipv6/tcp_ipv6.c
+++ b/kernel/net/ipv6/tcp_ipv6.c
@@ -70,8 +70,8 @@
#include <linux/crypto.h>
#include <linux/scatterlist.h>
-static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
-static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
@@ -82,7 +82,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
const struct in6_addr *addr)
{
return NULL;
@@ -93,14 +93,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
+ if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- if (rt->rt6i_node)
- inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+ inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
}
@@ -121,7 +119,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
- struct rt6_info *rt;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -237,7 +235,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -257,18 +256,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
- rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
@@ -279,7 +277,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
- ip6_set_txhash(sk);
+ sk_set_txhash(sk);
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
@@ -330,6 +328,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct tcp_sock *tp;
__u32 seq, snd_una;
struct sock *sk;
+ bool fatal;
int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -348,8 +347,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
}
seq = ntohl(th->seq);
+ fatal = icmpv6_err_convert(type, code, &err);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq, fatal);
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -403,7 +403,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- icmpv6_err_convert(type, code, &err);
/* Might be for an request_sock */
switch (sk->sk_state) {
@@ -437,11 +436,11 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
- u16 queue_mapping,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ bool attach_req)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -450,10 +449,11 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
int err = -ENOMEM;
/* First, grab a route. */
- if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
+ IPPROTO_TCP)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, foc);
+ skb = tcp_make_synack(sk, dst, req, foc, attach_req);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -463,8 +463,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -479,13 +481,13 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
}
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
const struct in6_addr *addr)
{
return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}
-static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
@@ -624,8 +626,12 @@ clear_hash_noput:
return 1;
}
-static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+#endif
+
+static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
{
+#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -662,26 +668,27 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
&ip6h->daddr, ntohs(th->dest));
return true;
}
+#endif
return false;
}
-#endif
-static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
+static void tcp_v6_init_req(struct request_sock *req,
+ const struct sock *sk_listener,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk_listener);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
/* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
+ if (!sk_listener->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
- (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
+ (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
np->rxopt.bits.rxohlim || np->repflow)) {
@@ -690,13 +697,14 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
}
}
-static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
+static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
+ struct flowi *fl,
const struct request_sock *req,
bool *strict)
{
if (strict)
*strict = true;
- return inet6_csk_route_req(sk, &fl->u.ip6, req);
+ return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
@@ -723,10 +731,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.route_req = tcp_v6_route_req,
.init_seq = tcp_v6_init_sequence,
.send_synack = tcp_v6_send_synack,
- .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
};
-static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
+static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
u8 tclass, u32 label)
@@ -825,7 +832,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
kfree_skb(buff);
}
-static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
@@ -896,7 +903,7 @@ release_sk1:
#endif
}
-static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq,
+static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
u32 label)
@@ -919,7 +926,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
inet_twsk_put(tw);
}
-static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
@@ -927,44 +934,18 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
}
-static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
+static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
+#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
- struct request_sock *req;
- struct sock *nsk;
-
- /* Find possible connection requests. */
- req = inet6_csk_search_req(sk, th->source,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
- if (req) {
- nsk = tcp_check_req(sk, skb, req, false);
- if (!nsk || nsk == sk)
- reqsk_put(req);
- return nsk;
- }
- nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
- &ipv6_hdr(skb)->saddr, th->source,
- &ipv6_hdr(skb)->daddr, ntohs(th->dest),
- tcp_v6_iif(skb));
-
- if (nsk) {
- if (nsk->sk_state != TCP_TIME_WAIT) {
- bh_lock_sock(nsk);
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk));
- return NULL;
- }
-#ifdef CONFIG_SYN_COOKIES
if (!th->syn)
sk = cookie_v6_check(sk, skb);
#endif
@@ -987,12 +968,16 @@ drop:
return 0; /* don't send reset */
}
-static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req)
{
struct inet_request_sock *ireq;
- struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_pinfo *newnp;
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
@@ -1007,7 +992,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
* v6 mapped
*/
- newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+ newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
+ req_unhash, own_req);
if (!newsk)
return NULL;
@@ -1060,7 +1046,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, &fl6, req);
+ dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
if (!dst)
goto out;
}
@@ -1076,7 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1093,8 +1079,6 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
- ip6_set_txhash(newsk);
-
/* Now IPv6 options...
First: no IPv4 options.
@@ -1106,16 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
- /* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_atomic(sk, GFP_ATOMIC));
- consume_skb(ireq->pktopts);
- ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
newnp->opt = NULL;
newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -1129,13 +1104,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (np->opt)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
tcp_ca_openreq_child(newsk, dst);
@@ -1170,7 +1147,20 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_done(newsk);
goto out;
}
- __inet_hash(newsk, NULL);
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ if (*own_req) {
+ tcp_move_syn(newtp, req);
+
+ /* Clone pktoptions received with SYN, if we own the req */
+ if (ireq->pktopts) {
+ newnp->pktoptions = skb_clone(ireq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
+ if (newnp->pktoptions)
+ skb_set_owner_r(newnp->pktoptions, newsk);
+ }
+ }
return newsk;
@@ -1184,7 +1174,7 @@ out:
}
/* The socket must have it's spinlock held when we get
- * here.
+ * here, unless it is a TCP_LISTEN socket.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
@@ -1251,22 +1241,18 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ if (tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
- struct sock *nsk = tcp_v6_hnd_req(sk, skb);
+ struct sock *nsk = tcp_v6_cookie_check(sk, skb);
+
if (!nsk)
goto discard;
- /*
- * Queue it on the new socket if the new socket is active,
- * otherwise we just shortcircuit this and continue with
- * the new socket..
- */
if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(sk, skb);
+ sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1276,7 +1262,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
+ if (tcp_rcv_state_process(sk, skb))
goto reset;
if (opt_skb)
goto ipv6_pktoptions;
@@ -1390,6 +1376,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
+lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
inet6_iif(skb));
if (!sk)
@@ -1399,6 +1386,37 @@ process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *nsk;
+
+ sk = req->rsk_listener;
+ tcp_v6_fill_cb(skb, hdr, th);
+ if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_and_relse;
+ }
+ if (nsk == sk) {
+ reqsk_put(req);
+ tcp_v6_restore_cb(skb);
+ } else if (tcp_child_process(sk, nsk, skb)) {
+ tcp_v6_send_reset(nsk, skb);
+ goto discard_and_relse;
+ } else {
+ sock_put(sk);
+ return 0;
+ }
+ }
if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
@@ -1409,18 +1427,23 @@ process:
tcp_v6_fill_cb(skb, hdr, th);
-#ifdef CONFIG_TCP_MD5SIG
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
-#endif
if (sk_filter(sk, skb))
goto discard_and_relse;
- sk_incoming_cpu_update(sk);
skb->dev = NULL;
+ if (sk->sk_state == TCP_LISTEN) {
+ ret = tcp_v6_do_rcv(sk, skb);
+ goto put_and_return;
+ }
+
+ sk_incoming_cpu_update(sk);
+
bh_lock_sock_nested(sk);
+ tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
@@ -1433,6 +1456,7 @@ process:
}
bh_unlock_sock(sk);
+put_and_return:
sock_put(sk);
return ret ? -1 : 0;
@@ -1442,7 +1466,7 @@ no_tcp_socket:
tcp_v6_fill_cb(skb, hdr, th);
- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ if (tcp_checksum_complete(skb)) {
csum_error:
TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -1467,10 +1491,6 @@ do_time_wait:
tcp_v6_fill_cb(skb, hdr, th);
- if (skb->len < (th->doff<<2)) {
- inet_twsk_put(inet_twsk(sk));
- goto bad_packet;
- }
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
@@ -1487,8 +1507,7 @@ do_time_wait:
ntohs(th->dest), tcp_v6_iif(skb));
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
goto process;
@@ -1638,7 +1657,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
- struct request_sock *req, int i, kuid_t uid)
+ const struct request_sock *req, int i)
{
long ttd = req->rsk_timer.expires - jiffies;
const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
@@ -1662,7 +1681,8 @@ static void get_openreq6(struct seq_file *seq,
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
req->num_timeout,
- from_kuid_munged(seq_user_ns(seq), uid),
+ from_kuid_munged(seq_user_ns(seq),
+ sock_i_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0, req);
@@ -1677,7 +1697,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
- struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
+ const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ int rx_queue;
+ int state;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
@@ -1698,6 +1720,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
timer_expires = jiffies;
}
+ state = sk_state_load(sp);
+ if (state == TCP_LISTEN)
+ rx_queue = sp->sk_ack_backlog;
+ else
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
+ */
+ rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1706,9 +1737,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- tp->write_seq-tp->snd_una,
- (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+ state,
+ tp->write_seq - tp->snd_una,
+ rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
@@ -1720,8 +1751,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- sp->sk_state == TCP_LISTEN ?
- (fastopenq ? fastopenq->max_qlen : 0) :
+ state == TCP_LISTEN ?
+ fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
@@ -1767,18 +1798,12 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
}
st = seq->private;
- switch (st->state) {
- case TCP_SEQ_STATE_LISTENING:
- case TCP_SEQ_STATE_ESTABLISHED:
- if (sk->sk_state == TCP_TIME_WAIT)
- get_timewait6_sock(seq, v, st->num);
- else
- get_tcp6_sock(seq, v, st->num);
- break;
- case TCP_SEQ_STATE_OPENREQ:
- get_openreq6(seq, v, st->num, st->uid);
- break;
- }
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ get_openreq6(seq, v, st->num);
+ else
+ get_tcp6_sock(seq, v, st->num);
out:
return 0;
}
diff --git a/kernel/net/ipv6/tunnel6.c b/kernel/net/ipv6/tunnel6.c
index 3c758007b..dae25cad0 100644
--- a/kernel/net/ipv6/tunnel6.c
+++ b/kernel/net/ipv6/tunnel6.c
@@ -144,6 +144,16 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
}
+static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnel46_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ break;
+}
+
static const struct inet6_protocol tunnel6_protocol = {
.handler = tunnel6_rcv,
.err_handler = tunnel6_err,
@@ -152,7 +162,7 @@ static const struct inet6_protocol tunnel6_protocol = {
static const struct inet6_protocol tunnel46_protocol = {
.handler = tunnel46_rcv,
- .err_handler = tunnel6_err,
+ .err_handler = tunnel46_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
diff --git a/kernel/net/ipv6/udp.c b/kernel/net/ipv6/udp.c
index e51fc3eee..9da3287a3 100644
--- a/kernel/net/ipv6/udp.c
+++ b/kernel/net/ipv6/udp.c
@@ -182,10 +182,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
-#define SCORE2_MAX (1 + 1 + 1)
static inline int compute_score2(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr,
@@ -223,6 +225,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
@@ -251,8 +256,7 @@ begin:
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
matches = 1;
- } else if (score == SCORE2_MAX)
- goto exact_match;
+ }
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -269,7 +273,6 @@ begin:
goto begin;
if (result) {
-exact_match:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
@@ -1107,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1260,8 +1264,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1370,6 +1376,7 @@ release_dst:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*
@@ -1496,7 +1503,8 @@ int __net_init udp6_proc_init(struct net *net)
return udp_proc_register(net, &udp6_seq_afinfo);
}
-void udp6_proc_exit(struct net *net) {
+void udp6_proc_exit(struct net *net)
+{
udp_proc_unregister(net, &udp6_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
diff --git a/kernel/net/ipv6/xfrm6_input.c b/kernel/net/ipv6/xfrm6_input.c
index 74bd17882..0eaab1fa6 100644
--- a/kernel/net/ipv6/xfrm6_input.c
+++ b/kernel/net/ipv6/xfrm6_input.c
@@ -42,8 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
ipv6_hdr(skb)->payload_len = htons(skb->len);
__skb_push(skb, skb->data - skb_network_header(skb));
- NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
- skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_rcv_finish);
return -1;
}
diff --git a/kernel/net/ipv6/xfrm6_mode_tunnel.c b/kernel/net/ipv6/xfrm6_mode_tunnel.c
index 901ef6f8a..372855eea 100644
--- a/kernel/net/ipv6/xfrm6_mode_tunnel.c
+++ b/kernel/net/ipv6/xfrm6_mode_tunnel.c
@@ -20,11 +20,10 @@
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
- const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
- if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
- IP6_ECN_set_ce(inner_iph);
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP6_ECN_set_ce(skb, inner_iph);
}
/* Add encapsulation header.
diff --git a/kernel/net/ipv6/xfrm6_output.c b/kernel/net/ipv6/xfrm6_output.c
index 09c76a7b4..4d09ce6fa 100644
--- a/kernel/net/ipv6/xfrm6_output.c
+++ b/kernel/net/ipv6/xfrm6_output.c
@@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
if (xfrm6_local_dontfrag(skb))
xfrm6_local_rxpmtu(skb, mtu);
@@ -131,44 +132,57 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb);
}
-static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ return x->outer_mode->afinfo->output_finish(sk, skb);
+}
+
+static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
int mtu;
+ bool toobig;
#ifdef CONFIG_NETFILTER
if (!x) {
IP6CB(skb)->flags |= IP6SKB_REROUTED;
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
#endif
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ goto skip_frag;
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
mtu = dst_mtu(skb_dst(skb));
- if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+ toobig = skb->len > mtu && !skb_is_gso(skb);
+
+ if (toobig && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
- } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+ } else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
- if (x->props.mode == XFRM_MODE_TUNNEL &&
- ((skb->len > mtu && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)))) {
- return ip6_fragment(sk, skb,
- x->outer_mode->afinfo->output_finish);
- }
+ if (toobig || dst_allfrag(skb_dst(skb)))
+ return ip6_fragment(net, sk, skb,
+ __xfrm6_output_finish);
+
+skip_frag:
return x->outer_mode->afinfo->output_finish(sk, skb);
}
-int xfrm6_output(struct sock *sk, struct sk_buff *skb)
+int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
- NULL, skb_dst(skb)->dev, __xfrm6_output,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ __xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/kernel/net/ipv6/xfrm6_policy.c b/kernel/net/ipv6/xfrm6_policy.c
index f337a908a..c074771a1 100644
--- a/kernel/net/ipv6/xfrm6_policy.c
+++ b/kernel/net/ipv6/xfrm6_policy.c
@@ -20,13 +20,14 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/l3mdev.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
-static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
+static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
@@ -35,6 +36,8 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
int err;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -50,13 +53,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
return dst;
}
-static int xfrm6_get_saddr(struct net *net,
+static int xfrm6_get_saddr(struct net *net, int oif,
xfrm_address_t *saddr, xfrm_address_t *daddr)
{
struct dst_entry *dst;
struct net_device *dev;
- dst = xfrm6_dst_lookup(net, 0, NULL, daddr);
+ dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -71,20 +74,12 @@ static int xfrm6_get_tos(const struct flowi *fl)
return 0;
}
-static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
-{
- struct rt6_info *rt = (struct rt6_info *)xdst;
-
- rt6_init_peer(rt, net->ipv6.peers);
-}
-
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
struct rt6_info *rt = (struct rt6_info *)dst;
- if (rt->rt6i_node)
- path->path_cookie = rt->rt6i_node->fn_sernum;
+ path->path_cookie = rt6_get_cookie(rt);
}
path->u.rt6.rt6i_nfheader_len = nfheader_len;
@@ -106,16 +101,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return -ENODEV;
}
- rt6_transfer_peer(&xdst->u.rt6, rt);
-
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
xdst->u.rt6.rt6i_node = rt->rt6i_node;
- if (rt->rt6i_node)
- xdst->route_cookie = rt->rt6i_node->fn_sernum;
+ xdst->route_cookie = rt6_get_cookie(rt);
xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
xdst->u.rt6.rt6i_src = rt->rt6i_src;
@@ -142,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
if (skb_dst(skb))
- oif = skb_dst(skb)->dev->ifindex;
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
@@ -185,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
return;
case IPPROTO_ICMPV6:
- if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
+ if (!onlyproto && (nh + offset + 2 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
u8 *icmp;
nh = skb_network_header(skb);
@@ -199,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
offset += ipv6_optlen(exthdr);
- if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+ if (!onlyproto && (nh + offset + 3 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
struct ip6_mh *mh;
nh = skb_network_header(skb);
@@ -255,10 +249,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
- if (rt6_has_peer(&xdst->u.rt6)) {
- struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
- inet_putpeer(peer);
- }
xfrm_dst_destroy(xdst);
}
@@ -289,7 +279,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm6_dst_ops = {
+static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
@@ -298,17 +288,16 @@ static struct dst_ops xfrm6_dst_ops = {
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = 32768,
+ .gc_thresh = INT_MAX,
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
- .dst_ops = &xfrm6_dst_ops,
+ .dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
- .init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
@@ -336,7 +325,7 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static int __net_init xfrm6_net_init(struct net *net)
+static int __net_init xfrm6_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -364,7 +353,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm6_net_exit(struct net *net)
+static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -376,24 +365,52 @@ static void __net_exit xfrm6_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm6_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm6_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
+ sizeof(xfrm6_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm6_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ xfrm6_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+}
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
};
-#endif
int __init xfrm6_init(void)
{
int ret;
- dst_entries_init(&xfrm6_dst_ops);
-
ret = xfrm6_policy_init();
- if (ret) {
- dst_entries_destroy(&xfrm6_dst_ops);
+ if (ret)
goto out;
- }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
@@ -402,9 +419,7 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
-#endif
out:
return ret;
out_state:
@@ -416,11 +431,8 @@ out_policy:
void xfrm6_fini(void)
{
-#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
-#endif
xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
- dst_entries_destroy(&xfrm6_dst_ops);
}