From 52f993b8e89487ec9ee15a7fb4979e0f09a45b27 Mon Sep 17 00:00:00 2001
From: Yunhong Jiang
Date: Wed, 8 Mar 2017 23:13:28 -0800
Subject: Upgrade to 4.4.50-rt62

The current kernel is based on the rt kernel v4.4.6-rt14. Upgrade it to
4.4.50-rt62. The steps to achieve this are:

a) Clone a git repo from
   git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git

b) Get the diff between these two changesets:
   git diff 640eca2901f3435e616157b11379d3223a44b391 705619beeea1b0b48219a683fd1a901a86fdaf5e
   where the two commits are:
   [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 640eca2901f3435e616157b11379d3223a44b391
   640eca2901f3 v4.4.6-rt14
   localversion-rt
   [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 705619beeea1b0b48219a683fd1a901a86fdaf5e
   705619beeea1 Linux 4.4.50-rt62
   localversion-rt

c) One patch has already been backported, so filter it out before applying:
   filterdiff -p1 -x scripts/package/Makefile ~/tmp/v4.4.6-rt14-4.4.50-rt62.diff | patch -p1 --dry-run

Upstream status: backport

Change-Id: I244d57a32f6066e5a5b9915f9fbf99e7bbca6e01
Signed-off-by: Yunhong Jiang
---
 kernel/net/8021q/vlan.c | 2 +-
 kernel/net/ax25/af_ax25.c | 3 +-
 kernel/net/ax25/ax25_ds_timer.c | 5 +-
 kernel/net/ax25/ax25_ip.c | 15 +
 kernel/net/ax25/ax25_std_timer.c | 5 +-
 kernel/net/ax25/ax25_subr.c | 3 +-
 kernel/net/batman-adv/distributed-arp-table.c | 17 +-
 kernel/net/batman-adv/originator.c | 6 -
 kernel/net/batman-adv/routing.c | 9 +
 kernel/net/batman-adv/send.c | 6 +
 kernel/net/batman-adv/soft-interface.c | 8 +-
 kernel/net/batman-adv/translation-table.c | 4 +-
 kernel/net/batman-adv/types.h | 3 -
 kernel/net/bluetooth/l2cap_sock.c | 2 +-
 kernel/net/bluetooth/mgmt.c | 4 +
 kernel/net/bridge/br_fdb.c | 2 +
 kernel/net/bridge/br_ioctl.c | 5 +-
 kernel/net/bridge/br_multicast.c | 41 ++-
 kernel/net/bridge/br_netlink.c | 33 +-
 kernel/net/bridge/br_private.h | 23 +-
 kernel/net/bridge/br_stp.c | 13 +-
 kernel/net/caif/cfpkt_skbuff.c | 2 +-
 kernel/net/can/af_can.c | 12 +-
 kernel/net/can/af_can.h | 3 +-
 kernel/net/can/bcm.c | 59 ++--
 kernel/net/can/gw.c | 2 +-
 kernel/net/can/raw.c | 7 +-
 kernel/net/ceph/messenger.c | 13 +
 kernel/net/ceph/osdmap.c | 156 ++++++---
 kernel/net/core/dev.c | 139 ++++----
 kernel/net/core/drop_monitor.c | 39 ++-
 kernel/net/core/filter.c | 61 ++--
 kernel/net/core/flow_dissector.c | 56 +++-
 kernel/net/core/neighbour.c | 6 +-
 kernel/net/core/net_namespace.c | 2 +
 kernel/net/core/pktgen.c | 38 +--
 kernel/net/core/rtnetlink.c | 24 +-
 kernel/net/core/skbuff.c | 15 +-
 kernel/net/core/sock.c | 5 +-
 kernel/net/dccp/ipv4.c | 28 +-
 kernel/net/dccp/ipv6.c | 16 +-
 kernel/net/dccp/proto.c | 4 +
 kernel/net/decnet/dn_route.c | 9 +-
 kernel/net/dsa/slave.c | 2 +
 kernel/net/ethernet/eth.c | 3 +-
 kernel/net/ipv4/af_inet.c | 26 +-
 kernel/net/ipv4/cipso_ipv4.c | 4 +
 kernel/net/ipv4/devinet.c | 4 +
 kernel/net/ipv4/esp4.c | 54 ++--
 kernel/net/ipv4/fib_frontend.c | 28 +-
 kernel/net/ipv4/fib_semantics.c | 28 +-
 kernel/net/ipv4/fib_trie.c | 29 +-
 kernel/net/ipv4/fou.c | 17 +-
 kernel/net/ipv4/gre_offload.c | 7 +-
 kernel/net/ipv4/icmp.c | 8 +
 kernel/net/ipv4/igmp.c | 10 +-
 kernel/net/ipv4/ip_gre.c | 19 +-
 kernel/net/ipv4/ip_output.c | 8 +-
 kernel/net/ipv4/ip_sockglue.c | 19 +-
 kernel/net/ipv4/ip_tunnel.c | 23 +-
 kernel/net/ipv4/ip_tunnel_core.c | 3 +-
 kernel/net/ipv4/ip_vti.c | 31 ++
 kernel/net/ipv4/ipmr.c | 7 +-
 kernel/net/ipv4/netfilter/arp_tables.c | 326 ++++++-------------
 kernel/net/ipv4/netfilter/ip_tables.c | 356 ++++++---------------
 kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 12 +-
 kernel/net/ipv4/ping.c | 6 +
 kernel/net/ipv4/route.c | 21 +-
 kernel/net/ipv4/sysctl_net_ipv4.c | 8 +-
 kernel/net/ipv4/tcp.c | 8 +-
 kernel/net/ipv4/tcp_dctcp.c | 13 +-
 kernel/net/ipv4/tcp_fastopen.c | 3 +-
 kernel/net/ipv4/tcp_input.c | 57 ++--
 kernel/net/ipv4/tcp_ipv4.c | 36 ++-
 kernel/net/ipv4/tcp_metrics.c | 2 +-
 kernel/net/ipv4/tcp_minisocks.c | 3 +-
 kernel/net/ipv4/tcp_output.c | 30 +-
 kernel/net/ipv4/tcp_yeah.c | 2 +-
 kernel/net/ipv4/udp.c | 22 +-
 kernel/net/ipv4/udp_offload.c | 10 +-
 kernel/net/ipv4/udp_tunnel.c | 2 +
 kernel/net/ipv6/addrconf.c | 8 +-
 kernel/net/ipv6/esp6.c | 2 +-
 kernel/net/ipv6/exthdrs_core.c | 6 +-
 kernel/net/ipv6/ip6_fib.c | 1 +
 kernel/net/ipv6/ip6_gre.c | 44 +--
 kernel/net/ipv6/ip6_offload.c | 18 +-
 kernel/net/ipv6/ip6_output.c | 19 +-
 kernel/net/ipv6/ip6_tunnel.c | 53 ++-
 kernel/net/ipv6/ip6mr.c | 6 +-
 kernel/net/ipv6/mcast.c | 3 +-
 kernel/net/ipv6/netfilter/ip6_tables.c | 349 ++++++--------------
 kernel/net/ipv6/output_core.c | 2 +
 kernel/net/ipv6/ping.c | 9 +-
 kernel/net/ipv6/raw.c | 6 +-
 kernel/net/ipv6/reassembly.c | 6 +-
 kernel/net/ipv6/route.c | 9 +-
 kernel/net/ipv6/sit.c | 10 +-
 kernel/net/ipv6/tcp_ipv6.c | 42 ++-
 kernel/net/ipv6/udp.c | 21 +-
 kernel/net/irda/af_irda.c | 7 +-
 kernel/net/irda/iriap.c | 8 +-
 kernel/net/l2tp/l2tp_core.c | 2 +-
 kernel/net/l2tp/l2tp_core.h | 1 +
 kernel/net/l2tp/l2tp_ip.c | 40 ++-
 kernel/net/l2tp/l2tp_ip6.c | 15 +-
 kernel/net/llc/af_llc.c | 1 +
 kernel/net/mac80211/cfg.c | 2 +-
 kernel/net/mac80211/ibss.c | 22 +-
 kernel/net/mac80211/iface.c | 7 +-
 kernel/net/mac80211/mesh.c | 13 +-
 kernel/net/mac80211/mlme.c | 21 +-
 kernel/net/mac80211/rx.c | 29 +-
 kernel/net/mac80211/sta_info.c | 37 ++-
 kernel/net/mac80211/sta_info.h | 2 +-
 kernel/net/mac80211/tx.c | 9 +-
 kernel/net/mpls/af_mpls.c | 3 +
 kernel/net/netfilter/ipvs/ip_vs_core.c | 37 ++-
 kernel/net/netfilter/ipvs/ip_vs_pe_sip.c | 6 +-
 kernel/net/netfilter/ipvs/ip_vs_sync.c | 6 +-
 kernel/net/netfilter/nf_conntrack_core.c | 4 +-
 kernel/net/netfilter/nf_log.c | 6 +-
 kernel/net/netfilter/nft_dynset.c | 6 +-
 kernel/net/netfilter/x_tables.c | 251 ++++++++++++++-
 kernel/net/netlabel/netlabel_kapi.c | 12 +-
 kernel/net/netlink/af_netlink.c | 39 ++-
 kernel/net/netlink/af_netlink.h | 2 +
 kernel/net/openvswitch/actions.c | 12 +-
 kernel/net/openvswitch/conntrack.c | 3 +-
 kernel/net/openvswitch/vport-netdev.c | 2 +-
 kernel/net/openvswitch/vport-vxlan.c | 2 +
 kernel/net/openvswitch/vport.h | 7 -
 kernel/net/packet/af_packet.c | 75 ++---
 kernel/net/rds/recv.c | 2 +
 kernel/net/rds/tcp.c | 5 +-
 kernel/net/sched/act_csum.c | 8 +-
 kernel/net/sched/act_mirred.c | 2 +-
 kernel/net/sched/act_nat.c | 18 +-
 kernel/net/sched/act_pedit.c | 24 +-
 kernel/net/sched/act_vlan.c | 9 +
 kernel/net/sched/cls_api.c | 7 +-
 kernel/net/sched/cls_basic.c | 4 -
 kernel/net/sched/cls_bpf.c | 4 -
 kernel/net/sched/cls_cgroup.c | 7 +-
 kernel/net/sched/cls_flow.c | 1 -
 kernel/net/sched/cls_flower.c | 31 +-
 kernel/net/sched/cls_rsvp.h | 3 +-
 kernel/net/sched/cls_tcindex.c | 1 -
 kernel/net/sched/sch_api.c | 8 +-
 kernel/net/sched/sch_cbq.c | 12 +-
 kernel/net/sched/sch_choke.c | 6 +-
 kernel/net/sched/sch_codel.c | 10 +-
 kernel/net/sched/sch_drr.c | 9 +-
 kernel/net/sched/sch_dsmark.c | 11 +-
 kernel/net/sched/sch_fifo.c | 4 +
 kernel/net/sched/sch_fq.c | 4 +-
 kernel/net/sched/sch_fq_codel.c | 17 +-
 kernel/net/sched/sch_generic.c | 5 +-
 kernel/net/sched/sch_hfsc.c | 9 +-
 kernel/net/sched/sch_hhf.c | 10 +-
 kernel/net/sched/sch_htb.c | 24 +-
 kernel/net/sched/sch_multiq.c | 16 +-
 kernel/net/sched/sch_netem.c | 82 ++++-
 kernel/net/sched/sch_pie.c | 5 +-
 kernel/net/sched/sch_prio.c | 15 +-
 kernel/net/sched/sch_qfq.c | 9 +-
 kernel/net/sched/sch_red.c | 10 +-
 kernel/net/sched/sch_sfb.c | 10 +-
 kernel/net/sched/sch_sfq.c | 16 +-
 kernel/net/sched/sch_tbf.c | 15 +-
 kernel/net/sctp/ipv6.c | 2 +
 kernel/net/sctp/sm_statefuns.c | 12 +-
 kernel/net/sctp/socket.c | 15 +-
 kernel/net/socket.c | 40 +--
 kernel/net/sunrpc/auth_gss/auth_gss.c | 8 +-
 kernel/net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +-
 kernel/net/sunrpc/auth_gss/svcauth_gss.c | 6 +-
 kernel/net/sunrpc/cache.c | 6 +-
 kernel/net/sunrpc/clnt.c | 17 +-
 kernel/net/sunrpc/sunrpc_syms.c | 1 +
 kernel/net/sunrpc/svc.c | 8 +-
 kernel/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -
 kernel/net/sunrpc/xprtsock.c | 15 +-
 kernel/net/switchdev/switchdev.c | 6 +-
 kernel/net/sysctl_net.c | 2 +-
 kernel/net/tipc/link.c | 2 +
 kernel/net/tipc/name_distr.c | 1 +
 kernel/net/tipc/netlink_compat.c | 5 +-
 kernel/net/tipc/socket.c | 39 ++-
 kernel/net/tipc/subscr.c | 3 +-
 kernel/net/tipc/udp_media.c | 5 -
 kernel/net/unix/af_unix.c | 141 ++++----
 kernel/net/vmw_vsock/af_vsock.c | 21 +-
 kernel/net/wireless/core.h | 2 +
 kernel/net/wireless/mlme.c | 12 +
 kernel/net/wireless/nl80211.c | 20 +-
 kernel/net/wireless/scan.c | 69 ++++
 kernel/net/wireless/sme.c | 14 +
 kernel/net/x25/x25_facilities.c | 1 +
 kernel/net/xfrm/xfrm_input.c | 3 +
 200 files changed, 2605 insertions(+), 1763 deletions(-)

(limited to 'kernel/net')

diff --git a/kernel/net/8021q/vlan.c b/kernel/net/8021q/vlan.c
index d2cd9de4b..ad8d6e6b8 100644
--- a/kernel/net/8021q/vlan.c
+++ b/kernel/net/8021q/vlan.c
@@ -659,7 +659,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 	skb_gro_pull(skb, sizeof(*vhdr));
 	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
 
-	pp = ptype->callbacks.gro_receive(head, skb);
+	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 
 out_unlock:
 	rcu_read_unlock();
diff --git a/kernel/net/ax25/af_ax25.c b/kernel/net/ax25/af_ax25.c
index fbd0acf80..2fdebabbf 100644
--- a/kernel/net/ax25/af_ax25.c
+++ b/kernel/net/ax25/af_ax25.c
@@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock)
 		release_sock(sk);
 		ax25_disconnect(ax25, 0);
 		lock_sock(sk);
-		ax25_destroy_socket(ax25);
+		if (!sock_flag(ax25->sk, SOCK_DESTROY))
+			ax25_destroy_socket(ax25);
 		break;
 
 	case AX25_STATE_3:
diff --git a/kernel/net/ax25/ax25_ds_timer.c b/kernel/net/ax25/ax25_ds_timer.c
index 951cd57bb..5237dff69 100644
--- a/kernel/net/ax25/ax25_ds_timer.c
+++ b/kernel/net/ax25/ax25_ds_timer.c
@@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 	switch (ax25->state) {
 
 	case AX25_STATE_0:
+	case AX25_STATE_2:
 		/* Magic here: If we listen() and a new link dies before it
 		   is accepted() it isn't 'dead' so doesn't get removed.
*/ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/kernel/net/ax25/ax25_ip.c b/kernel/net/ax25/ax25_ip.c index b563a3f5f..2fa3be965 100644 --- a/kernel/net/ax25/ax25_ip.c +++ b/kernel/net/ax25/ax25_ip.c @@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) } #endif +static bool ax25_validate_header(const char *header, unsigned int len) +{ + ax25_digi digi; + + if (!len) + return false; + + if (header[0]) + return true; + + return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL, + NULL); +} + const struct header_ops ax25_header_ops = { .create = ax25_hard_header, + .validate = ax25_validate_header, }; EXPORT_SYMBOL(ax25_header_ops); diff --git a/kernel/net/ax25/ax25_std_timer.c b/kernel/net/ax25/ax25_std_timer.c index 004467c9e..2c0d6ef66 100644 --- a/kernel/net/ax25/ax25_std_timer.c +++ b/kernel/net/ax25/ax25_std_timer.c @@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/kernel/net/ax25/ax25_subr.c b/kernel/net/ax25/ax25_subr.c index 3b78e8473..983f0b5e1 100644 --- a/kernel/net/ax25/ax25_subr.c +++ b/kernel/net/ax25/ax25_subr.c @@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - ax25_stop_heartbeat(ax25); + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); diff --git a/kernel/net/batman-adv/distributed-arp-table.c b/kernel/net/batman-adv/distributed-arp-table.c index a49c705fb..5f19133c5 100644 --- a/kernel/net/batman-adv/distributed-arp-table.c +++ b/kernel/net/batman-adv/distributed-arp-table.c @@ -553,6 +553,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT + * @vid: VLAN identifier * * An originator O is selected if and only if its DHT_ID value is one of three * closest values (from the LEFT, with wrap around if needed) then the hash @@ -561,7 +562,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. 
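The distributed-arp-table.c hunks above thread the VLAN id through batadv_dat_send_data() into batadv_dat_select_candidates(), so the DHT key is derived from the (IP, vid) pair instead of hardcoding vid to 0. A minimal userspace sketch of why keying on the pair matters; the struct and hash below are illustrative stand-ins, nothing like the kernel's batadv_hash_dat():

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the DAT key: the same IP on two VLANs must
 * map to two different DHT keys once vid is part of the key. */
struct dat_key {
	uint32_t ip;   /* IPv4 address */
	uint16_t vid;  /* VLAN identifier; was fixed at 0 before the fix */
};

static uint16_t toy_hash(const struct dat_key *k)
{
	/* Illustrative mix only, not the kernel's hash. */
	uint32_t h = k->ip * 2654435761u;

	h ^= (uint32_t)k->vid * 40503u;
	return (uint16_t)(h ^ (h >> 16));
}

int main(void)
{
	struct dat_key a = { 0xc0a80001u, 10 };  /* 192.168.0.1 on VLAN 10 */
	struct dat_key b = { 0xc0a80001u, 20 };  /* same IP on VLAN 20 */

	/* With vid folded in, the two entries no longer collide. */
	printf("vlan10 key=0x%04x vlan20 key=0x%04x\n",
	       (unsigned)toy_hash(&a), (unsigned)toy_hash(&b));
	return 0;
}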
*/ static struct batadv_dat_candidate * -batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, + unsigned short vid) { int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; @@ -577,7 +579,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) return NULL; dat.ip = ip_dst; - dat.vid = 0; + dat.vid = vid; ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); @@ -597,6 +599,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key + * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * * This function copies the skb with pskb_copy() and is sent as unicast packet @@ -607,7 +610,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, - int packet_subtype) + unsigned short vid, int packet_subtype) { int i; bool ret = false; @@ -616,7 +619,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *tmp_skb; struct batadv_dat_candidate *cand; - cand = batadv_dat_select_candidates(bat_priv, ip); + cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) goto out; @@ -1004,7 +1007,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, ret = true; } else { /* Send the request to the DHT */ - ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_GET); } out: @@ -1132,8 +1135,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply */ - batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); - batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); } /** diff --git a/kernel/net/batman-adv/originator.c b/kernel/net/batman-adv/originator.c index 17851d3aa..6282f021d 100644 --- a/kernel/net/batman-adv/originator.c +++ b/kernel/net/batman-adv/originator.c @@ -197,18 +197,12 @@ static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) { struct hlist_node *node_tmp; struct batadv_neigh_ifinfo *neigh_ifinfo; - struct batadv_algo_ops *bao; - - bao = neigh_node->orig_node->bat_priv->bat_algo_ops; hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { batadv_neigh_ifinfo_free_ref(neigh_ifinfo); } - if (bao->bat_neigh_free) - bao->bat_neigh_free(neigh_node); - batadv_hardif_free_ref(neigh_node->if_incoming); kfree_rcu(neigh_node, rcu); diff --git a/kernel/net/batman-adv/routing.c b/kernel/net/batman-adv/routing.c index 3207667e6..d8a2f33e6 100644 --- a/kernel/net/batman-adv/routing.c +++ b/kernel/net/batman-adv/routing.c @@ -104,6 +104,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. 
After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_free_ref(orig_ifinfo); diff --git a/kernel/net/batman-adv/send.c b/kernel/net/batman-adv/send.c index f66432480..0e0c3b8ed 100644 --- a/kernel/net/batman-adv/send.c +++ b/kernel/net/batman-adv/send.c @@ -630,6 +630,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -657,6 +660,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } diff --git a/kernel/net/batman-adv/soft-interface.c b/kernel/net/batman-adv/soft-interface.c index ac4d08de5..720f1a5b8 100644 --- a/kernel/net/batman-adv/soft-interface.c +++ b/kernel/net/batman-adv/soft-interface.c @@ -407,11 +407,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -423,8 +429,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() diff --git a/kernel/net/batman-adv/translation-table.c b/kernel/net/batman-adv/translation-table.c index 83b0ca27a..f2079acb5 100644 --- a/kernel/net/batman-adv/translation-table.c +++ b/kernel/net/batman-adv/translation-table.c @@ -2764,7 +2764,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto unlock; /* Copy the last orig_node's OGM buffer */ @@ -2782,7 +2782,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto out; /* fill the rest of the tvlv with the real TT entries */ diff --git a/kernel/net/batman-adv/types.h b/kernel/net/batman-adv/types.h index d260efd70..cbd347c2e 100644 --- a/kernel/net/batman-adv/types.h +++ b/kernel/net/batman-adv/types.h @@ -1136,8 +1136,6 @@ struct batadv_forw_packet { * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better * than neigh2 for their respective outgoing interface from the metric * prospective - * @bat_neigh_free: free the resources allocated by the routing algorithm for a - * neigh_node object * @bat_orig_print: print the originator table (optional) * @bat_orig_free: free the resources allocated by the routing algorithm for an * orig_node object @@ -1165,7 +1163,6 @@ struct batadv_algo_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); - void 
(*bat_neigh_free)(struct batadv_neigh_node *neigh); /* orig_node handling API */ void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); diff --git a/kernel/net/bluetooth/l2cap_sock.c b/kernel/net/bluetooth/l2cap_sock.c index 1bb551527..d9bbbded4 100644 --- a/kernel/net/bluetooth/l2cap_sock.c +++ b/kernel/net/bluetooth/l2cap_sock.c @@ -927,7 +927,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u32 __user *) optval)) { + if (get_user(opt, (u16 __user *) optval)) { err = -EFAULT; break; } diff --git a/kernel/net/bluetooth/mgmt.c b/kernel/net/bluetooth/mgmt.c index 7f2211927..b1b0a1c0b 100644 --- a/kernel/net/bluetooth/mgmt.c +++ b/kernel/net/bluetooth/mgmt.c @@ -7155,6 +7155,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); + if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); diff --git a/kernel/net/bridge/br_fdb.c b/kernel/net/bridge/br_fdb.c index a642bb829..09442e0f7 100644 --- a/kernel/net/bridge/br_fdb.c +++ b/kernel/net/bridge/br_fdb.c @@ -278,6 +278,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); diff --git a/kernel/net/bridge/br_ioctl.c b/kernel/net/bridge/br_ioctl.c index 263b4de4d..60a3dbfca 100644 --- a/kernel/net/bridge/br_ioctl.c +++ b/kernel/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; } diff --git a/kernel/net/bridge/br_multicast.c b/kernel/net/bridge/br_multicast.c index 03661d974..d80c15d02 100644 --- a/kernel/net/bridge/br_multicast.c +++ b/kernel/net/bridge/br_multicast.c @@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); + br->has_ipv6_addr = 0; return NULL; } + + br->has_ipv6_addr = 1; ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -948,13 +951,12 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) mod_timer(&query->timer, jiffies); } -void br_multicast_enable_port(struct net_bridge_port *port) +static void __br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; - spin_lock(&br->multicast_lock); if (br->multicast_disabled || !netif_running(br->dev)) - goto out; + return; br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) @@ -962,8 +964,14 @@ void br_multicast_enable_port(struct net_bridge_port *port) #endif if (port->multicast_router == 2 && hlist_unhashed(&port->rlist)) br_multicast_add_router(br, port); +} -out: +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct 
net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + __br_multicast_enable_port(port); spin_unlock(&br->multicast_lock); } @@ -1110,7 +1118,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } else { err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, vid); - if (!err) + if (err) break; } } @@ -1270,6 +1278,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1280,14 +1289,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1348,6 +1357,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1357,8 +1367,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1367,7 +1377,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } @@ -1734,6 +1744,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, @@ -1899,8 +1910,9 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { - int err = 0; struct net_bridge_mdb_htable *mdb; + struct net_bridge_port *port; + int err = 0; spin_lock_bh(&br->multicast_lock); if (br->multicast_disabled == !val) @@ -1928,10 +1940,9 @@ rollback: goto rollback; } - br_multicast_start_querier(br, &br->ip4_own_query); -#if IS_ENABLED(CONFIG_IPV6) - br_multicast_start_querier(br, &br->ip6_own_query); -#endif + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port(port); unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/kernel/net/bridge/br_netlink.c b/kernel/net/bridge/br_netlink.c index 40197ff89..413d18e37 100644 --- a/kernel/net/bridge/br_netlink.c +++ b/kernel/net/bridge/br_netlink.c @@ -773,20 +773,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_bridge *br = netdev_priv(dev); - - if (tb[IFLA_ADDRESS]) { - spin_lock_bh(&br->lock); - br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); - spin_unlock_bh(&br->lock); - } - - return register_netdevice(dev); -} - static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], @@ -1068,6 +1054,25 @@ static int 
br_changelink(struct net_device *brdev, struct nlattr *tb[], return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = br_changelink(dev, tb, data); + if (err) + return err; + + return register_netdevice(dev); +} + static size_t br_get_size(const struct net_device *brdev) { return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ diff --git a/kernel/net/bridge/br_private.h b/kernel/net/bridge/br_private.h index 216018c76..1001a1b7d 100644 --- a/kernel/net/bridge/br_private.h +++ b/kernel/net/bridge/br_private.h @@ -301,6 +301,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; + u8 has_ipv6_addr:1; u32 hash_elasticity; u32 hash_max; @@ -574,10 +575,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier) + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { + bool own_querier_enabled; + + if (br->multicast_querier) { + if (is_ipv6 && !br->has_ipv6_addr) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + return time_is_before_jiffies(querier->delay_time) && - (br->multicast_querier || timer_pending(&querier->timer)); + (own_querier_enabled || timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge *br, @@ -585,10 +598,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_other_query); + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_other_query); + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); #endif default: return false; diff --git a/kernel/net/bridge/br_stp.c b/kernel/net/bridge/br_stp.c index 5f3f64553..eff69cb27 100644 --- a/kernel/net/bridge/br_stp.c +++ b/kernel/net/bridge/br_stp.c @@ -567,6 +567,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +/* Set time interval that dynamic forwarding entries live + * For pure software bridge, allow values outside the 802.1 + * standard specification for special cases: + * 0 - entry never ages (all permanant) + * 1 - entry disappears (no persistance) + * + * Offloaded switch entries maybe more restrictive + */ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { @@ -577,11 +585,8 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) unsigned long t = clock_t_to_jiffies(ageing_time); int err; - if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) - return -ERANGE; - err = switchdev_port_attr_set(br->dev, &attr); - if (err) + if (err && err != -EOPNOTSUPP) return err; br->ageing_time = t; diff --git a/kernel/net/caif/cfpkt_skbuff.c b/kernel/net/caif/cfpkt_skbuff.c index f6c3b2137..59ce1fcc2 100644 --- a/kernel/net/caif/cfpkt_skbuff.c +++ b/kernel/net/caif/cfpkt_skbuff.c @@ -286,7 +286,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len) else skb_trim(skb, len); - return cfpkt_getlen(pkt); + return 
cfpkt_getlen(pkt); } /* Need to expand SKB */ diff --git a/kernel/net/can/af_can.c b/kernel/net/can/af_can.c index 166d43619..928f58064 100644 --- a/kernel/net/can/af_can.c +++ b/kernel/net/can/af_can.c @@ -445,6 +445,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * @func: callback function on filter match * @data: returned parameter for callback function * @ident: string for calling module identification + * @sk: socket pointer (might be NULL) * * Description: * Invokes the callback function with the received sk_buff and the given @@ -468,7 +469,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, */ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, - char *ident) + char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; @@ -496,6 +497,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, r->func = func; r->data = data; r->ident = ident; + r->sk = sk; hlist_add_head_rcu(&r->list, rl); d->entries++; @@ -520,8 +522,11 @@ EXPORT_SYMBOL(can_rx_register); static void can_rx_delete_receiver(struct rcu_head *rp) { struct receiver *r = container_of(rp, struct receiver, rcu); + struct sock *sk = r->sk; kmem_cache_free(rcv_cache, r); + if (sk) + sock_put(sk); } /** @@ -596,8 +601,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, spin_unlock(&can_rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) + if (r) { + if (r->sk) + sock_hold(r->sk); call_rcu(&r->rcu, can_rx_delete_receiver); + } } EXPORT_SYMBOL(can_rx_unregister); diff --git a/kernel/net/can/af_can.h b/kernel/net/can/af_can.h index fca0fe9fc..b86f5129e 100644 --- a/kernel/net/can/af_can.h +++ b/kernel/net/can/af_can.h @@ -50,13 +50,14 @@ struct receiver { struct hlist_node list; - struct rcu_head rcu; canid_t can_id; canid_t mask; unsigned long matches; void (*func)(struct sk_buff *, void *); void *data; char *ident; + struct sock *sk; + struct rcu_head rcu; }; #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) diff --git a/kernel/net/can/bcm.c b/kernel/net/can/bcm.c index 6863310d6..4ccfd356b 100644 --- a/kernel/net/can/bcm.c +++ b/kernel/net/can/bcm.c @@ -710,14 +710,23 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id, static void bcm_remove_op(struct bcm_op *op) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); - - if (op->tsklet.func) - tasklet_kill(&op->tsklet); + if (op->tsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || + hrtimer_active(&op->timer)) { + hrtimer_cancel(&op->timer); + tasklet_kill(&op->tsklet); + } + } - if (op->thrtsklet.func) - tasklet_kill(&op->thrtsklet); + if (op->thrtsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || + hrtimer_active(&op->thrtimer)) { + hrtimer_cancel(&op->thrtimer); + tasklet_kill(&op->thrtsklet); + } + } if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -1170,7 +1179,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = can_rx_register(dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, - "bcm"); + "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); @@ -1179,7 +1188,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } else err = can_rx_register(NULL, op->can_id, REGMASK(op->can_id), - 
bcm_rx_handler, op, "bcm"); + bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del(&op->list); @@ -1500,24 +1509,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + int ret = 0; if (len < sizeof(*addr)) return -EINVAL; - if (bo->bound) - return -EISCONN; + lock_sock(sk); + + if (bo->bound) { + ret = -EISCONN; + goto fail; + } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(&init_net, addr->can_ifindex); - if (!dev) - return -ENODEV; - + if (!dev) { + ret = -ENODEV; + goto fail; + } if (dev->type != ARPHRD_CAN) { dev_put(dev); - return -ENODEV; + ret = -ENODEV; + goto fail; } bo->ifindex = dev->ifindex; @@ -1528,17 +1544,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } - bo->bound = 1; - if (proc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); + if (!bo->bcm_proc_read) { + ret = -ENOMEM; + goto fail; + } } - return 0; + bo->bound = 1; + +fail: + release_sock(sk); + + return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/kernel/net/can/gw.c b/kernel/net/can/gw.c index 455168718..77c8af404 100644 --- a/kernel/net/can/gw.c +++ b/kernel/net/can/gw.c @@ -442,7 +442,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj) { return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, - gwj, "gw"); + gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct cgw_job *gwj) diff --git a/kernel/net/can/raw.c b/kernel/net/can/raw.c index 2e67b1423..e9403a26a 100644 --- a/kernel/net/can/raw.c +++ b/kernel/net/can/raw.c @@ -190,7 +190,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, for (i = 0; i < count; i++) { err = can_rx_register(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) @@ -211,7 +211,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, if (err_mask) err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); return err; } @@ -499,6 +499,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen % sizeof(struct can_filter) != 0) return -EINVAL; + if (optlen > CAN_RAW_FILTER_MAX * sizeof(struct can_filter)) + return -EINVAL; + count = optlen / sizeof(struct can_filter); if (count > 1) { diff --git a/kernel/net/ceph/messenger.c b/kernel/net/ceph/messenger.c index 63ae5dd24..b8d927c56 100644 --- a/kernel/net/ceph/messenger.c +++ b/kernel/net/ceph/messenger.c @@ -2042,6 +2042,19 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + if (con->auth_reply_buf) { + /* + * Any connection that defines ->get_authorizer() + * should also define ->verify_authorizer_reply(). + * See get_connect_authorizer(). 
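Stepping back to the CAN hunks: raw_setsockopt() now rejects any optlen larger than CAN_RAW_FILTER_MAX filters before deriving the filter count, bounding the later allocation and copy. A hedged userspace sketch of that validation order; the constant matches the uapi value as I understand it, and the surrounding socket plumbing is omitted:

#include <stddef.h>
#include <stdio.h>

#define CAN_RAW_FILTER_MAX 512          /* uapi limit on raw filters */

struct can_filter {                     /* layout as in linux/can.h */
	unsigned int can_id;
	unsigned int can_mask;
};

/* Returns the filter count, or -1 on an invalid option length. */
static int validate_filter_optlen(size_t optlen)
{
	if (optlen % sizeof(struct can_filter) != 0)
		return -1;   /* not a whole number of filters */
	if (optlen > CAN_RAW_FILTER_MAX * sizeof(struct can_filter))
		return -1;   /* caps the later allocation and memcpy */
	return (int)(optlen / sizeof(struct can_filter));
}

int main(void)
{
	printf("%d\n", validate_filter_optlen(16 * sizeof(struct can_filter)));
	printf("%d\n", validate_filter_optlen(1024 * sizeof(struct can_filter)));
	return 0;
}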
+ */ + ret = con->ops->verify_authorizer_reply(con, 0); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } + } + switch (con->in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," diff --git a/kernel/net/ceph/osdmap.c b/kernel/net/ceph/osdmap.c index 7d8f581d9..ddc357389 100644 --- a/kernel/net/ceph/osdmap.c +++ b/kernel/net/ceph/osdmap.c @@ -1191,6 +1191,115 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) return map; } +/* + * Encoding order is (new_up_client, new_state, new_weight). Need to + * apply in the (new_weight, new_state, new_up_client) order, because + * an incremental map may look like e.g. + * + * new_up_client: { osd=6, addr=... } # set osd_state and addr + * new_state: { osd=6, xorstate=EXISTS } # clear osd_state + */ +static int decode_new_up_state_weight(void **p, void *end, + struct ceph_osdmap *map) +{ + void *new_up_client; + void *new_state; + void *new_weight_end; + u32 len; + + new_up_client = *p; + ceph_decode_32_safe(p, end, len, e_inval); + len *= sizeof(u32) + sizeof(struct ceph_entity_addr); + ceph_decode_need(p, end, len, e_inval); + *p += len; + + new_state = *p; + ceph_decode_32_safe(p, end, len, e_inval); + len *= sizeof(u32) + sizeof(u8); + ceph_decode_need(p, end, len, e_inval); + *p += len; + + /* new_weight */ + ceph_decode_32_safe(p, end, len, e_inval); + while (len--) { + s32 osd; + u32 w; + + ceph_decode_need(p, end, 2*sizeof(u32), e_inval); + osd = ceph_decode_32(p); + w = ceph_decode_32(p); + BUG_ON(osd >= map->max_osd); + pr_info("osd%d weight 0x%x %s\n", osd, w, + w == CEPH_OSD_IN ? "(in)" : + (w == CEPH_OSD_OUT ? "(out)" : "")); + map->osd_weight[osd] = w; + + /* + * If we are marking in, set the EXISTS, and clear the + * AUTOOUT and NEW bits. + */ + if (w) { + map->osd_state[osd] |= CEPH_OSD_EXISTS; + map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT | + CEPH_OSD_NEW); + } + } + new_weight_end = *p; + + /* new_state (up/down) */ + *p = new_state; + len = ceph_decode_32(p); + while (len--) { + s32 osd; + u8 xorstate; + int ret; + + osd = ceph_decode_32(p); + xorstate = ceph_decode_8(p); + if (xorstate == 0) + xorstate = CEPH_OSD_UP; + BUG_ON(osd >= map->max_osd); + if ((map->osd_state[osd] & CEPH_OSD_UP) && + (xorstate & CEPH_OSD_UP)) + pr_info("osd%d down\n", osd); + if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && + (xorstate & CEPH_OSD_EXISTS)) { + pr_info("osd%d does not exist\n", osd); + map->osd_weight[osd] = CEPH_OSD_IN; + ret = set_primary_affinity(map, osd, + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); + if (ret) + return ret; + memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr)); + map->osd_state[osd] = 0; + } else { + map->osd_state[osd] ^= xorstate; + } + } + + /* new_up_client */ + *p = new_up_client; + len = ceph_decode_32(p); + while (len--) { + s32 osd; + struct ceph_entity_addr addr; + + osd = ceph_decode_32(p); + ceph_decode_copy(p, &addr, sizeof(addr)); + ceph_decode_addr(&addr); + BUG_ON(osd >= map->max_osd); + pr_info("osd%d up\n", osd); + map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; + map->osd_addr[osd] = addr; + } + + *p = new_weight_end; + return 0; + +e_inval: + return -EINVAL; +} + /* * decode and apply an incremental map update. 
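The new decode_new_up_state_weight() above copes with sections that are encoded as (new_up_client, new_state, new_weight) but must be applied as (new_weight, new_state, new_up_client): it records where each section starts, skips ahead, then rewinds the cursor to apply them in the required order. A small userspace model of that save-and-rewind technique, using an invented length-prefixed buffer format in place of the ceph encoding:

#include <stdio.h>

/* Model: a buffer holding three length-prefixed sections A, B, C in
 * encoding order, applied in C, B, A order by saving each section's
 * start pointer and rewinding the cursor. */
static const unsigned char *skip_section(const unsigned char *p)
{
	return p + 1 + p[0];     /* 1-byte length prefix, then payload */
}

static void apply_section(const char *name, const unsigned char *p)
{
	printf("apply %s: %.*s\n", name, p[0], (const char *)(p + 1));
}

int main(void)
{
	const unsigned char buf[] = "\x02up\x04down\x06weight";
	const unsigned char *sec_a = buf;                  /* new_up_client analogue */
	const unsigned char *sec_b = skip_section(sec_a);  /* new_state analogue */
	const unsigned char *sec_c = skip_section(sec_b);  /* new_weight analogue */
	const unsigned char *end = skip_section(sec_c);    /* cursor after all three */

	apply_section("weight", sec_c);  /* apply in reverse of encoding order */
	apply_section("state", sec_b);
	apply_section("up", sec_a);

	printf("consumed %td bytes\n", end - buf);  /* cursor parked past the blob */
	return 0;
}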
*/ @@ -1290,49 +1399,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, __remove_pg_pool(&map->pg_pools, pi); } - /* new_up */ - ceph_decode_32_safe(p, end, len, e_inval); - while (len--) { - u32 osd; - struct ceph_entity_addr addr; - ceph_decode_32_safe(p, end, osd, e_inval); - ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval); - ceph_decode_addr(&addr); - pr_info("osd%d up\n", osd); - BUG_ON(osd >= map->max_osd); - map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS; - map->osd_addr[osd] = addr; - } - - /* new_state */ - ceph_decode_32_safe(p, end, len, e_inval); - while (len--) { - u32 osd; - u8 xorstate; - ceph_decode_32_safe(p, end, osd, e_inval); - xorstate = **(u8 **)p; - (*p)++; /* clean flag */ - if (xorstate == 0) - xorstate = CEPH_OSD_UP; - if (xorstate & CEPH_OSD_UP) - pr_info("osd%d down\n", osd); - if (osd < map->max_osd) - map->osd_state[osd] ^= xorstate; - } - - /* new_weight */ - ceph_decode_32_safe(p, end, len, e_inval); - while (len--) { - u32 osd, off; - ceph_decode_need(p, end, sizeof(u32)*2, e_inval); - osd = ceph_decode_32(p); - off = ceph_decode_32(p); - pr_info("osd%d weight 0x%x %s\n", osd, off, - off == CEPH_OSD_IN ? "(in)" : - (off == CEPH_OSD_OUT ? "(out)" : "")); - if (osd < map->max_osd) - map->osd_weight[osd] = off; - } + /* new_up_client, new_state, new_weight */ + err = decode_new_up_state_weight(p, end, map); + if (err) + goto bad; /* new_pg_temp */ err = decode_new_pg_temp(p, end, map); diff --git a/kernel/net/core/dev.c b/kernel/net/core/dev.c index 0e17592ad..aa6165e02 100644 --- a/kernel/net/core/dev.c +++ b/kernel/net/core/dev.c @@ -1682,24 +1682,19 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call static_key_slow_dec() from irq context - * If net_disable_timestamp() is called from irq context, defer the - * static_key_slow_dec() calls. 
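The dev.c change above takes static_key_slow_dec() out of the caller's context entirely: net_disable_timestamp() only bumps an atomic counter and schedules a work item, and the worker drains the counter with atomic_xchg() in process context. A pthread-based model of that drain pattern; these are userspace stand-ins, not the kernel's static-key or workqueue APIs:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int deferred;  /* plays netstamp_needed_deferred */
static int key_count = 3;    /* plays the static key's enable count */

/* Plays netstamp_clear(): runs in "process context" (a thread here)
 * and drains whatever accumulated, one key decrement per unit. */
static void *drain_worker(void *arg)
{
	int n = atomic_exchange(&deferred, 0);

	while (n--)
		key_count--;     /* static_key_slow_dec() analogue */
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* Plays net_disable_timestamp() from any context: just count. */
	atomic_fetch_add(&deferred, 1);
	atomic_fetch_add(&deferred, 1);

	pthread_create(&t, NULL, drain_worker, NULL);  /* schedule_work() analogue */
	pthread_join(&t, NULL);

	printf("key_count=%d deferred=%d\n", key_count, atomic_load(&deferred));
	return 0;
}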
- */ static atomic_t netstamp_needed_deferred; -#endif - -void net_enable_timestamp(void) +static void netstamp_clear(struct work_struct *work) { -#ifdef HAVE_JUMP_LABEL int deferred = atomic_xchg(&netstamp_needed_deferred, 0); - if (deferred) { - while (--deferred) - static_key_slow_dec(&netstamp_needed); - return; - } + while (deferred--) + static_key_slow_dec(&netstamp_needed); +} +static DECLARE_WORK(netstamp_work, netstamp_clear); #endif + +void net_enable_timestamp(void) +{ static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1707,12 +1702,12 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - if (in_interrupt()) { - atomic_inc(&netstamp_needed_deferred); - return; - } -#endif + /* net_disable_timestamp() can be called from non process context */ + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_dec(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_disable_timestamp); @@ -2470,7 +2465,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: @@ -2658,9 +2653,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(skb->dev, skb)) { - features &= ~NETIF_F_SG; } + if (illegal_highdma(skb->dev, skb)) + features &= ~NETIF_F_SG; return features; } @@ -2844,6 +2839,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -3797,6 +3793,22 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return skb; } +/** + * netdev_is_rx_handler_busy - check if receive handler is registered + * @dev: device to check + * + * Check if a receive handler is already registered for a given device. + * Return true if there one. + * + * The caller must hold the rtnl_mutex. 
+ */ +bool netdev_is_rx_handler_busy(struct net_device *dev) +{ + ASSERT_RTNL(); + return dev && rtnl_dereference(dev->rx_handler); +} +EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy); + /** * netdev_rx_handler_register - register receive handler * @dev: device to register a handler for @@ -4249,7 +4261,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, + skb_frag_size(frag0), + skb->end - skb->tail); } } @@ -4302,7 +4316,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ @@ -4694,6 +4709,7 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +#ifndef CONFIG_PREEMPT_RT_FULL /** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule @@ -4705,6 +4721,7 @@ void __napi_schedule_irqoff(struct napi_struct *n) ____napi_schedule(this_cpu_ptr(&softnet_data), n); } EXPORT_SYMBOL(__napi_schedule_irqoff); +#endif void __napi_complete(struct napi_struct *n) { @@ -4931,13 +4948,21 @@ static void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; + struct sk_buff_head tofree_q; + struct sk_buff *skb; LIST_HEAD(list); LIST_HEAD(repoll); + __skb_queue_head_init(&tofree_q); + local_irq_disable(); + skb_queue_splice_init(&sd->tofree_queue, &tofree_q); list_splice_init(&sd->poll_list, &list); local_irq_enable(); + while ((skb = __skb_dequeue(&tofree_q))) + kfree_skb(skb); + for (;;) { struct napi_struct *n; @@ -5270,6 +5295,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5279,7 +5305,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += ref_nr; return 0; } @@ -5289,7 +5315,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = 1; + adj->ref_nr = ref_nr; adj->private = private; dev_hold(adj_dev); @@ -5328,6 +5354,7 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; @@ -5340,10 +5367,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, BUG(); } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, + ref_nr, adj->ref_nr-ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -5362,21 +5389,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - 
ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); return ret; } @@ -5384,9 +5412,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, } static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - return __netdev_adjacent_dev_link_lists(dev, upper_dev, + return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, NULL, false); @@ -5394,17 +5423,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev, static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } @@ -5413,17 +5444,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); + int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); if (ret) return ret; - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); return ret; } @@ -5433,8 +5464,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5486,7 +5517,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { pr_debug("Interlinking %s with %s, non-neighbour\n", i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); + ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); if (ret) goto rollback_mesh; } @@ -5496,7 +5527,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { pr_debug("linking %s's upper device %s with %s\n", upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); + ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); 
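The adjacency hunks here convert the all_adj_list bookkeeping to weighted references: insert and remove take a ref_nr and apply it in one step, so a device reachable through N lower devices gains or loses all N references at once instead of looping. A minimal sketch of the weighted-refcount idea with the list handling stripped out:

#include <stdio.h>

struct adj {
	int ref_nr;  /* weighted reference count, as in struct netdev_adjacent */
};

/* Take ref_nr references in one step instead of N separate gets. */
static void adj_get(struct adj *a, int ref_nr)
{
	a->ref_nr += ref_nr;
}

/* Drop ref_nr references; report when the adjacency would be freed. */
static void adj_put(struct adj *a, int ref_nr)
{
	if (a->ref_nr > ref_nr) {
		a->ref_nr -= ref_nr;
		return;
	}
	a->ref_nr = 0;
	printf("adjacency freed\n");  /* the kfree_rcu() point in the kernel */
}

int main(void)
{
	struct adj a = { 0 };

	adj_get(&a, 3);  /* device reachable through three lower devices */
	adj_put(&a, 3);  /* one unlink drops all three references at once */
	return 0;
}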
if (ret) goto rollback_upper_mesh; } @@ -5505,7 +5536,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &dev->all_adj_list.lower, list) { pr_debug("linking %s's lower device %s with %s\n", dev->name, i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); + ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); if (ret) goto rollback_lower_mesh; } @@ -5519,7 +5550,7 @@ rollback_lower_mesh: list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); } i = NULL; @@ -5529,7 +5560,7 @@ rollback_upper_mesh: list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); } i = j = NULL; @@ -5541,7 +5572,7 @@ rollback_mesh: list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); } if (i == to_i) break; @@ -5625,16 +5656,16 @@ void netdev_upper_dev_unlink(struct net_device *dev, */ list_for_each_entry(i, &dev->all_adj_list.lower, list) list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); /* remove also the devices itself from lower/upper device * list */ list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); diff --git a/kernel/net/core/drop_monitor.c b/kernel/net/core/drop_monitor.c index 252e155c8..a2270188b 100644 --- a/kernel/net/core/drop_monitor.c +++ b/kernel/net/core/drop_monitor.c @@ -80,6 +80,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) struct nlattr *nla; struct sk_buff *skb; unsigned long flags; + void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -87,21 +88,41 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) skb = genlmsg_new(al, GFP_KERNEL); - if (skb) { - genlmsg_put(skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(skb, NLA_UNSPEC, - sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - } else { - mod_timer(&data->send_timer, jiffies + HZ / 10); + if (!skb) + goto err; + + msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + if (!msg_header) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + if (!nla) { + nlmsg_free(skb); + skb = NULL; + goto err; } + msg = nla_data(nla); + memset(msg, 0, al); + goto out; +err: + mod_timer(&data->send_timer, jiffies + HZ / 10); +out: spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); + if (skb) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); + + genlmsg_end(skb, genlmsg_data(gnlh)); + } + return skb; } diff 
--git a/kernel/net/core/filter.c b/kernel/net/core/filter.c index 37157c4c1..e94355452 100644 --- a/kernel/net/core/filter.c +++ b/kernel/net/core/filter.c @@ -52,9 +52,10 @@ #include /** - * sk_filter - run a packet through a socket filter + * sk_filter_trim_cap - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter + * @cap: limit on how short the eBPF program may trim the packet * * Run the eBPF program and then cut skb->data to correct size returned by * the program. If pkt_len is 0 we toss packet. If skb->len is smaller @@ -63,7 +64,7 @@ * be accepted or -EPERM if the packet should be tossed. * */ -int sk_filter(struct sock *sk, struct sk_buff *skb) +int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) { int err; struct sk_filter *filter; @@ -84,14 +85,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) filter = rcu_dereference(sk->sk_filter); if (filter) { unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); - - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; + err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; } rcu_read_unlock(); return err; } -EXPORT_SYMBOL(sk_filter); +EXPORT_SYMBOL(sk_filter_trim_cap); static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { @@ -1139,7 +1139,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, + bool locked) { struct sk_filter *fp, *old_fp; @@ -1155,10 +1156,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + old_fp = rcu_dereference_protected(sk->sk_filter, locked); rcu_assign_pointer(sk->sk_filter, fp); - if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1175,7 +1174,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. 
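sk_filter_trim_cap() above gives callers a floor on how far a socket filter may trim a packet: a pkt_len of 0 still drops it, but a nonzero pkt_len is raised to at least cap before pskb_trim(). The arithmetic in isolation, as a hedged userspace sketch with the skb handling replaced by plain lengths:

#include <stdio.h>

#define DROP (-1)

/* Returns the trimmed length, or DROP when the filter rejects the
 * packet, mirroring "pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM".
 * pskb_trim() never grows a packet, hence the final clamp to skb_len. */
static int trim_cap(unsigned int pkt_len, unsigned int cap, unsigned int skb_len)
{
	if (pkt_len == 0)
		return DROP;        /* filter said toss it */
	if (pkt_len < cap)
		pkt_len = cap;      /* never trim below the caller's floor */
	return pkt_len < skb_len ? (int)pkt_len : (int)skb_len;
}

int main(void)
{
	/* A TCP-style caller might pass the header length as cap so a
	 * filter cannot truncate the packet into an unparseable stub. */
	printf("%d\n", trim_cap(4, 40, 100));   /* clamped up to 40 */
	printf("%d\n", trim_cap(0, 40, 100));   /* dropped */
	printf("%d\n", trim_cap(60, 40, 100));  /* filter's length wins */
	return 0;
}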
*/ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1213,7 +1213,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, locked); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1221,7 +1221,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(sk_attach_filter); +EXPORT_SYMBOL_GPL(__sk_attach_filter); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +} int sk_attach_bpf(u32 ufd, struct sock *sk) { @@ -1240,7 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return -EINVAL; } - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); if (err < 0) { bpf_prog_put(prog); return err; @@ -1270,9 +1275,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) */ if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + len))) + if (unlikely(skb_try_make_writable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, buf); @@ -1316,8 +1319,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1362,9 +1364,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely((u32) offset > 0xffff)) return -EFAULT; - - if (unlikely(skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(sum)))) + if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1549,6 +1549,13 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_vlan_pop) return true; + if (func == bpf_skb_store_bytes) + return true; + if (func == bpf_l3_csum_replace) + return true; + if (func == bpf_l4_csum_replace) + return true; + return false; } @@ -1913,7 +1920,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int sk_detach_filter(struct sock *sk) +int __sk_detach_filter(struct sock *sk, bool locked) { int ret = -ENOENT; struct sk_filter *filter; @@ -1921,8 +1928,7 @@ int sk_detach_filter(struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + filter = rcu_dereference_protected(sk->sk_filter, locked); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -1931,7 +1937,12 @@ int sk_detach_filter(struct sock *sk) return ret; } -EXPORT_SYMBOL_GPL(sk_detach_filter); +EXPORT_SYMBOL_GPL(__sk_detach_filter); + +int sk_detach_filter(struct sock *sk) +{ + return __sk_detach_filter(sk, sock_owned_by_user(sk)); +} int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) diff --git a/kernel/net/core/flow_dissector.c b/kernel/net/core/flow_dissector.c index 
12e700332..ee9082792 100644 --- a/kernel/net/core/flow_dissector.c +++ b/kernel/net/core/flow_dissector.c @@ -131,7 +131,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_keyid *key_keyid; u8 ip_proto = 0; - bool ret = false; + bool ret; if (!data) { data = skb->data; @@ -492,12 +492,17 @@ ip_proto_again: out_good: ret = true; -out_bad: + key_control->thoff = (u16)nhoff; +out: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; return ret; + +out_bad: + ret = false; + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; } EXPORT_SYMBOL(__skb_flow_dissect); @@ -662,6 +667,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; + +u32 __skb_get_hash_symmetric(struct sk_buff *skb) +{ + struct flow_keys keys; + + __flow_hash_secret_init(); + + memset(&keys, 0, sizeof(keys)); + __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, + NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + return __flow_hash_from_keys(&keys, hashrnd); +} +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -874,6 +896,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, }; +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -895,10 +940,13 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_dissector_symmetric, + flow_keys_dissector_symmetric_keys, + ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_buf_dissector, flow_keys_buf_dissector_keys, ARRAY_SIZE(flow_keys_buf_dissector_keys)); return 0; } -late_initcall_sync(init_default_flow_dissectors); +core_initcall(init_default_flow_dissectors); diff --git a/kernel/net/core/neighbour.c b/kernel/net/core/neighbour.c index f18ae91b6..769cece9b 100644 --- a/kernel/net/core/neighbour.c +++ b/kernel/net/core/neighbour.c @@ -2467,13 +2467,17 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; + rcu_read_lock_bh(); neigh = __neigh_lookup_noref(tbl, addr, dev); if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); - if (IS_ERR(neigh)) + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); goto out_kfree_skb; + } err = neigh->output(neigh, skb); + rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), diff --git a/kernel/net/core/net_namespace.c b/kernel/net/core/net_namespace.c index 2c2eb1b62..2e9a1c281 100644 --- 
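The new __skb_get_hash_symmetric() above exists so that both directions of a flow hash to the same value, which is why its dissector is keyed only on control, basic, addresses and ports, with no flow labels or VLAN tags mixed in. The standard way to obtain that property is to put the two endpoints into a canonical order before hashing; the sketch below illustrates the idea only and is not the kernel's hash function:

#include <stdint.h>
#include <stdio.h>

struct endpoint { uint32_t addr; uint16_t port; };

static uint64_t mix(uint64_t h, uint64_t v)
{
	h ^= v + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
	return h;
}

static uint64_t sym_flow_hash(struct endpoint a, struct endpoint b)
{
	/* order endpoints canonically so (a,b) and (b,a) hash identically */
	if (a.addr > b.addr || (a.addr == b.addr && a.port > b.port)) {
		struct endpoint t = a;

		a = b;
		b = t;
	}
	uint64_t h = 0;

	h = mix(h, ((uint64_t)a.addr << 16) | a.port);
	h = mix(h, ((uint64_t)b.addr << 16) | b.port);
	return h;
}

int main(void)
{
	struct endpoint c = { 0x0a000001, 40000 }, s = { 0x0a000002, 443 };

	/* prints 1: both directions of the flow land in the same bucket */
	printf("%d\n", sym_flow_hash(c, s) == sym_flow_hash(s, c));
	return 0;
}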
a/kernel/net/core/net_namespace.c +++ b/kernel/net/core/net_namespace.c @@ -217,6 +217,8 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; + if (atomic_read(&net->count) == 0) + return NETNSA_NSID_NOT_ASSIGNED; spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); diff --git a/kernel/net/core/pktgen.c b/kernel/net/core/pktgen.c index 4da4d51a2..b6327601f 100644 --- a/kernel/net/core/pktgen.c +++ b/kernel/net/core/pktgen.c @@ -215,8 +215,8 @@ #define M_NETIF_RECEIVE 1 /* Inject packets into stack */ /* If lock -- protects updating of if_list */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); +#define if_lock(t) mutex_lock(&(t->if_lock)); +#define if_unlock(t) mutex_unlock(&(t->if_lock)); /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -422,7 +422,7 @@ struct pktgen_net { }; struct pktgen_thread { - spinlock_t if_lock; /* for list of devices */ + struct mutex if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; @@ -2002,11 +2002,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d { struct pktgen_thread *t; + mutex_lock(&pktgen_thread_lock); + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - rcu_read_lock(); - list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { + if_lock(t); + list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -2021,8 +2023,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } - rcu_read_unlock(); + if_unlock(t); } + mutex_unlock(&pktgen_thread_lock); } static int pktgen_device_event(struct notifier_block *unused, @@ -2278,7 +2281,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) { - pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev); + pkt_dev->pkt_overhead = 0; pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); @@ -2769,13 +2772,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, - struct pktgen_dev *pkt_dev, - unsigned int extralen) + struct pktgen_dev *pkt_dev) { + unsigned int extralen = LL_RESERVED_SPACE(dev); struct sk_buff *skb = NULL; - unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + - pkt_dev->pkt_overhead; + unsigned int size; + size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead; if (pkt_dev->flags & F_NODE) { int node = pkt_dev->node >= 0 ? 
pkt_dev->node : numa_node_id(); @@ -2788,8 +2791,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } + /* the caller pre-fetches from skb->data and reserves for the mac hdr */ if (likely(skb)) - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, extralen - 16); return skb; } @@ -2822,16 +2826,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - datalen = (odev->hard_header_len + 16) & ~0xf; - - skb = pktgen_alloc_skb(odev, pkt_dev, datalen); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } prefetchw(skb->data); - skb_reserve(skb, datalen); + skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ eth = (__u8 *) skb_push(skb, 14); @@ -2951,7 +2953,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = pktgen_alloc_skb(odev, pkt_dev, 16); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; @@ -3727,7 +3729,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) return -ENOMEM; } - spin_lock_init(&t->if_lock); + mutex_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); diff --git a/kernel/net/core/rtnetlink.c b/kernel/net/core/rtnetlink.c index 34ba7a088..b94e165a4 100644 --- a/kernel/net/core/rtnetlink.c +++ b/kernel/net/core/rtnetlink.c @@ -905,6 +905,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1174,14 +1175,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) return -EMSGSIZE; @@ -2597,7 +2600,10 @@ nla_put_failure: static inline size_t rtnl_fdb_nlmsg_size(void) { - return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(sizeof(u16)) + /* NDA_VLAN */ + 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) diff --git a/kernel/net/core/skbuff.c b/kernel/net/core/skbuff.c index 12780dc62..c8d778f40 100644 --- a/kernel/net/core/skbuff.c +++ b/kernel/net/core/skbuff.c @@ -4096,9 +4096,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, if (!pskb_may_pull(skb_chk, offset)) goto err; - __skb_pull(skb_chk, offset); + skb_pull_rcsum(skb_chk, offset); ret = skb_chkf(skb_chk); - __skb_push(skb_chk, offset); + skb_push_rcsum(skb_chk, offset); if (ret) goto err; @@ -4421,15 +4421,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) __skb_push(skb, 
offset); err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) + if (err) { + __skb_pull(skb, offset); return err; + } + skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; - __skb_pull(skb, offset); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); + __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/kernel/net/core/sock.c b/kernel/net/core/sock.c index 9c3234299..03ba64820 100644 --- a/kernel/net/core/sock.c +++ b/kernel/net/core/sock.c @@ -745,7 +745,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); + sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -781,7 +781,7 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); + sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_RCVBUFFORCE: @@ -1562,6 +1562,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) } newsk->sk_err = 0; + newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); diff --git a/kernel/net/dccp/ipv4.c b/kernel/net/dccp/ipv4.c index 902d60632..0759f5b91 100644 --- a/kernel/net/dccp/ipv4.c +++ b/kernel/net/dccp/ipv4.c @@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - WARN_ON(req->sk); - if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); } else { @@ -237,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -247,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. 
+ */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, @@ -698,6 +698,7 @@ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; unsigned int cscov; + u8 dccph_doff; if (skb->pkt_type != PACKET_HOST) return 1; @@ -719,18 +720,19 @@ int dccp_invalid_packet(struct sk_buff *skb) /* * If P.Data Offset is too small for packet type, drop packet and return */ - if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); + dccph_doff = dh->dccph_doff; + if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff); return 1; } /* * If P.Data Offset is too too large for packet, drop packet and return */ - if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); + if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) { + DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff); return 1; } - + dh = dccp_hdr(skb); /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return diff --git a/kernel/net/dccp/ipv6.c b/kernel/net/dccp/ipv6.c index b8608b71a..27c4e81ef 100644 --- a/kernel/net/dccp/ipv6.c +++ b/kernel/net/dccp/ipv6.c @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmpv6_notify()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, @@ -947,6 +948,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), + .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, diff --git a/kernel/net/dccp/proto.c b/kernel/net/dccp/proto.c index 41e65804d..9fe25bf63 100644 --- a/kernel/net/dccp/proto.c +++ b/kernel/net/dccp/proto.c @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) __kfree_skb(skb); } + /* If socket has been already reset kill it. 
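The BUILD_BUG_ON() pairs above (in dccp_v4_err() and dccp_v6_err()) encode the contract the shortened error handlers now rely on: the ICMP layer has already pulled 8 bytes of the inner DCCP header, and the handlers only dereference fields that end within those 8 bytes, so the old skb->len checks can go. A compile-time userspace model of the same assertion, with the kernel's offsetofend() helper spelled out locally and an assumed partial header layout:

#include <stddef.h>
#include <stdint.h>

/* first fields of the real DCCP header; the rest is not guaranteed
 * to be linear in the error path */
struct dccp_hdr_sketch {
	uint16_t dccph_sport;
	uint16_t dccph_dport;
	uint8_t  dccph_doff;
};

#define offsetofend(T, m) (offsetof(T, m) + sizeof(((T *)0)->m))

_Static_assert(offsetofend(struct dccp_hdr_sketch, dccph_sport) <= 8,
	       "sport must lie within the 8 pulled bytes");
_Static_assert(offsetofend(struct dccp_hdr_sketch, dccph_dport) <= 8,
	       "dport must lie within the 8 pulled bytes");

int main(void)
{
	return 0;	/* the checks above fail the build, not the run */
}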
*/ + if (sk->sk_state == DCCP_CLOSED) + goto adjudge_to_death; + if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); diff --git a/kernel/net/decnet/dn_route.c b/kernel/net/decnet/dn_route.c index 607a14f20..b1dc096d2 100644 --- a/kernel/net/decnet/dn_route.c +++ b/kernel/net/decnet/dn_route.c @@ -1034,10 +1034,13 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); + err = -EINVAL; dev_out = init_net.loopback_dev; + if (!dev_out->dn_ptr) + goto out; + err = -EADDRNOTAVAIL; dev_hold(dev_out); if (!fld.daddr) { fld.daddr = @@ -1110,6 +1113,8 @@ source_ok: if (dev_out == NULL) goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); + if (!dn_db) + goto e_inval; /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); @@ -1151,6 +1156,8 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); + if (!dev_out->dn_ptr) + goto e_inval; fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); diff --git a/kernel/net/dsa/slave.c b/kernel/net/dsa/slave.c index 7bc787b09..8dfe9fb7a 100644 --- a/kernel/net/dsa/slave.c +++ b/kernel/net/dsa/slave.c @@ -1101,6 +1101,8 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + netif_device_detach(slave_dev); + if (p->phy) { phy_stop(p->phy); p->old_pause = -1; diff --git a/kernel/net/ethernet/eth.c b/kernel/net/ethernet/eth.c index 9e63f252a..52dcd414c 100644 --- a/kernel/net/ethernet/eth.c +++ b/kernel/net/ethernet/eth.c @@ -353,6 +353,7 @@ void ether_setup(struct net_device *dev) dev->header_ops = ð_header_ops; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; + dev->min_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ @@ -436,7 +437,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/kernel/net/ipv4/af_inet.c b/kernel/net/ipv4/af_inet.c index 5c5db6636..afc18e9ca 100644 --- a/kernel/net/ipv4/af_inet.c +++ b/kernel/net/ipv4/af_inet.c @@ -1372,7 +1372,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -1383,6 +1383,19 @@ out: return pp; } +static struct sk_buff **ipip_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { if (sk->sk_family == AF_INET) @@ -1425,6 +1438,13 @@ out_unlock: return err; } +static int ipip_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP; + return inet_gro_complete(skb, nhoff); +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1652,8 +1672,8 
@@ static struct packet_offload ip_packet_offload __read_mostly = { static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, + .gro_receive = ipip_gro_receive, + .gro_complete = ipip_gro_complete, }, }; diff --git a/kernel/net/ipv4/cipso_ipv4.c b/kernel/net/ipv4/cipso_ipv4.c index bdb2a07ec..6cc3e1d60 100644 --- a/kernel/net/ipv4/cipso_ipv4.c +++ b/kernel/net/ipv4/cipso_ipv4.c @@ -1657,6 +1657,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; diff --git a/kernel/net/ipv4/devinet.c b/kernel/net/ipv4/devinet.c index f6303b175..0212591b0 100644 --- a/kernel/net/ipv4/devinet.c +++ b/kernel/net/ipv4/devinet.c @@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ASSERT_RTNL(); + if (in_dev->dead) + goto no_promotions; + /* 1. Deleting primary ifaddr forces deletion all secondaries * unless alias promotion is set **/ @@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, fib_del_ifaddr(ifa, ifa1); } +no_promotions: /* 2. Unlink it */ *ifap = ifa1->ifa_next; diff --git a/kernel/net/ipv4/esp4.c b/kernel/net/ipv4/esp4.c index 477937465..20fb25e30 100644 --- a/kernel/net/ipv4/esp4.c +++ b/kernel/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? 
- PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. 
*/ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -464,7 +476,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } diff --git a/kernel/net/ipv4/fib_frontend.c b/kernel/net/ipv4/fib_frontend.c index 473447593..4e60dae86 100644 --- a/kernel/net/ipv4/fib_frontend.c +++ b/kernel/net/ipv4/fib_frontend.c @@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); @@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) struct in_device *in_dev; struct fib_result res; struct rtable *rt; - struct flowi4 fl4; struct net *net; int scope; @@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { - fl4.flowi4_oif = 0; - fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.daddr = ip_hdr(skb)->saddr; - fl4.saddr = 0; - fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); - fl4.flowi4_scope = scope; - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - fl4.flowi4_tun_key.tun_id = 0; + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_scope = scope, + .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { @@ -906,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { - pr_warn("%s: bug: prim == NULL\n", __func__); + /* if the device has been deleted, we don't perform + * address promotion + */ + if (!in_dev->dead) + pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { @@ -922,6 +924,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) subnet = 1; } + if (in_dev->dead) + goto no_promotions; + /* Deletion is more complicated than add. 
* We should take care of not to delete too much :-) * @@ -997,6 +1002,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) } } +no_promotions: if (!(ok & BRD_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); if (subnet && ifa->ifa_prefixlen < 31) { diff --git a/kernel/net/ipv4/fib_semantics.c b/kernel/net/ipv4/fib_semantics.c index d97268e8f..67d44aa9e 100644 --- a/kernel/net/ipv4/fib_semantics.c +++ b/kernel/net/ipv4/fib_semantics.c @@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, if (!rtnh_ok(rtnh, remaining)) return -EINVAL; + if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + return -EINVAL; + nexthop_nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; nexthop_nh->nh_oif = rtnh->rtnh_ifindex; @@ -975,6 +978,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; @@ -1001,6 +1006,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; + if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + goto err_inval; + #ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) { nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); @@ -1269,8 +1277,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate) - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); + if (fi->fib_nh->nh_lwtstate && + lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + goto nla_put_failure; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { @@ -1306,8 +1315,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (nh->nh_lwtstate) - lwtunnel_fill_encap(skb, nh->nh_lwtstate); + if (nh->nh_lwtstate && + lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); @@ -1580,8 +1591,13 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, int mp_hash) { + bool oif_check; + + oif_check = (fl4->flowi4_oif == 0 || + fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (res->fi->fib_nhs > 1 && oif_check) { if (mp_hash < 0) mp_hash = get_hash_from_flowi4(fl4) >> 1; @@ -1591,7 +1607,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && !fl4->flowi4_oif) + res->type == RTN_UNICAST && oif_check) fib_select_default(fl4, res); if (!fl4->saddr) diff --git a/kernel/net/ipv4/fib_trie.c b/kernel/net/ipv4/fib_trie.c index 744e5936c..7c52afb98 100644 --- a/kernel/net/ipv4/fib_trie.c +++ b/kernel/net/ipv4/fib_trie.c @@ -2453,29 +2453,22 @@ struct fib_route_iter { static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct fib_table *tb = iter->main_tb; struct key_vector *l, **tp = &iter->tnode; - struct trie *t; t_key key; - /* use cache location of 
next-to-find key */ + /* use cached location of previously found key */ if (iter->pos > 0 && pos >= iter->pos) { - pos -= iter->pos; key = iter->key; } else { - t = (struct trie *)tb->tb_data; - iter->tnode = t->kv; - iter->pos = 0; + iter->pos = 1; key = 0; } - while ((l = leaf_walk_rcu(tp, key)) != NULL) { + pos -= iter->pos; + + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { key = l->key + 1; iter->pos++; - - if (--pos <= 0) - break; - l = NULL; /* handle unlikely case of a key wrap */ @@ -2484,7 +2477,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, } if (l) - iter->key = key; /* remember it */ + iter->key = l->key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2505,14 +2498,14 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return NULL; iter->main_tb = tb; + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; if (*pos != 0) return fib_route_get_idx(iter, *pos); - t = (struct trie *)tb->tb_data; - iter->tnode = t->kv; iter->pos = 0; - iter->key = 0; + iter->key = KEY_MAX; return SEQ_START_TOKEN; } @@ -2521,7 +2514,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct key_vector *l = NULL; - t_key key = iter->key; + t_key key = iter->key + 1; ++*pos; @@ -2530,7 +2523,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) l = leaf_walk_rcu(&iter->tnode, key); if (l) { - iter->key = l->key + 1; + iter->key = l->key; iter->pos++; } else { iter->pos = 0; diff --git a/kernel/net/ipv4/fou.c b/kernel/net/ipv4/fou.c index bd903fe0f..08d8ee124 100644 --- a/kernel/net/ipv4/fou.c +++ b/kernel/net/ipv4/fou.c @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static void fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, size_t len) { struct iphdr *iph = ip_hdr(skb); @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len) __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); + return iptunnel_pull_offloads(skb); } static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - fou_recv_pull(skb, sizeof(struct udphdr)); + if (fou_recv_pull(skb, sizeof(struct udphdr))) + goto drop; return -fou->protocol; + +drop: + kfree_skb(skb); + return 0; } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); + if (iptunnel_pull_offloads(skb)) + goto drop; + return -guehdr->proto_ctype; drop: @@ -192,7 +201,7 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, if (!ops || !ops->callbacks.gro_receive) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -351,7 +360,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/kernel/net/ipv4/gre_offload.c b/kernel/net/ipv4/gre_offload.c index 5a8ee3282..79ae0d7be 100644 --- 
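The fib_route seq-file fix above changes the iterator to remember the key it actually returned (iter->key = l->key) and to resume the walk at key + 1 on the next read, instead of pre-computing a next-to-find key that can be stale by the time it is used; it also keeps the unlikely key wrap-around from restarting the walk. A minimal model of that resume pattern over a sorted array, assuming a walk primitive that returns the first key at or after a given point:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static const uint32_t keys[] = { 1, 5, 9, 42 };
#define NKEYS (sizeof(keys) / sizeof(keys[0]))

/* return the first key >= from, or -1 when the walk is done */
static int64_t walk_from(uint32_t from)
{
	for (size_t i = 0; i < NKEYS; i++)
		if (keys[i] >= from)
			return keys[i];
	return -1;
}

int main(void)
{
	uint32_t last = 0;	/* iter->key: the last key handed out */
	int64_t k;

	for (k = walk_from(0); k >= 0; k = walk_from(last + 1)) {
		printf("%u\n", (unsigned)k);
		last = (uint32_t)k;	/* remember what was returned */
		if (last == UINT32_MAX)	/* key wrap would restart the walk */
			break;
	}
	return 0;
}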
a/kernel/net/ipv4/gre_offload.c +++ b/kernel/net/ipv4/gre_offload.c @@ -128,6 +128,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct packet_offload *ptype; __be16 type; + if (NAPI_GRO_CB(skb)->encap_mark) + goto out; + + NAPI_GRO_CB(skb)->encap_mark = 1; + off = skb_gro_offset(skb); hlen = off + sizeof(*greh); greh = skb_gro_header_fast(skb, off); @@ -214,7 +219,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ skb_gro_postpull_rcsum(skb, greh, grehlen); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/kernel/net/ipv4/icmp.c b/kernel/net/ipv4/icmp.c index 74314d95d..ff2593269 100644 --- a/kernel/net/ipv4/icmp.c +++ b/kernel/net/ipv4/icmp.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -205,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; * * On SMP we have one ICMP socket per-cpu. */ +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock); + static struct sock *icmp_sk(struct net *net) { return *this_cpu_ptr(net->ipv4.icmp_sk); @@ -216,12 +219,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net) local_bh_disable(); + local_lock(icmp_sk_lock); sk = icmp_sk(net); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ + local_unlock(icmp_sk_lock); local_bh_enable(); return NULL; } @@ -231,6 +236,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net) static inline void icmp_xmit_unlock(struct sock *sk) { spin_unlock_bh(&sk->sk_lock.slock); + local_unlock(icmp_sk_lock); } int sysctl_icmp_msgs_per_sec __read_mostly = 1000; @@ -359,6 +365,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct sock *sk; struct sk_buff *skb; + local_lock(icmp_sk_lock); sk = icmp_sk(dev_net((*rt)->dst.dev)); if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, @@ -381,6 +388,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, skb->ip_summed = CHECKSUM_NONE; ip_push_pending_frames(sk, fl4); } + local_unlock(icmp_sk_lock); } /* diff --git a/kernel/net/ipv4/igmp.c b/kernel/net/ipv4/igmp.c index 05e4cba14..17adfdaf5 100644 --- a/kernel/net/ipv4/igmp.c +++ b/kernel/net/ipv4/igmp.c @@ -225,9 +225,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } @@ -356,9 +361,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); skb_reset_network_header(skb); pip = ip_hdr(skb); diff --git a/kernel/net/ipv4/ip_gre.c b/kernel/net/ipv4/ip_gre.c index 614521437..3e4184088 100644 --- a/kernel/net/ipv4/ip_gre.c +++ b/kernel/net/ipv4/ip_gre.c @@ -180,6 +180,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be 
pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -239,7 +240,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -342,7 +343,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -420,6 +421,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -429,7 +431,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) @@ -1242,6 +1247,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --git a/kernel/net/ipv4/ip_output.c b/kernel/net/ipv4/ip_output.c index 49f028563..2b7283303 100644 --- a/kernel/net/ipv4/ip_output.c +++ b/kernel/net/ipv4/ip_output.c @@ -102,6 +102,9 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + skb->protocol = htons(ETH_P_IP); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); @@ -1237,13 +1240,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - cork->length += size; if ((size + skb->len > mtu) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { + if (skb->ip_summed != CHECKSUM_PARTIAL) + return -EOPNOTSUPP; + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } + cork->length += size; while (size > 0) { if (skb_is_gso(skb)) { diff --git a/kernel/net/ipv4/ip_sockglue.c b/kernel/net/ipv4/ip_sockglue.c index a50124260..bc14c5bb1 100644 --- a/kernel/net/ipv4/ip_sockglue.c +++ b/kernel/net/ipv4/ip_sockglue.c @@ -98,7 +98,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) } static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { __wsum csum = skb->csum; @@ -106,7 +106,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + csum = csum_sub(csum, + csum_partial(skb->data + tlen, + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -152,7 +154,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) } void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; @@ -215,7 +217,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff 
*skb, } if (flags & IP_CMSG_CHECKSUM) - ip_cmsg_recv_checksum(msg, skb, offset); + ip_cmsg_recv_checksum(msg, skb, tlen, offset); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -1190,7 +1192,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/kernel/net/ipv4/ip_tunnel.c b/kernel/net/ipv4/ip_tunnel.c index cbb51f3fa..3310ac75e 100644 --- a/kernel/net/ipv4/ip_tunnel.c +++ b/kernel/net/ipv4/ip_tunnel.c @@ -663,6 +663,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -760,7 +762,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst_link_failure(skb); } else tunnel->err_count = 0; @@ -947,17 +948,31 @@ done: } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) diff --git a/kernel/net/ipv4/ip_tunnel_core.c b/kernel/net/ipv4/ip_tunnel_core.c index 6cb9009c3..dbda05657 100644 --- a/kernel/net/ipv4/ip_tunnel_core.c +++ b/kernel/net/ipv4/ip_tunnel_core.c @@ -116,7 +116,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb->pkt_type = PACKET_HOST; - return 0; + + return iptunnel_pull_offloads(skb); } EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/kernel/net/ipv4/ip_vti.c b/kernel/net/ipv4/ip_vti.c index 4d8f0b698..65036891e 100644 --- a/kernel/net/ipv4/ip_vti.c +++ b/kernel/net/ipv4/ip_vti.c @@ -540,6 +540,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; +static bool is_vti_tunnel(const struct net_device *dev) +{ + return dev->netdev_ops == &vti_netdev_ops; +} + +static int vti_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ip_tunnel *tunnel = netdev_priv(dev); + + if (!is_vti_tunnel(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_DOWN: + if (!net_eq(tunnel->net, dev_net(dev))) + xfrm_garbage_collect(tunnel->net); + break; + } + return NOTIFY_DONE; +} + +static struct 
notifier_block vti_notifier_block __read_mostly = { + .notifier_call = vti_device_event, +}; + static int __init vti_init(void) { const char *msg; @@ -547,6 +574,8 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); + register_netdevice_notifier(&vti_notifier_block); + msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -579,6 +608,7 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: + unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -590,6 +620,7 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); + unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); diff --git a/kernel/net/ipv4/ipmr.c b/kernel/net/ipv4/ipmr.c index c3a38353f..8e7778654 100644 --- a/kernel/net/ipv4/ipmr.c +++ b/kernel/net/ipv4/ipmr.c @@ -882,8 +882,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } @@ -2190,7 +2192,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait) + struct rtmsg *rtm, int nowait, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2235,6 +2237,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); diff --git a/kernel/net/ipv4/netfilter/arp_tables.c b/kernel/net/ipv4/netfilter/arp_tables.c index 11dccba47..6e3e0e8b1 100644 --- a/kernel/net/ipv4/netfilter/arp_tables.c +++ b/kernel/net/ipv4/netfilter/arp_tables.c @@ -359,11 +359,24 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline bool unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; - return memcmp(arp, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; +} + +static bool find_jump_target(const struct xt_table_info *t, + const struct arpt_entry *target) +{ + struct arpt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; } /* Figures out from what hook each rule can be called: returns 0 if @@ -402,11 +415,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. 
*/ - if ((e->target_offset == sizeof(struct arpt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->arp)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -439,6 +451,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct arpt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -458,9 +472,15 @@ static int mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct arpt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct arpt_entry *) (entry0 + newpos); @@ -474,25 +494,6 @@ next: return 1; } -static inline int check_entry(const struct arpt_entry *e, const char *name) -{ - const struct xt_entry_target *t; - - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) - return -EINVAL; - - t = arpt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static inline int check_target(struct arpt_entry *e, const char *name) { struct xt_entry_target *t = arpt_get_target(e); @@ -522,10 +523,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) struct xt_target *target; int ret; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -557,7 +554,7 @@ static bool check_underflow(const struct arpt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->arp)) + if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -576,9 +573,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -590,6 +589,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -598,9 +605,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -691,10 +698,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, } } - if (!mark_source_chains(newinfo, 
repl->valid_hooks, entry0)) { - duprintf("Looping hook\n"); + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; - } /* Finally, each sanity check must pass */ i = 0; @@ -1125,55 +1130,17 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct arpt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, NFPROTO_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1181,7 +1148,7 @@ static int do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1208,6 +1175,18 @@ static int do_add_counters(struct net *net, const void __user *user, } #ifdef CONFIG_COMPAT +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + static inline void compat_release_entry(struct compat_arpt_entry *e) { struct xt_entry_target *t; @@ -1216,24 +1195,22 @@ static inline void compat_release_entry(struct compat_arpt_entry *e) module_put(t->u.kernel.target->me); } -static inline int +static int check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1245,8 +1222,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e, name); + if (!arp_checkentry(&e->arp)) + return -EINVAL; + + ret = 
xt_compat_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); if (ret) return ret; @@ -1270,17 +1250,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, if (ret) goto release_target; - /* Check hooks & underflows */ - for (h = 0; h < NF_ARP_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; release_target: @@ -1289,18 +1258,17 @@ out: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct arpt_entry *de; unsigned int origsize; - int ret, h; + int h; - ret = 0; origsize = *size; de = (struct arpt_entry *)*dstptr; memcpy(de, e, sizeof(struct arpt_entry)); @@ -1321,148 +1289,82 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; } -static int translate_compat_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info **pinfo, +static int translate_compat_table(struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_arpt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_arpt_entry *iter0; - struct arpt_entry *iter1; + struct arpt_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, number); + xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); /* Walk through entries, checking offsets. 
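/*
 * Illustrative sketch: the per-entry offset sanity checks that the
 * removed check_entry() open-coded are now centralized in
 * xt_check_entry_offsets() / xt_compat_check_entry_offsets(). Their
 * essential shape in simplified userspace C; the struct and names are
 * stand-ins, not the kernel definitions:
 */
#include <errno.h>

struct fake_target {
	unsigned short size;	/* u.target_size in the kernel */
};

static int check_offsets(const unsigned char *entry,
			 unsigned int target_offset,
			 unsigned int next_offset)
{
	const struct fake_target *t;

	/* the target header must fit before the next entry begins */
	if (target_offset + sizeof(*t) > next_offset)
		return -EINVAL;

	/* ... and so must the full target record it describes */
	t = (const struct fake_target *)(entry + target_offset);
	if (target_offset + t->size > next_offset)
		return -EINVAL;

	return 0;
}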
*/ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone */ + xt_compat_flush_offsets(NFPROTO_ARP); xt_compat_unlock(NFPROTO_ARP); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - iter1->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(iter1->counters.pcnt)) { - ret = -ENOMEM; - break; - } - - ret = check_target(iter1, name); - if (ret != 0) { - xt_percpu_counter_free(iter1->counters.pcnt); - break; - } - ++i; - if (strcmp(arpt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. 
- */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1470,31 +1372,18 @@ static int translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(NFPROTO_ARP); - xt_compat_unlock(NFPROTO_ARP); - goto out; } -struct compat_arpt_replace { - char name[XT_TABLE_MAXNAMELEN]; - u32 valid_hooks; - u32 num_entries; - u32 size; - u32 hook_entry[NF_ARP_NUMHOOKS]; - u32 underflow[NF_ARP_NUMHOOKS]; - u32 num_counters; - compat_uptr_t counters; - struct compat_arpt_entry entries[0]; -}; - static int compat_do_replace(struct net *net, void __user *user, unsigned int len) { @@ -1527,10 +1416,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/kernel/net/ipv4/netfilter/ip_tables.c b/kernel/net/ipv4/netfilter/ip_tables.c index b99affad6..a399c5419 100644 --- a/kernel/net/ipv4/netfilter/ip_tables.c +++ b/kernel/net/ipv4/netfilter/ip_tables.c @@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; - return memcmp(ip, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } @@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ip)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -443,6 +443,18 @@ ipt_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ipt_entry *target) +{ + struct ipt_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. 
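/*
 * Illustrative sketch of the find_jump_target() idea introduced here:
 * a verdict that jumps may only land on the first byte of an existing
 * entry, so the candidate pointer is compared against every entry
 * start in the blob. Simplified userspace C; the in-tree walk relies
 * on the entry sizes already having been validated:
 */
#include <stdbool.h>
#include <stddef.h>

struct fake_rule {
	unsigned short next_offset;	/* total size of this entry */
};

static bool find_jump_target(const unsigned char *blob, size_t blob_size,
			     const unsigned char *target)
{
	size_t off = 0;

	while (off + sizeof(struct fake_rule) <= blob_size) {
		const struct fake_rule *e = (const void *)(blob + off);

		if ((const unsigned char *)e == target)
			return true;	/* jump lands on an entry start */
		if (e->next_offset == 0)
			break;		/* keeps the sketch from looping */
		off += e->next_offset;
	}
	return false;			/* mid-entry jump: reject table */
}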
*/ static int @@ -476,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->ip)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -521,6 +532,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ipt_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -539,9 +552,15 @@ mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ipt_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ipt_entry *) (entry0 + newpos); @@ -568,27 +587,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ipt_entry *e, const char *name) -{ - const struct xt_entry_target *t; - - if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ipt_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { @@ -666,10 +664,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -721,7 +715,7 @@ static bool check_underflow(const struct ipt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ip)) + if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -741,9 +735,11 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -755,6 +751,14 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -763,9 +767,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ 
-1309,55 +1313,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - const char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ipt_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; - -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - if (len != size + num_counters * sizeof(struct xt_counters)) - return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET, name); + t = xt_find_table_lock(net, AF_INET, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1365,7 +1331,7 @@ do_add_counters(struct net *net, const void __user *user, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1444,7 +1410,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ipt_ip *ip, int *size) { @@ -1479,21 +1444,19 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1505,8 +1468,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e, name); + if (!ip_checkentry(&e->ip)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1514,7 +1480,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ip, &off); + ret = compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; @@ -1537,17 +1503,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char 
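/*
 * Illustrative sketch: the copy-and-validate dance deleted below is
 * now done once in xt_copy_counters_from_user() for all three table
 * families. The key invariant it preserves is that the user blob is
 * exactly one header plus num_counters records (simplified types,
 * userspace C):
 */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

struct fake_counter {
	uint64_t pcnt, bcnt;
};

static int counters_len_ok(size_t len, size_t hdr_size,
			   unsigned int num_counters)
{
	if (len != hdr_size + (size_t)num_counters * sizeof(struct fake_counter))
		return -EINVAL;	/* trailing or missing bytes: reject */
	return 0;
}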
*)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1561,19 +1516,18 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct xt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); @@ -1582,201 +1536,105 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int -compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) -{ - struct xt_entry_match *ematch; - struct xt_mtchk_param mtpar; - unsigned int j; - int ret = 0; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ip; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV4; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ipt_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; - struct ipt_entry *iter1; + struct ipt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. 
*/ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, number); + xt_compat_init_offsets(AF_INET, compatr->num_entries); /* Walk through entries, checking offsets. */ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. + * entry1/newinfo contains a 64bit ruleset that looks exactly as + * generated by 64bit userspace. + * + * Call standard translate_table() to validate all hook_entrys, + * underflows, check for loops, etc. + */ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ipt_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. 
- */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1784,17 +1642,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET); - xt_compat_unlock(AF_INET); - goto out; } static int @@ -1830,10 +1687,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index c6eb42100..ea91058b5 100644 --- a/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/kernel/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; struct netdev_notifier_info info; - netdev_notifier_info_init(&info, dev); + /* The masq_dev_notifier will catch the case of the device going + * down. So if the inetdev is dead and being destroyed we have + * no work to do. Otherwise this is an individual address removal + * and we have to perform the flush. + */ + if (idev->dead) + return NOTIFY_DONE; + + netdev_notifier_info_init(&info, idev->dev); return masq_device_event(this, event, &info); } diff --git a/kernel/net/ipv4/ping.c b/kernel/net/ipv4/ping.c index aa67e0e64..3a00512ad 100644 --- a/kernel/net/ipv4/ping.c +++ b/kernel/net/ipv4/ping.c @@ -645,6 +645,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); @@ -660,6 +662,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, if (len > 0xFFFF) return -EMSGSIZE; + /* Must have at least a full ICMP header. */ + if (len < icmph_len) + return -EINVAL; + /* * Check the flags. 
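/*
 * Illustrative sketch of the two new ping sendmsg bounds above: the
 * payload must carry at least a complete ICMP header and still fit an
 * IPv4 datagram. Userspace approximation (8 == sizeof(struct icmphdr)
 * for IPv4; ICMPv6 passes its own header length):
 */
#include <errno.h>
#include <stddef.h>

static int ping_len_ok(size_t len, size_t icmph_len)
{
	if (len > 0xFFFF)
		return -EMSGSIZE;	/* exceeds IPv4 total length */
	if (len < icmph_len)
		return -EINVAL;		/* not even a full ICMP header */
	return 0;
}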
*/ diff --git a/kernel/net/ipv4/route.c b/kernel/net/ipv4/route.c index 02c62299d..ef2f527a1 100644 --- a/kernel/net/ipv4/route.c +++ b/kernel/net/ipv4/route.c @@ -747,7 +747,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + if (!n) + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); @@ -2045,6 +2047,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ if (fi && res->prefixlen < 4) fi = NULL; + } else if ((type == RTN_LOCAL) && (orig_oif != 0) && + (orig_oif != dev_out->ifindex)) { + /* For local routes that require a particular output interface + * we do not want to cache the result. Caching the result + * causes incorrect behaviour when there are multiple source + * addresses on the interface, the end result being that if the + * intended recipient is waiting on that interface for the + * packet he won't receive it because it will be delivered on + * the loopback interface and the IP_PKTINFO ipi_ifindex will + * be set to the loopback interface as well. + */ + fi = NULL; } fnhe = NULL; @@ -2416,7 +2430,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = table_id; + r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; @@ -2480,7 +2494,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait); + r, nowait, portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/kernel/net/ipv4/sysctl_net_ipv4.c b/kernel/net/ipv4/sysctl_net_ipv4.c index 1866f9102..1bcd43670 100644 --- a/kernel/net/ipv4/sysctl_net_ipv4.c +++ b/kernel/net/ipv4/sysctl_net_ipv4.c @@ -97,11 +97,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } /* Update system visible IP port range */ @@ -110,10 +110,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock(&net->ipv4.ping_group_range.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock(&net->ipv4.ping_group_range.lock); } /* Validate changes from /proc interface. */ diff --git a/kernel/net/ipv4/tcp.c b/kernel/net/ipv4/tcp.c index 036a76ba2..600dcda84 100644 --- a/kernel/net/ipv4/tcp.c +++ b/kernel/net/ipv4/tcp.c @@ -783,6 +783,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. 
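/*
 * Illustrative sketch of the seqlock pattern the ping_group_range
 * accessors now apply to their own lock rather than borrowing
 * ip_local_ports.lock: readers retry until they observe an even,
 * unchanged sequence. Userspace model with a plain C11 atomic, not
 * the kernel's seqlock_t API:
 */
#include <stdatomic.h>

static atomic_uint seq;		/* even = stable, odd = write in flight */
static int range[2];

static void read_range(int *low, int *high)
{
	unsigned int s;

	do {
		while ((s = atomic_load(&seq)) & 1)
			;		/* writer active: wait it out */
		*low = range[0];
		*high = range[1];
	} while (atomic_load(&seq) != s);	/* raced a writer: retry */
}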
+ */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); @@ -1212,7 +1218,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == sysctl_max_skb_frags || !sg) { + if (i >= sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } diff --git a/kernel/net/ipv4/tcp_dctcp.c b/kernel/net/ipv4/tcp_dctcp.c index 7e538f71f..55d7da1d2 100644 --- a/kernel/net/ipv4/tcp_dctcp.c +++ b/kernel/net/ipv4/tcp_dctcp.c @@ -56,6 +56,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 delayed_ack_reserved; + u32 loss_cwnd; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->delayed_ack_reserved = 0; + ca->loss_cwnd = 0; ca->ce_state = 0; dctcp_reset(tp, ca); @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) static u32 dctcp_ssthresh(struct sock *sk) { - const struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); } @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, return 0; } +static u32 dctcp_cwnd_undo(struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = dctcp_cwnd_undo, .set_state = dctcp_state, .get_info = dctcp_get_info, .flags = TCP_CONG_NEEDS_ECN, diff --git a/kernel/net/ipv4/tcp_fastopen.c b/kernel/net/ipv4/tcp_fastopen.c index 55be6ac70..fca618272 100644 --- a/kernel/net/ipv4/tcp_fastopen.c +++ b/kernel/net/ipv4/tcp_fastopen.c @@ -112,7 +112,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) @@ -161,6 +161,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. 
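/*
 * Illustrative sketch of the DCTCP undo logic added above: ssthresh()
 * records the cwnd at loss time, and the new undo_cwnd hook restores
 * it when the loss proves spurious, never shrinking below the current
 * cwnd. Simplified userspace C; alpha is the kernel's scaled value in
 * [0, 1024], so the >> 11 removes at most half the window:
 */
struct dctcp_sketch {
	unsigned int snd_cwnd;
	unsigned int loss_cwnd;
	unsigned int alpha;
};

static unsigned int sketch_ssthresh(struct dctcp_sketch *ca)
{
	unsigned int target;

	ca->loss_cwnd = ca->snd_cwnd;	/* remember cwnd before backoff */
	target = ca->snd_cwnd - ((ca->snd_cwnd * ca->alpha) >> 11);
	return target > 2 ? target : 2;
}

static unsigned int sketch_cwnd_undo(const struct dctcp_sketch *ca)
{
	return ca->snd_cwnd > ca->loss_cwnd ? ca->snd_cwnd : ca->loss_cwnd;
}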
* The request socket is not added to the ehash diff --git a/kernel/net/ipv4/tcp_input.c b/kernel/net/ipv4/tcp_input.c index d4c511584..7cc0f8aac 100644 --- a/kernel/net/ipv4/tcp_input.c +++ b/kernel/net/ipv4/tcp_input.c @@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); /* rfc5961 challenge ack rate limiting */ -int sysctl_tcp_challenge_ack_limit = 100; +int sysctl_tcp_challenge_ack_limit = 1000; int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; @@ -2324,10 +2324,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -3390,6 +3389,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } +static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, + u32 *last_oow_ack_time) +{ + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + NET_INC_STATS_BH(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } + } + + *last_oow_ack_time = tcp_time_stamp; + + return false; /* not rate-limited: go ahead, send dupack now! */ +} + /* Return true if we're currently rate-limiting out-of-window ACKs and * thus shouldn't send a dupack right now. We rate-limit dupacks in * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS @@ -3403,21 +3419,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, /* Data packets without SYNs are not likely part of an ACK loop. */ if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && !tcp_hdr(skb)->syn) - goto not_rate_limited; - - if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); - - if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { - NET_INC_STATS_BH(net, mib_idx); - return true; /* rate-limited: don't send yet! */ - } - } - - *last_oow_ack_time = tcp_time_stamp; + return false; -not_rate_limited: - return false; /* not rate-limited: go ahead, send dupack now! */ + return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time); } /* RFC 5961 7 [ACK Throttling] */ @@ -3427,21 +3431,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) static u32 challenge_timestamp; static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); - u32 now; + u32 count, now; /* First check our per-socket dupack rate limit. */ - if (tcp_oow_rate_limited(sock_net(sk), skb, - LINUX_MIB_TCPACKSKIPPEDCHALLENGE, - &tp->last_oow_ack_time)) + if (__tcp_oow_rate_limited(sock_net(sk), + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, + &tp->last_oow_ack_time)) return; - /* Then check the check host-wide RFC 5961 rate limit. */ + /* Then check host-wide RFC 5961 rate limit. 
*/ now = jiffies / HZ; if (now != challenge_timestamp) { + u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; + challenge_timestamp = now; - challenge_count = 0; + WRITE_ONCE(challenge_count, half + + prandom_u32_max(sysctl_tcp_challenge_ack_limit)); } - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + count = READ_ONCE(challenge_count); + if (count > 0) { + WRITE_ONCE(challenge_count, count - 1); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } diff --git a/kernel/net/ipv4/tcp_ipv4.c b/kernel/net/ipv4/tcp_ipv4.c index 8c7e63163..e90f2507a 100644 --- a/kernel/net/ipv4/tcp_ipv4.c +++ b/kernel/net/ipv4/tcp_ipv4.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -320,8 +321,6 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) /* ICMPs are not backlogged, hence we cannot get * an established socket here. */ - WARN_ON(req->sk); - if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); } else if (abort) { @@ -568,6 +567,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock); /* * This routine will send an RST to the other tcp. * @@ -689,10 +689,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; + + local_lock(tcp_sk_lock); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); + local_unlock(tcp_sk_lock); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); @@ -774,10 +777,12 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + local_lock(tcp_sk_lock); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); + local_unlock(tcp_sk_lock); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); } @@ -810,8 +815,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq = (sk->sk_state == TCP_LISTEN) ? 
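/*
 * Illustrative sketch of the randomized challenge-ACK budget above
 * (the fix for the off-path attack tracked as CVE-2016-5696): instead
 * of a predictable counter reset to zero each second, the per-second
 * budget starts at a random value in [limit/2, limit/2 + limit).
 * Userspace model; rand() stands in for prandom_u32_max():
 */
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>

static unsigned int limit = 1000;	/* sysctl_tcp_challenge_ack_limit */
static time_t stamp;
static unsigned int budget;

static bool challenge_ack_allowed(void)
{
	time_t now = time(NULL);

	if (now != stamp) {		/* new one-second window */
		unsigned int half = (limit + 1) / 2;

		stamp = now;
		budget = half + (unsigned int)rand() % limit;
	}
	if (budget == 0)
		return false;		/* budget exhausted: stay quiet */
	budget--;
	return true;
}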
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt; + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v4_send_ack(sock_net(sk), skb, seq, - tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, 0, @@ -1529,6 +1540,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_prequeue); +int tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = (struct tcphdr *)skb->data; + unsigned int eaten = skb->len; + int err; + + err = sk_filter_trim_cap(sk, skb, th->doff * 4); + if (!err) { + eaten -= skb->len; + TCP_SKB_CB(skb)->end_seq -= eaten; + } + return err; +} +EXPORT_SYMBOL(tcp_filter); + /* * From tcp_input.c */ @@ -1634,8 +1660,10 @@ process: nf_reset(skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); skb->dev = NULL; diff --git a/kernel/net/ipv4/tcp_metrics.c b/kernel/net/ipv4/tcp_metrics.c index c8cbc2b4b..a726d7853 100644 --- a/kernel/net/ipv4/tcp_metrics.c +++ b/kernel/net/ipv4/tcp_metrics.c @@ -550,7 +550,7 @@ reset: */ if (crtt > tp->srtt_us) { /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ - crtt /= 8 * USEC_PER_MSEC; + crtt /= 8 * USEC_PER_SEC / HZ; inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); } else if (tp->srtt_us == 0) { /* RFC6298: 5.7 We've failed to get a valid RTT sample from diff --git a/kernel/net/ipv4/tcp_minisocks.c b/kernel/net/ipv4/tcp_minisocks.c index ac6b1961f..9475a2748 100644 --- a/kernel/net/ipv4/tcp_minisocks.c +++ b/kernel/net/ipv4/tcp_minisocks.c @@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->segs_in = 0; + newtp->segs_in = 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; @@ -818,6 +818,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ diff --git a/kernel/net/ipv4/tcp_output.c b/kernel/net/ipv4/tcp_output.c index 9bfc39ff2..de95714d0 100644 --- a/kernel/net/ipv4/tcp_output.c +++ b/kernel/net/ipv4/tcp_output.c @@ -239,7 +239,8 @@ void tcp_select_initial_window(int __space, __u32 mss, /* Set window scaling on max possible window * See RFC1323 for an explanation of the limit to 14 */ - space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = max_t(u32, space, sysctl_tcp_rmem[2]); + space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; @@ -1949,12 +1950,14 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. 
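/*
 * Illustrative sketch of the corrected initial-window sizing in
 * tcp_select_initial_window(): the caller's buffer space is folded in
 * via max() rather than discarded, then the smallest scale that fits
 * the space into a 16-bit window field is chosen (capped at 14 per
 * RFC 1323/7323). Simplified userspace C, sysctls as parameters:
 */
static unsigned int pick_rcv_wscale(unsigned int space,
				    unsigned int tcp_rmem_max,
				    unsigned int rmem_max,
				    unsigned int window_clamp)
{
	unsigned int wscale = 0;

	if (space < tcp_rmem_max)
		space = tcp_rmem_max;
	if (space < rmem_max)
		space = rmem_max;
	if (space > window_clamp)
		space = window_clamp;

	while (space > 65535 && wscale < 14) {
		space >>= 1;
		wscale++;
	}
	return wscale;
}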
@@ -2380,9 +2383,11 @@ u32 __tcp_select_window(struct sock *sk) int full_space = min_t(int, tp->window_clamp, allowed_space); int window; - if (mss > full_space) + if (unlikely(mss > full_space)) { mss = full_space; - + if (mss <= 0) + return 0; + } if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; @@ -2568,7 +2573,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf)) return -EAGAIN; if (skb_still_in_host_queue(sk, skb)) @@ -2625,8 +2631,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(&skb->skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else { diff --git a/kernel/net/ipv4/tcp_yeah.c b/kernel/net/ipv4/tcp_yeah.c index 3e6a472e6..92ab5bc91 100644 --- a/kernel/net/ipv4/tcp_yeah.c +++ b/kernel/net/ipv4/tcp_yeah.c @@ -75,7 +75,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else if (!yeah->doing_reno_now) { diff --git a/kernel/net/ipv4/udp.c b/kernel/net/ipv4/udp.c index 7f8ab46ad..e9513e397 100644 --- a/kernel/net/ipv4/udp.c +++ b/kernel/net/ipv4/udp.c @@ -1275,6 +1275,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; bool slow; if (flags & MSG_ERRQUEUE) @@ -1300,11 +1301,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { @@ -1340,7 +1342,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) @@ -1531,7 +1533,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -1990,10 +1992,14 @@ void udp_v4_early_demux(struct sk_buff *skb) if (!in_dev) return; - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return; + /* we are supposed to accept bcast packets */ + if (skb->pkt_type == PACKET_MULTICAST) { + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; + } + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); } else if (skb->pkt_type == PACKET_HOST) { diff --git a/kernel/net/ipv4/udp_offload.c b/kernel/net/ipv4/udp_offload.c index f9386160c..6396f1c80 100644 --- a/kernel/net/ipv4/udp_offload.c +++ 
b/kernel/net/ipv4/udp_offload.c @@ -299,14 +299,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - if (NAPI_GRO_CB(skb)->udp_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid)) goto out; - /* mark that this skb passed once through the udp gro layer */ - NAPI_GRO_CB(skb)->udp_mark = 1; + /* mark that this skb passed once through the tunnel gro layer */ + NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); @@ -339,8 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb, - uo_priv->offload); + pp = call_gro_receive_udp(uo_priv->offload->callbacks.gro_receive, + head, skb, uo_priv->offload); out_unlock: rcu_read_unlock(); diff --git a/kernel/net/ipv4/udp_tunnel.c b/kernel/net/ipv4/udp_tunnel.c index aba428626..280a9bded 100644 --- a/kernel/net/ipv4/udp_tunnel.c +++ b/kernel/net/ipv4/udp_tunnel.c @@ -89,6 +89,8 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, uh->source = src_port; uh->len = htons(skb->len); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + udp_set_csum(nocheck, skb, src, dst, skb->len); return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, diff --git a/kernel/net/ipv6/addrconf.c b/kernel/net/ipv6/addrconf.c index e8d3da081..253186a35 100644 --- a/kernel/net/ipv6/addrconf.c +++ b/kernel/net/ipv6/addrconf.c @@ -1898,6 +1898,7 @@ errdad: spin_unlock_bh(&ifp->lock); addrconf_mod_dad_work(ifp, 0); + in6_ifa_put(ifp); } /* Join to solicited addr multicast group. @@ -2915,7 +2916,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. 
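/*
 * Illustrative sketch of the encap_mark discipline introduced above:
 * the UDP-specific udp_mark becomes a generic "already inside one
 * tunnel layer" bit, so any encapsulation GRO handler claims it on
 * entry and refuses to recurse. Simplified control flow, userspace C:
 */
#include <stdbool.h>
#include <stddef.h>

struct gro_cb_sketch {
	bool encap_mark;
	bool flush;
};

static void *tunnel_gro_receive(struct gro_cb_sketch *cb /* , pkt ... */)
{
	if (cb->encap_mark) {		/* second tunnel layer in one run */
		cb->flush = true;	/* stop aggregating this packet */
		return NULL;
	}
	cb->encap_mark = true;		/* claim the single encap slot */
	/* ... protocol-specific aggregation would continue here ... */
	return NULL;
}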
*/ - if (sp_ifa->rt->dst.obsolete > 0) { + if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { @@ -3609,6 +3610,7 @@ static void addrconf_dad_work(struct work_struct *w) addrconf_dad_begin(ifp); goto out; } else if (action == DAD_ABORT) { + in6_ifa_hold(ifp); addrconf_dad_stop(ifp, 1); goto out; } @@ -5242,8 +5244,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -5252,7 +5253,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf) dev_disable_change(idev); } } - rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) diff --git a/kernel/net/ipv6/esp6.c b/kernel/net/ipv6/esp6.c index 060a60b2f..111ba55fd 100644 --- a/kernel/net/ipv6/esp6.c +++ b/kernel/net/ipv6/esp6.c @@ -418,7 +418,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } diff --git a/kernel/net/ipv6/exthdrs_core.c b/kernel/net/ipv6/exthdrs_core.c index 5c5d23e59..9508a20fb 100644 --- a/kernel/net/ipv6/exthdrs_core.c +++ b/kernel/net/ipv6/exthdrs_core.c @@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, *fragoff = _frag_off; return hp->nexthdr; } - return -ENOENT; + if (!found) + return -ENOENT; + if (fragoff) + *fragoff = _frag_off; + break; } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { diff --git a/kernel/net/ipv6/ip6_fib.c b/kernel/net/ipv6/ip6_fib.c index 0c7e276c2..34cf46d74 100644 --- a/kernel/net/ipv6/ip6_fib.c +++ b/kernel/net/ipv6/ip6_fib.c @@ -179,6 +179,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } diff --git a/kernel/net/ipv6/ip6_gre.c b/kernel/net/ipv6/ip6_gre.c index e5ea177d3..e89135828 100644 --- a/kernel/net/ipv6/ip6_gre.c +++ b/kernel/net/ipv6/ip6_gre.c @@ -55,6 +55,7 @@ #include #include #include +#include static bool log_ecn_error = true; @@ -367,35 +368,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(skb->data + offset); - int grehlen = offset + 4; + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); struct ip6_tnl *t; + int key_off = 0; __be16 flags; + __be32 key; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (!pskb_may_pull(skb, 
grehlen)) + if (!pskb_may_pull(skb, offset + grehlen)) return; ipv6h = (const struct ipv6hdr *)skb->data; - p = (__be16 *)(skb->data + offset); + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + key, greh->protocol); if (!t) return; @@ -778,6 +781,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) __u32 mtu; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -884,7 +889,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = skb->protocol; err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); diff --git a/kernel/net/ipv6/ip6_offload.c b/kernel/net/ipv6/ip6_offload.c index eeca943f1..225f5f7f2 100644 --- a/kernel/net/ipv6/ip6_offload.c +++ b/kernel/net/ipv6/ip6_offload.c @@ -196,6 +196,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); + skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); @@ -247,7 +248,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -258,6 +259,19 @@ out: return pp; } +static struct sk_buff **sit_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return ipv6_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -302,7 +316,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, + .gro_receive = sit_gro_receive, .gro_complete = sit_gro_complete, }, }; diff --git a/kernel/net/ipv6/ip6_output.c b/kernel/net/ipv6/ip6_output.c index 31144c486..58900c21e 100644 --- a/kernel/net/ipv6/ip6_output.c +++ b/kernel/net/ipv6/ip6_output.c @@ -1072,17 +1072,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); @@ -1091,8 +1086,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu, unsigned int flags, - const struct flowi6 *fl6) + int exthdrlen, int 
transhdrlen, int mtu, + unsigned int flags, const struct flowi6 *fl6) { struct sk_buff *skb; @@ -1117,7 +1112,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ - skb_reset_network_header(skb); + skb_set_network_header(skb, exthdrlen); /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; @@ -1359,7 +1354,7 @@ emsgsize: (rt->dst.dev->features & NETIF_F_UFO) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, + hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); if (err) goto error; diff --git a/kernel/net/ipv6/ip6_tunnel.c b/kernel/net/ipv6/ip6_tunnel.c index 137fca42a..6c6161763 100644 --- a/kernel/net/ipv6/ip6_tunnel.c +++ b/kernel/net/ipv6/ip6_tunnel.c @@ -246,6 +246,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_any(&t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } @@ -253,6 +254,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && (t->dev->flags & IFF_UP)) return t; } @@ -343,12 +345,12 @@ static int ip6_tnl_create2(struct net_device *dev) t = netdev_priv(dev); + dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -477,18 +479,19 @@ ip6_tnl_dev_uninit(struct net_device *dev) __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; - __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof(*ipv6h); + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; + unsigned int nhoff = raw - skb->data; + unsigned int off = nhoff + sizeof(*ipv6h); + u8 next, nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { - __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof(*hdr) > skb->data && - !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) + u16 optlen; + + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; - hdr = (struct ipv6_opt_hdr *) (raw + off); + hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; if (frag_hdr->frag_off) @@ -499,20 +502,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } else { optlen = ipv6_optlen(hdr); } + /* cache hdr->nexthdr, since pskb_may_pull() might + * invalidate hdr + */ + next = hdr->nexthdr; if (nexthdr == NEXTHDR_DEST) { - __u16 i = off + 2; + u16 i = 2; + + /* Remember : hdr is no longer valid at this point. 
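/*
 * Illustrative sketch of the parsing discipline applied in the
 * reworked ip6_tnl_parse_tlv_enc_lim(): validate the remaining length
 * before every dereference and never trust a pointer across a pull
 * that may reallocate the header. A userspace TLV walk with the same
 * shape (types and lengths illustrative):
 */
#include <stddef.h>
#include <stdint.h>

struct tlv_sketch {
	uint8_t type;
	uint8_t len;	/* payload bytes following this header */
};

static long find_tlv(const uint8_t *buf, size_t buflen, uint8_t want)
{
	size_t off = 0;

	while (off + sizeof(struct tlv_sketch) <= buflen) {
		const struct tlv_sketch *t = (const void *)(buf + off);
		size_t span = sizeof(*t) + t->len;

		if (off + span > buflen)
			return -1;	/* truncated option: bail out */
		if (t->type == want)
			return (long)off;
		off += span;
	}
	return -1;
}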
*/ + if (!pskb_may_pull(skb, off + optlen)) + break; + while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ - if (i + sizeof (*tel) > off + optlen) + if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; + tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) - return i; + return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; @@ -520,7 +532,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) i++; } } - nexthdr = hdr->nexthdr; + nexthdr = next; off += optlen; } return 0; @@ -1041,6 +1053,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct ipv6_tel_txoption opt; struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; + bool use_cache = false; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; @@ -1068,7 +1081,15 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); - } else if (!fl6->flowi6_mark) + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does + * not depend on the current inner header value + */ + use_cache = true; + } + + if (use_cache) dst = ip6_tnl_dst_get(t); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) @@ -1132,7 +1153,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) + if (use_cache && ndst) ip6_tnl_dst_set(t, ndst); skb_dst_set(skb, dst); @@ -1180,6 +1201,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + tproto = ACCESS_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; diff --git a/kernel/net/ipv6/ip6mr.c b/kernel/net/ipv6/ip6mr.c index a10e77103..d9843e5a6 100644 --- a/kernel/net/ipv6/ip6mr.c +++ b/kernel/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } @@ -2275,8 +2276,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return 1; } -int ip6mr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int nowait, u32 portid) { int err; struct mr6_table *mrt; @@ -2321,6 +2322,7 @@ int ip6mr_get_route(struct net *net, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); diff --git a/kernel/net/ipv6/mcast.c b/kernel/net/ipv6/mcast.c index 5ee56d0a8..d64ee7e83 100644 --- a/kernel/net/ipv6/mcast.c +++ b/kernel/net/ipv6/mcast.c @@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) return NULL; skb->priority = TC_PRIO_CONTROL; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* : diff --git a/kernel/net/ipv6/netfilter/ip6_tables.c b/kernel/net/ipv6/netfilter/ip6_tables.c index 99425cf28..22f39e00b 100644 --- 
a/kernel/net/ipv6/netfilter/ip6_tables.c +++ b/kernel/net/ipv6/netfilter/ip6_tables.c @@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -455,6 +455,18 @@ ip6t_do_table(struct sk_buff *skb, #endif } +static bool find_jump_target(const struct xt_table_info *t, + const struct ip6t_entry *target) +{ + struct ip6t_entry *iter; + + xt_entry_foreach(iter, t->entries, t->size) { + if (iter == target) + return true; + } + return false; +} + /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int @@ -488,11 +500,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -533,6 +544,8 @@ mark_source_chains(const struct xt_table_info *newinfo, size = e->next_offset; e = (struct ip6t_entry *) (entry0 + pos + size); + if (pos + size >= newinfo->size) + return 0; e->counters.pcnt = pos; pos += size; } else { @@ -551,9 +564,15 @@ mark_source_chains(const struct xt_table_info *newinfo, /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e = (struct ip6t_entry *) + (entry0 + newpos); + if (!find_jump_target(newinfo, e)) + return 0; } else { /* ... 
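 */

/* Editor's note (not part of the patch): mark_source_chains() walks a
 * user-supplied blob of variable-length rules, so every computed
 * position must be re-validated against the blob size before use; the
 * added "pos + size >= newinfo->size" checks and find_jump_target()
 * enforce that a jump can only land exactly on a real entry.  Generic
 * sketch of the bounded-walk idiom (types and names illustrative):
 */
struct rec {
	unsigned int next_offset;	/* total size of this record */
};

static int walk_records(const unsigned char *base, unsigned int total)
{
	unsigned int pos = 0;

	while (pos + sizeof(struct rec) <= total) {
		const struct rec *r = (const struct rec *)(base + pos);

		if (r->next_offset < sizeof(struct rec) ||
		    r->next_offset > total - pos)
			return -1;	/* record would overflow the blob */
		pos += r->next_offset;
	}
	return pos == total ? 0 : -1;	/* must end exactly on the edge */
}

/* End of note; the patch resumes mid-comment: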
this is a fallthru */ newpos = pos + e->next_offset; + if (newpos >= newinfo->size) + return 0; } e = (struct ip6t_entry *) (entry0 + newpos); @@ -580,27 +599,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) module_put(par.match->me); } -static int -check_entry(const struct ip6t_entry *e, const char *name) -{ - const struct xt_entry_target *t; - - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct xt_entry_target) > - e->next_offset) - return -EINVAL; - - t = ip6t_get_target_c(e); - if (e->target_offset + t->u.target_size > e->next_offset) - return -EINVAL; - - return 0; -} - static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { const struct ip6t_ip6 *ipv6 = par->entryinfo; @@ -679,10 +677,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -733,7 +727,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -753,9 +747,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -767,6 +763,14 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + err = xt_check_entry_offsets(e, e->elems, e->target_offset, + e->next_offset); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -775,9 +779,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -1321,55 +1325,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; - unsigned int num_counters; - char *name; - int size; - void *ptmp; struct xt_table *t; const struct xt_table_info *private; int ret = 0; struct ip6t_entry *iter; unsigned int addend; -#ifdef CONFIG_COMPAT - struct compat_xt_counters_info compat_tmp; - - if (compat) { - ptmp = &compat_tmp; - size = sizeof(struct compat_xt_counters_info); - } else -#endif - { - ptmp = &tmp; - size = sizeof(struct xt_counters_info); - } - - if (copy_from_user(ptmp, user, size) != 0) - return -EFAULT; -#ifdef CONFIG_COMPAT - if (compat) { - num_counters = compat_tmp.num_counters; - name = compat_tmp.name; - } else -#endif - { - num_counters = tmp.num_counters; - name = tmp.name; - } - - if (len != size + num_counters * sizeof(struct xt_counters)) - 
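/* Editor's note (not part of the patch): the block being deleted here
 * open-coded the native/compat copy of struct xt_counters_info in each
 * of arp_tables, ip_tables and ip6_tables.  The replacement visible
 * just below delegates to a shared helper, roughly:
 *
 *	paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
 *	if (IS_ERR(paddc))
 *		return PTR_ERR(paddc);
 *
 * so the length check (len must equal the header size plus
 * num_counters * sizeof(struct xt_counters)) now lives in one place.
 */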
return -EINVAL; - - paddc = vmalloc(len - size); - if (!paddc) - return -ENOMEM; - - if (copy_from_user(paddc, user + size, len - size) != 0) { - ret = -EFAULT; - goto free; - } - - t = xt_find_table_lock(net, AF_INET6, name); + paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + if (IS_ERR(paddc)) + return PTR_ERR(paddc); + t = xt_find_table_lock(net, AF_INET6, tmp.name); if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1377,7 +1342,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, local_bh_disable(); private = t->private; - if (private->number != num_counters) { + if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1456,7 +1421,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, static int compat_find_calc_match(struct xt_entry_match *m, - const char *name, const struct ip6t_ip6 *ipv6, int *size) { @@ -1491,21 +1455,19 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct xt_table_info *newinfo, unsigned int *size, const unsigned char *base, - const unsigned char *limit, - const unsigned int *hook_entries, - const unsigned int *underflows, - const char *name) + const unsigned char *limit) { struct xt_entry_match *ematch; struct xt_entry_target *t; struct xt_target *target; unsigned int entry_offset; unsigned int j; - int ret, off, h; + int ret, off; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1517,8 +1479,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, return -EINVAL; } - /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + if (!ip6_checkentry(&e->ipv6)) + return -EINVAL; + + ret = xt_compat_check_entry_offsets(e, e->elems, + e->target_offset, e->next_offset); if (ret) return ret; @@ -1526,7 +1491,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); + ret = compat_find_calc_match(ematch, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1549,17 +1514,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, if (ret) goto out; - /* Check hooks & underflows */ - for (h = 0; h < NF_INET_NUMHOOKS; h++) { - if ((unsigned char *)e - base == hook_entries[h]) - newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) - newinfo->underflow[h] = underflows[h]; - } - - /* Clear counters and comefrom */ - memset(&e->counters, 0, sizeof(e->counters)); - e->comefrom = 0; return 0; out: @@ -1573,18 +1527,17 @@ release_matches: return ret; } -static int +static void compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, - unsigned int *size, const char *name, + unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ip6t_entry *de; unsigned int origsize; - int ret, h; + int h; struct xt_entry_match *ematch; - ret = 0; origsize = *size; de = (struct ip6t_entry *)*dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); @@ -1593,11 +1546,9 @@ compat_copy_entry_from_user(struct 
compat_ip6t_entry *e, void **dstptr, *dstptr += sizeof(struct ip6t_entry); *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); - xt_ematch_foreach(ematch, e) { - ret = xt_compat_match_from_user(ematch, dstptr, size); - if (ret != 0) - return ret; - } + xt_ematch_foreach(ematch, e) + xt_compat_match_from_user(ematch, dstptr, size); + de->target_offset = e->target_offset - (origsize - *size); t = compat_ip6t_get_target(e); xt_compat_target_from_user(t, dstptr, size); @@ -1609,183 +1560,83 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } - return ret; -} - -static int compat_check_entry(struct ip6t_entry *e, struct net *net, - const char *name) -{ - unsigned int j; - int ret = 0; - struct xt_mtchk_param mtpar; - struct xt_entry_match *ematch; - - e->counters.pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(e->counters.pcnt)) - return -ENOMEM; - j = 0; - mtpar.net = net; - mtpar.table = name; - mtpar.entryinfo = &e->ipv6; - mtpar.hook_mask = e->comefrom; - mtpar.family = NFPROTO_IPV6; - xt_ematch_foreach(ematch, e) { - ret = check_match(ematch, &mtpar); - if (ret != 0) - goto cleanup_matches; - ++j; - } - - ret = check_target(e, net, name); - if (ret) - goto cleanup_matches; - return 0; - - cleanup_matches: - xt_ematch_foreach(ematch, e) { - if (j-- == 0) - break; - cleanup_match(ematch, net); - } - - xt_percpu_counter_free(e->counters.pcnt); - - return ret; } static int translate_compat_table(struct net *net, - const char *name, - unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, - unsigned int total_size, - unsigned int number, - unsigned int *hook_entries, - unsigned int *underflows) + const struct compat_ip6t_replace *compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ip6t_entry *iter0; - struct ip6t_entry *iter1; + struct ip6t_replace repl; unsigned int size; int ret = 0; info = *pinfo; entry0 = *pentry0; - size = total_size; - info->number = number; - - /* Init all hooks to impossible value. */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - info->hook_entry[i] = 0xFFFFFFFF; - info->underflow[i] = 0xFFFFFFFF; - } + size = compatr->size; + info->number = compatr->num_entries; duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, number); + xt_compat_init_offsets(AF_INET6, compatr->num_entries); /* Walk through entries, checking offsets. 
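 */

/* Editor's note (not part of the patch): the rewrite below reduces
 * translate_compat_table() to one job -- converting the 32-bit (compat)
 * image into native layout -- and then feeds the converted image through
 * the same translate_table() used by native setsockopt(), so the
 * semantic validation exists only once.  Outline of the two phases,
 * mirroring the shape of the new code (not compilable in isolation):
 */
#if 0
	/* phase 1: size/offset conversion only, no semantic checks */
	xt_entry_foreach(iter0, entry0, compatr->size)
		compat_copy_entry_from_user(iter0, &pos, &size,
					    newinfo, entry1);
	/* phase 2: full validation on the converted, native image */
	ret = translate_table(net, newinfo, entry1, &repl);
#endif

/* End of note; the patch resumes and the quoted comment closes below: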
*/ - xt_entry_foreach(iter0, entry0, total_size) { + xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, - entry0 + total_size, - hook_entries, - underflows, - name); + entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; - if (j != number) { + if (j != compatr->num_entries) { duprintf("translate_compat_table: %u not %u entries\n", - j, number); + j, compatr->num_entries); goto out_unlock; } - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) - continue; - if (info->hook_entry[i] == 0xFFFFFFFF) { - duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); - goto out_unlock; - } - if (info->underflow[i] == 0xFFFFFFFF) { - duprintf("Invalid underflow %u %u\n", - i, underflows[i]); - goto out_unlock; - } - } - ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; - newinfo->number = number; + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { - newinfo->hook_entry[i] = info->hook_entry[i]; - newinfo->underflow[i] = info->underflow[i]; + newinfo->hook_entry[i] = compatr->hook_entry[i]; + newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; - size = total_size; - xt_entry_foreach(iter0, entry0, total_size) { - ret = compat_copy_entry_from_user(iter0, &pos, &size, - name, newinfo, entry1); - if (ret != 0) - break; - } + size = compatr->size; + xt_entry_foreach(iter0, entry0, compatr->size) + compat_copy_entry_from_user(iter0, &pos, &size, + newinfo, entry1); + + /* all module references in entry0 are now gone. */ xt_compat_flush_offsets(AF_INET6); xt_compat_unlock(AF_INET6); - if (ret) - goto free_newinfo; - ret = -ELOOP; - if (!mark_source_chains(newinfo, valid_hooks, entry1)) - goto free_newinfo; + memcpy(&repl, compatr, sizeof(*compatr)); - i = 0; - xt_entry_foreach(iter1, entry1, newinfo->size) { - ret = compat_check_entry(iter1, net, name); - if (ret != 0) - break; - ++i; - if (strcmp(ip6t_get_target(iter1)->u.user.name, - XT_ERROR_TARGET) == 0) - ++newinfo->stacksize; - } - if (ret) { - /* - * The first i matches need cleanup_entry (calls ->destroy) - * because they had called ->check already. The other j-i - * entries need only release. 
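 */

/* Editor's note (not part of the patch): the unwind code deleted here
 * handled a mixed state -- some entries fully checked, others merely
 * converted.  After the restructuring that state cannot occur: before
 * conversion only module references are held, and the out_unlock path
 * releases exactly the j entries processed so far.  Underneath it is
 * the usual "unwind what succeeded" idiom (self-contained sketch,
 * names illustrative):
 */
struct item { int ready; };

static int item_init(struct item *it) { it->ready = 1; return 0; }
static void item_fini(struct item *it) { it->ready = 0; }

static int init_all(struct item *v, int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = item_init(&v[i]);
		if (err)
			goto unwind;
	}
	return 0;
unwind:
	while (i-- > 0)		/* release only what succeeded, in reverse */
		item_fini(&v[i]);
	return err;
}

/* End of note; the deleted comment's closing delimiter follows: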
- */ - int skip = i; - j -= i; - xt_entry_foreach(iter0, entry0, newinfo->size) { - if (skip-- > 0) - continue; - if (j-- == 0) - break; - compat_release_entry(iter0); - } - xt_entry_foreach(iter1, entry1, newinfo->size) { - if (i-- == 0) - break; - cleanup_entry(iter1, net); - } - xt_free_table_info(newinfo); - return ret; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + repl.hook_entry[i] = newinfo->hook_entry[i]; + repl.underflow[i] = newinfo->underflow[i]; } + repl.num_counters = 0; + repl.counters = NULL; + repl.size = newinfo->size; + ret = translate_table(net, newinfo, entry1, &repl); + if (ret) + goto free_newinfo; + *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1793,17 +1644,16 @@ translate_compat_table(struct net *net, free_newinfo: xt_free_table_info(newinfo); -out: - xt_entry_foreach(iter0, entry0, total_size) { + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; -out_unlock: - xt_compat_flush_offsets(AF_INET6); - xt_compat_unlock(AF_INET6); - goto out; } static int @@ -1839,10 +1689,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, - &newinfo, &loc_cpu_entry, tmp.size, - tmp.num_entries, tmp.hook_entry, - tmp.underflow); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; diff --git a/kernel/net/ipv6/output_core.c b/kernel/net/ipv6/output_core.c index 462f2a76b..1d184322a 100644 --- a/kernel/net/ipv6/output_core.c +++ b/kernel/net/ipv6/output_core.c @@ -148,6 +148,8 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + skb->protocol = htons(ETH_P_IPV6); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/kernel/net/ipv6/ping.c b/kernel/net/ipv6/ping.c index 263a5164a..3e55447b6 100644 --- a/kernel/net/ipv6/ping.c +++ b/kernel/net/ipv6/ping.c @@ -150,8 +150,10 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rt = (struct rt6_info *) dst; np = inet6_sk(sk); - if (!np) - return -EBADF; + if (!np) { + err = -EBADF; + goto dst_err_out; + } if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; @@ -186,6 +188,9 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } release_sock(sk); +dst_err_out: + dst_release(dst); + if (err) return err; diff --git a/kernel/net/ipv6/raw.c b/kernel/net/ipv6/raw.c index 99140986e..8bca90d6d 100644 --- a/kernel/net/ipv6/raw.c +++ b/kernel/net/ipv6/raw.c @@ -589,7 +589,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); + err = skb_copy_bits(skb, offset, &csum, 2); + if (err < 0) { + ip6_flush_pending_frames(sk); + goto out; + } /* in case cksum was not initialized */ if (unlikely(csum)) diff --git a/kernel/net/ipv6/reassembly.c b/kernel/net/ipv6/reassembly.c index 45f5ae51d..a234552a7 100644 --- a/kernel/net/ipv6/reassembly.c +++ b/kernel/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 
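 */

/* Editor's note (not part of the patch): skb_postpush_rcsum() is the
 * named form of exactly the open-coded checksum fold it replaces just
 * below; in this kernel series the helper is (approximately) the inline
 * sketched here, so the change is a behavior-preserving cleanup:
 */
#include <linux/skbuff.h>
#include <net/checksum.h>

static inline void skb_postpush_rcsum_sketch(struct sk_buff *skb,
					     const void *start,
					     unsigned int len)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_partial(start, len, skb->csum);
}

/* End of note; the patch resumes mid-comment: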
8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); diff --git a/kernel/net/ipv6/route.c b/kernel/net/ipv6/route.c index 3f164d3aa..36bf4c3fe 100644 --- a/kernel/net/ipv6/route.c +++ b/kernel/net/ipv6/route.c @@ -1727,6 +1727,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err; @@ -3138,7 +3140,9 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait, + portid); + if (err <= 0) { if (!nowait) { if (err == 0) @@ -3192,7 +3196,8 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; - lwtunnel_fill_encap(skb, rt->dst.lwtstate); + if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; diff --git a/kernel/net/ipv6/sit.c b/kernel/net/ipv6/sit.c index dcccae861..184f0fe35 100644 --- a/kernel/net/ipv6/sit.c +++ b/kernel/net/ipv6/sit.c @@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } @@ -681,14 +681,15 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; - skb->protocol = htons(ETH_P_IPV6); + skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; goto out; } - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6))) + goto out; err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { @@ -1388,6 +1389,7 @@ static int ipip6_tunnel_init(struct net_device *dev) tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); if (!tunnel->dst_cache) { free_percpu(dev->tstats); + dev->tstats = NULL; return -ENOMEM; } diff --git a/kernel/net/ipv6/tcp_ipv6.c b/kernel/net/ipv6/tcp_ipv6.c index b8d405623..76a8c8057 100644 --- a/kernel/net/ipv6/tcp_ipv6.c +++ b/kernel/net/ipv6/tcp_ipv6.c @@ -932,9 +932,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ + /* RFC 7323 2.3 + * The window field (SEG.WND) of every outgoing segment, with the + * exception of segments, MUST be right-shifted by + * Rcv.Wind.Shift bits: + */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
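/* Editor's note (not part of the patch): per RFC 7323, section 2.3, the
 * 16-bit window field carries SEG.WND right-shifted by Rcv.Wind.Shift
 * once scaling is negotiated; the RFC text reads "with the exception of
 * <SYN> segments" -- the <SYN> token was apparently lost from the quoted
 * comment above when markup was stripped from this copy of the patch.
 * Worked example: with rcv_wscale = 7, an actual window of 1048576
 * bytes must be advertised as 1048576 >> 7 = 8192; the unshifted value,
 * truncated to 16 bits, would be wildly wrong on these ACKs.
 */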
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_rsk(req)->rcv_nxt, + req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); @@ -968,6 +974,16 @@ drop: return 0; /* don't send reset */ } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, @@ -1157,8 +1173,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) + if (newnp->pktoptions) { + tcp_v6_restore_cb(newnp->pktoptions); skb_set_owner_r(newnp->pktoptions, newsk); + } } } @@ -1198,7 +1216,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard; /* @@ -1302,6 +1320,7 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { skb_set_owner_r(opt_skb, sk); + tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); @@ -1335,15 +1354,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->sacked = 0; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. 
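 */

/* Editor's note (not part of the patch): the hunks above move this
 * helper, unchanged, ahead of its new callers in the pktoptions paths.
 * It exists because TCP reuses skb->cb: tcp_v6_fill_cb() parks the
 * inet6_skb_parm that IP6CB() expects at the start of cb[] inside
 * TCP_SKB_CB's header field, so it must be memmove()d back before any
 * code consults IP6CB() again.  Structurally (illustrative only):
 */
struct parm_sketch { int flags; };	/* stands in for inet6_skb_parm */
struct tcb_sketch {			/* stands in for tcp_skb_cb     */
	unsigned int seq, end_seq;	/* TCP state claims the front   */
	struct parm_sketch h6;		/* the parm is parked after it  */
};
/* fill:    memmove(&tcb->h6, cb, sizeof(parm))  -- park it for TCP     */
/* restore: memmove(cb, &tcb->h6, sizeof(parm))  -- IP6CB() valid again */

/* End of note; the deleted copy of the helper closes below: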
- */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; @@ -1430,8 +1440,10 @@ process: if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); skb->dev = NULL; @@ -1706,7 +1718,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/kernel/net/ipv6/udp.c b/kernel/net/ipv6/udp.c index 9da3287a3..dfa85e726 100644 --- a/kernel/net/ipv6/udp.c +++ b/kernel/net/ipv6/udp.c @@ -402,6 +402,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; int is_udp4; bool slow; @@ -433,11 +434,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { @@ -496,7 +498,8 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, + sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); @@ -647,7 +650,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv) { + if (encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -837,8 +840,8 @@ start_lookup: flush_stack(stack, count, skb, count - 1); } else { if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); consume_skb(skb); } return 0; @@ -916,11 +919,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } diff --git a/kernel/net/irda/af_irda.c b/kernel/net/irda/af_irda.c index 923abd6b3..8d2f7c9b4 100644 --- a/kernel/net/irda/af_irda.c +++ b/kernel/net/irda/af_irda.c @@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, } /* Check if we have opened a local TSAP */ - if (!self->tsap) - irda_open_tsap(self, LSAP_ANY, addr->sir_name); + if (!self->tsap) { + err = irda_open_tsap(self, LSAP_ANY, addr->sir_name); + if (err) + goto out; + } /* Move to connecting socket, start sending Connect Requests */ sock->state = SS_CONNECTING; diff --git a/kernel/net/irda/iriap.c b/kernel/net/irda/iriap.c index 4a7ae32af..1138eaf5c 100644 --- a/kernel/net/irda/iriap.c +++ b/kernel/net/irda/iriap.c @@ -185,8 +185,12 @@ struct iriap_cb 
*iriap_open(__u8 slsap_sel, int mode, void *priv, self->magic = IAS_MAGIC; self->mode = mode; - if (mode == IAS_CLIENT) - iriap_register_lsap(self, slsap_sel, mode); + if (mode == IAS_CLIENT) { + if (iriap_register_lsap(self, slsap_sel, mode)) { + kfree(self); + return NULL; + } + } self->confirm = callback; self->priv = priv; diff --git a/kernel/net/l2tp/l2tp_core.c b/kernel/net/l2tp/l2tp_core.c index afca2eb4d..ec17cbe8a 100644 --- a/kernel/net/l2tp/l2tp_core.c +++ b/kernel/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; diff --git a/kernel/net/l2tp/l2tp_core.h b/kernel/net/l2tp/l2tp_core.h index 5871537af..763e8e241 100644 --- a/kernel/net/l2tp/l2tp_core.h +++ b/kernel/net/l2tp/l2tp_core.h @@ -273,6 +273,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/kernel/net/l2tp/l2tp_ip.c b/kernel/net/l2tp/l2tp_ip.c index ec22078b0..445b7cd08 100644 --- a/kernel/net/l2tp/l2tp_ip.c +++ b/kernel/net/l2tp/l2tp_ip.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -123,12 +124,11 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -156,6 +156,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } @@ -249,8 +252,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret; int chk_addr_ret; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) @@ -265,6 +266,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) read_unlock_bh(&l2tp_ip_lock); lock_sock(sk); + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; @@ -552,6 +556,30 @@ out: return err ? err : copied; } +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int amount; + + switch (cmd) { + case SIOCOUTQ: + amount = sk_wmem_alloc_get(sk); + break; + case SIOCINQ: + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? 
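/* Editor's note (not part of the patch): L2TP/IP sockets previously
 * routed .ioctl to udp_ioctl(), which assumes a struct udp_sock and UDP
 * framing -- the wrong type for L2TP/IP.  The new l2tp_ioctl() answers
 * the two queue queries directly from the socket queues.  The include
 * added at the top of this file appears to be <asm/ioctls.h> (the
 * bracketed header names were lost in extraction), which supplies the
 * SIOCINQ and SIOCOUTQ definitions.  Userspace usage sketch:
 *
 *	int pending;
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		;	// pending = length of the next queued datagram
 */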
skb->len : 0; + spin_unlock_bh(&sk->sk_receive_queue.lock); + break; + + default: + return -ENOIOCTLCMD; + } + + return put_user(amount, (int __user *)arg); +} +EXPORT_SYMBOL(l2tp_ioctl); + static struct proto l2tp_ip_prot = { .name = "L2TP/IP", .owner = THIS_MODULE, @@ -560,7 +588,7 @@ static struct proto l2tp_ip_prot = { .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, .disconnect = l2tp_ip_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/kernel/net/l2tp/l2tp_ip6.c b/kernel/net/l2tp/l2tp_ip6.c index a2c8747d2..bcdab1cba 100644 --- a/kernel/net/l2tp/l2tp_ip6.c +++ b/kernel/net/l2tp/l2tp_ip6.c @@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } @@ -264,8 +266,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int addr_type; int err; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr->l2tp_family != AF_INET6) return -EINVAL; if (addr_len < sizeof(*addr)) @@ -291,6 +291,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) lock_sock(sk); err = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_unlock; + if (sk->sk_state != TCP_CLOSE) goto out_unlock; @@ -711,7 +714,7 @@ static struct proto l2tp_ip6_prot = { .bind = l2tp_ip6_bind, .connect = l2tp_ip6_connect, .disconnect = l2tp_ip6_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip6_destroy_sock, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, diff --git a/kernel/net/llc/af_llc.c b/kernel/net/llc/af_llc.c index 8dab4e569..bb8edb9ef 100644 --- a/kernel/net/llc/af_llc.c +++ b/kernel/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); diff --git a/kernel/net/mac80211/cfg.c b/kernel/net/mac80211/cfg.c index c12f34813..19322c047 100644 --- a/kernel/net/mac80211/cfg.c +++ b/kernel/net/mac80211/cfg.c @@ -865,7 +865,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); - skb_queue_purge(&sdata->u.ap.ps.bc_buf); + ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); mutex_lock(&local->mtx); ieee80211_vif_copy_chanctx_to_vlans(sdata, true); diff --git a/kernel/net/mac80211/ibss.c b/kernel/net/mac80211/ibss.c index 6a12b0f5c..980e9e9b6 100644 --- a/kernel/net/mac80211/ibss.c +++ b/kernel/net/mac80211/ibss.c @@ -7,6 +7,7 @@ * Copyright 2007, Michael Wu * Copyright 2009, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it 
under the terms of the GNU General Public License version 2 as @@ -1484,14 +1485,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, - &ifibss->chandef, - channels, - ARRAY_SIZE(channels)); scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); - ieee80211_request_ibss_scan(sdata, ifibss->ssid, - ifibss->ssid_len, channels, num, - scan_width); + + if (ifibss->fixed_channel) { + num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, + &ifibss->chandef, + channels, + ARRAY_SIZE(channels)); + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, channels, + num, scan_width); + } else { + ieee80211_request_ibss_scan(sdata, ifibss->ssid, + ifibss->ssid_len, NULL, + 0, scan_width); + } } else { int interval = IEEE80211_SCAN_INTERVAL; diff --git a/kernel/net/mac80211/iface.c b/kernel/net/mac80211/iface.c index c9e325d2e..bcb0a1b64 100644 --- a/kernel/net/mac80211/iface.c +++ b/kernel/net/mac80211/iface.c @@ -977,7 +977,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (sdata->vif.txq) { struct txq_info *txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&txqi->queue.lock); ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + spin_unlock_bh(&txqi->queue.lock); + atomic_set(&sdata->txqs_len[txqi->txq.ac], 0); } @@ -1747,7 +1750,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1833,7 +1836,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } diff --git a/kernel/net/mac80211/mesh.c b/kernel/net/mac80211/mesh.c index 6f85b6ab8..9063e8e73 100644 --- a/kernel/net/mac80211/mesh.c +++ b/kernel/net/mac80211/mesh.c @@ -151,19 +151,26 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u32 changed = 0; /* * maybe userspace handles peer allocation and peering, but in either * case the beacon is still generated by the kernel and we might need * an update. 
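 */

/* Editor's note (not part of the patch): the added
 * mesh_path_flush_by_nexthop() + synchronize_net() below follow the
 * standard RCU teardown order -- unpublish the station so no new reader
 * can find it, wait one grace period for existing readers to drain,
 * and only then let it be freed.  Generic shape of the idiom
 * (self-contained sketch, names illustrative):
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj { int data; };

static void obj_remove(struct obj __rcu **slot)
{
	struct obj *o = rcu_dereference_protected(*slot, 1);

	RCU_INIT_POINTER(*slot, NULL);	/* unpublish: no new readers    */
	synchronize_rcu();		/* existing readers drain       */
	kfree(o);			/* now nothing can reach it     */
}

/* End of note; the patch resumes and the quoted comment closes below: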
*/ - changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.user_mpm && + sta->mesh->plink_state == NL80211_PLINK_ESTAB) + changed |= mesh_plink_dec_estab_count(sdata); + changed |= mesh_accept_plinks_update(sdata); if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->mesh->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } @@ -348,7 +355,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, /* fast-forward to vendor IEs */ offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); - if (offset) { + if (offset < ifmsh->ie_len) { len = ifmsh->ie_len - offset; data = ifmsh->ie + offset; if (skb_tailroom(skb) < len) diff --git a/kernel/net/mac80211/mlme.c b/kernel/net/mac80211/mlme.c index 83097c383..23095d5e0 100644 --- a/kernel/net/mac80211/mlme.c +++ b/kernel/net/mac80211/mlme.c @@ -2517,7 +2517,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, } static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, - bool assoc) + bool assoc, bool abandon) { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; @@ -2539,6 +2539,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->mtx); + + if (abandon) + cfg80211_abandon_assoc(sdata->dev, assoc_data->bss); } kfree(assoc_data); @@ -2768,7 +2771,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, bssid, reason_code, ieee80211_get_reason_code_string(reason_code)); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; @@ -3173,14 +3176,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); return; } @@ -3193,7 +3196,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * recalc after assoc_data is NULL but before associated * is set can cause the interface to go idle */ - ieee80211_destroy_assoc_data(sdata, true); + ieee80211_destroy_assoc_data(sdata, true, false); /* get uapsd queues configuration */ uapsd_queues = 0; @@ -3888,7 +3891,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) .u.mlme.status = MLME_TIMEOUT, }; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); drv_event_callback(sdata->local, sdata, &event); } @@ -4029,7 +4032,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); if (ifmgd->assoc_data) - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); if 
(ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, @@ -4905,7 +4908,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code); @@ -4980,7 +4983,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); if (ifmgd->assoc_data) { struct cfg80211_bss *bss = ifmgd->assoc_data->bss; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); } if (ifmgd->auth_data) diff --git a/kernel/net/mac80211/rx.c b/kernel/net/mac80211/rx.c index 0e9e264bc..7a681da1d 100644 --- a/kernel/net/mac80211/rx.c +++ b/kernel/net/mac80211/rx.c @@ -2203,16 +2203,22 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(status->rx_flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - if (ieee80211_has_a4(hdr->frame_control) && - rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; + if (unlikely(ieee80211_has_a4(hdr->frame_control))) { + switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + if (!rx->sdata->u.vlan.sta) + return RX_DROP_UNUSABLE; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sdata->u.mgd.use_4addr) + return RX_DROP_UNUSABLE; + break; + default: + return RX_DROP_UNUSABLE; + } + } - if (is_multicast_ether_addr(hdr->addr1) && - ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - rx->sdata->u.vlan.sta) || - (rx->sdata->vif.type == NL80211_IFTYPE_STATION && - rx->sdata->u.mgd.use_4addr))) + if (is_multicast_ether_addr(hdr->addr1)) return RX_DROP_UNUSABLE; skb->dev = dev; @@ -2250,7 +2256,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u16 q, hdrlen; + u16 ac, q, hdrlen; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -2319,7 +2325,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - q = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee80211_select_queue_80211(sdata, skb, hdr); + q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; diff --git a/kernel/net/mac80211/sta_info.c b/kernel/net/mac80211/sta_info.c index f91d18732..67066d048 100644 --- a/kernel/net/mac80211/sta_info.c +++ b/kernel/net/mac80211/sta_info.c @@ -256,11 +256,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) } /* Caller must hold local->sta_mtx */ -static void sta_info_hash_add(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_add(struct ieee80211_local *local, + struct sta_info *sta) { - rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -484,11 +484,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info 
sinfo; + struct station_info *sinfo; int err = 0; lockdep_assert_held(&local->sta_mtx); + sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); + if (!sinfo) { + err = -ENOMEM; + goto out_err; + } + /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; @@ -503,7 +509,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ - sta_info_hash_add(local, sta); + err = sta_info_hash_add(local, sta); + if (err) + goto out_drop_sta; list_add_tail_rcu(&sta->list, &local->sta_list); @@ -520,10 +528,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); - memset(&sinfo, 0, sizeof(sinfo)); - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + sinfo->generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); @@ -538,6 +545,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_remove: sta_info_hash_del(local, sta); list_del_rcu(&sta->list); + out_drop_sta: local->num_sta--; synchronize_net(); __cleanup_single_sta(sta); @@ -882,7 +890,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info sinfo = {}; + struct station_info *sinfo; int ret; /* @@ -920,8 +928,11 @@ static void __sta_info_destroy_part2(struct sta_info *sta) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - sta_set_sinfo(sta, &sinfo); - cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); + if (sinfo) + sta_set_sinfo(sta, sinfo); + cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + kfree(sinfo); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); diff --git a/kernel/net/mac80211/sta_info.h b/kernel/net/mac80211/sta_info.h index 2cafb21b4..15b015028 100644 --- a/kernel/net/mac80211/sta_info.h +++ b/kernel/net/mac80211/sta_info.h @@ -269,7 +269,7 @@ struct ieee80211_fast_tx { u8 sa_offs, da_offs, pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + - sizeof(rfc1042_header)]; + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; diff --git a/kernel/net/mac80211/tx.c b/kernel/net/mac80211/tx.c index bdc224d50..ea5dfefd7 100644 --- a/kernel/net/mac80211/tx.c +++ b/kernel/net/mac80211/tx.c @@ -365,7 +365,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) skb = skb_dequeue(&ps->bc_buf); if (skb) { purged++; - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); } total += skb_queue_len(&ps->bc_buf); } @@ -448,7 +448,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) { ps_dbg(tx->sdata, "BC TX buffer full - dropping the oldest frame\n"); - dev_kfree_skb(skb_dequeue(&ps->bc_buf)); + ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf)); } else tx->local->total_ps_buffered++; @@ -2699,7 +2699,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); int hw_headroom = sdata->local->hw.extra_tx_headroom; struct ethhdr eth; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct 
ieee80211_tx_info *info; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct ieee80211_tx_data tx; ieee80211_tx_result r; @@ -2761,6 +2761,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); info->band = fast_tx->band; info->control.vif = &sdata->vif; @@ -3781,7 +3782,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb)) break; - dev_kfree_skb_any(skb); + ieee80211_free_txskb(hw, skb); } info = IEEE80211_SKB_CB(skb); diff --git a/kernel/net/mpls/af_mpls.c b/kernel/net/mpls/af_mpls.c index c32fc411a..881bc2072 100644 --- a/kernel/net/mpls/af_mpls.c +++ b/kernel/net/mpls/af_mpls.c @@ -518,6 +518,9 @@ static struct net_device *find_outdev(struct net *net, if (!dev) return ERR_PTR(-ENODEV); + if (IS_ERR(dev)) + return dev; + /* The caller is holding rtnl anyways, so release the dev reference */ dev_put(dev); diff --git a/kernel/net/netfilter/ipvs/ip_vs_core.c b/kernel/net/netfilter/ipvs/ip_vs_core.c index f57b4dcdb..4da560005 100644 --- a/kernel/net/netfilter/ipvs/ip_vs_core.c +++ b/kernel/net/netfilter/ipvs/ip_vs_core.c @@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int cp = pp->conn_in_get(ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); - if (conn_reuse_mode && !iph.fragoffs && - is_new_conn(skb, &iph) && cp && - ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && - unlikely(!atomic_read(&cp->dest->weight))) || - unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { - if (!atomic_read(&cp->n_control)) - ip_vs_conn_expire_now(cp); - __ip_vs_conn_put(cp); - cp = NULL; + if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { + bool uses_ct = false, resched = false; + + if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && + unlikely(!atomic_read(&cp->dest->weight))) { + resched = true; + uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + } else if (is_new_conn_expected(cp, conn_reuse_mode)) { + uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + if (!atomic_read(&cp->n_control)) { + resched = true; + } else { + /* Do not reschedule controlling connection + * that uses conntrack while it is still + * referenced by controlled connection(s). + */ + resched = !uses_ct; + } + } + + if (resched) { + if (!atomic_read(&cp->n_control)) + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + if (uses_ct) + return NF_DROP; + cp = NULL; + } } if (unlikely(!cp)) { diff --git a/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c b/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c index 1b8d594e4..0a6eb5c0d 100644 --- a/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, false, &iph); + retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ - if (iph.protocol != IPPROTO_UDP) + if (!retc || iph.protocol != IPPROTO_UDP) return -EINVAL; /* todo: IPv6 fragments: * I think this only should be done for the first fragment. 
/HS @@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) dptr = skb->data + dataoff; datalen = skb->len - dataoff; - if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) + if (get_callid(dptr, 0, datalen, &matchoff, &matchlen)) return -EINVAL; /* N.B: pe_data is only set on success, diff --git a/kernel/net/netfilter/ipvs/ip_vs_sync.c b/kernel/net/netfilter/ipvs/ip_vs_sync.c index 803001a45..1b07578be 100644 --- a/kernel/net/netfilter/ipvs/ip_vs_sync.c +++ b/kernel/net/netfilter/ipvs/ip_vs_sync.c @@ -1545,7 +1545,8 @@ error: /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id) +static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, + int ifindex) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1566,6 +1567,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id) set_sock_size(sock->sk, 0, result); get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); + sock->sk->sk_bound_dev_if = ifindex; result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); @@ -1868,7 +1870,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, if (state == IP_VS_STATE_MASTER) sock = make_send_sock(ipvs, id); else - sock = make_receive_sock(ipvs, id); + sock = make_receive_sock(ipvs, id, dev->ifindex); if (IS_ERR(sock)) { result = PTR_ERR(sock); goto outtinfo; diff --git a/kernel/net/netfilter/nf_conntrack_core.c b/kernel/net/netfilter/nf_conntrack_core.c index 3cb3cb831..86a3c6f0c 100644 --- a/kernel/net/netfilter/nf_conntrack_core.c +++ b/kernel/net/netfilter/nf_conntrack_core.c @@ -1757,6 +1757,7 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + static atomic64_t unique_id; int ret = -ENOMEM; int cpu; @@ -1779,7 +1780,8 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", + (u64)atomic64_inc_return(&unique_id)); if (!net->ct.slabname) goto err_slabname; diff --git a/kernel/net/netfilter/nf_log.c b/kernel/net/netfilter/nf_log.c index a5d41dfa9..2c89f90cd 100644 --- a/kernel/net/netfilter/nf_log.c +++ b/kernel/net/netfilter/nf_log.c @@ -401,7 +401,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; - struct net *net = current->nsproxy->net_ns; + struct net *net = table->extra2; if (write) { if (size > sizeof(buf)) @@ -453,7 +453,6 @@ static int netfilter_log_sysctl_init(struct net *net) 3, "%d", i); nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i]; - nf_log_sysctl_table[i].data = NULL; nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; nf_log_sysctl_table[i].mode = 0644; nf_log_sysctl_table[i].proc_handler = @@ -463,6 +462,9 @@ static int netfilter_log_sysctl_init(struct net *net) } } + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) + table[i].extra2 = net; + net->nf.nf_log_dir_header = register_net_sysctl(net, "net/netfilter/nf_log", table); diff --git a/kernel/net/netfilter/nft_dynset.c b/kernel/net/netfilter/nft_dynset.c index 9dec3bd1b..0a5df0cba 100644 --- a/kernel/net/netfilter/nft_dynset.c +++ b/kernel/net/netfilter/nft_dynset.c @@ -140,7 +140,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != 
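/* Editor's note (not part of the patch): NFTA_DYNSET_TIMEOUT travels
 * over netlink in milliseconds while the kernel keeps timeouts in
 * jiffies, so the value must be converted on input (msecs_to_jiffies)
 * and converted back on dump (jiffies_to_msecs), as this hunk does.
 * Without it, a 1000 ms timeout stored as 1000 jiffies would run for
 * 4 seconds on a HZ=250 kernel, and dumps would show the inverse error.
 */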
NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + tb[NFTA_DYNSET_TIMEOUT]))); } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -227,7 +228,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(priv->timeout)))) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; diff --git a/kernel/net/netfilter/x_tables.c b/kernel/net/netfilter/x_tables.c index d4aaad747..2fc6ca9d1 100644 --- a/kernel/net/netfilter/x_tables.c +++ b/kernel/net/netfilter/x_tables.c @@ -415,6 +415,47 @@ int xt_check_match(struct xt_mtchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_match); +/** xt_check_entry_match - check that matches end before start of target + * + * @match: beginning of xt_entry_match + * @target: beginning of this rules target (alleged end of matches) + * @alignment: alignment requirement of match structures + * + * Validates that all matches add up to the beginning of the target, + * and that each match covers at least the base structure size. + * + * Return: 0 on success, negative errno on failure. + */ +static int xt_check_entry_match(const char *match, const char *target, + const size_t alignment) +{ + const struct xt_entry_match *pos; + int length = target - match; + + if (length == 0) /* no matches */ + return 0; + + pos = (struct xt_entry_match *)match; + do { + if ((unsigned long)pos % alignment) + return -EINVAL; + + if (length < (int)sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size < sizeof(struct xt_entry_match)) + return -EINVAL; + + if (pos->u.match_size > length) + return -EINVAL; + + length -= pos->u.match_size; + pos = ((void *)((char *)(pos) + (pos)->u.match_size)); + } while (length > 0); + + return 0; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -484,13 +525,14 @@ int xt_compat_match_offset(const struct xt_match *match) } EXPORT_SYMBOL_GPL(xt_compat_match_offset); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - unsigned int *size) +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; int pad, off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; + char name[sizeof(m->u.user.name)]; m = *dstptr; memcpy(m, cm, sizeof(*cm)); @@ -504,10 +546,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; + strlcpy(name, match->name, sizeof(name)); + module_put(match->me); + strncpy(m->u.user.name, name, sizeof(m->u.user.name)); *size += off; *dstptr += msize; - return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); @@ -538,8 +582,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, return 0; } EXPORT_SYMBOL_GPL(xt_compat_match_to_user); + +/* non-compat version may have padding after verdict */ +struct compat_xt_standard_target { + struct compat_xt_entry_target t; + compat_uint_t verdict; +}; + +int xt_compat_check_entry_offsets(const void 
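/* Editor's note (not part of the patch): in the
 * xt_compat_match_from_user() hunk above, the match name is copied to a
 * local buffer before module_put() and written back afterwards --
 * apparently because once the reference is dropped the match's module,
 * and the string match->name points into, may go away, so the name has
 * to be duplicated while the reference is still held.
 */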
*base, const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct compat_xt_entry_target *t; + const char *e = base; + + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) + return -EINVAL; + + /* compat_xt_entry match has less strict alignment requirements, + * otherwise they are identical. In case of padding differences + * we need to add compat version of xt_check_entry_match. + */ + BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match)); + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct compat_xt_entry_match)); +} +EXPORT_SYMBOL(xt_compat_check_entry_offsets); #endif /* CONFIG_COMPAT */
+/**
+ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
+ *
+ * @base: pointer to arp/ip/ip6t_entry
+ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
+ * @target_offset: the arp/ip/ip6_t->target_offset
+ * @next_offset: the arp/ip/ip6_t->next_offset
+ *
+ * validates that target_offset and next_offset are sane and that all
+ * match sizes (if any) align with the target offset.
+ *
+ * This function does not validate the targets or matches themselves, it
+ * only tests that all the offsets and sizes are correct, that all
+ * match structures are aligned, and that the last structure ends where
+ * the target structure begins.
+ *
+ * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ *
+ * The arp/ip/ip6t_entry structure @base must have passed following tests:
+ * - it must point to a valid memory location
+ * - base to base + next_offset must be accessible, i.e. not exceed allocated
+ *   length.
+ *
+ * A well-formed entry looks like this:
+ *
+ * ip(6)t_entry   match [mtdata]  match [mtdata]  target [tgdata] ip(6)t_entry
+ * e->elems[]-----'                              |               |
+ *                matchsize                      |               |
+ *                                matchsize      |               |
+ *                                               |               |
+ * target_offset---------------------------------'               |
+ * next_offset---------------------------------------------------'
+ *
+ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
+ *          This is where matches (if any) and the target reside.
+ * target_offset: beginning of target.
+ * next_offset: start of the next rule; also: size of this rule.
+ * Since targets have a minimum size, target_offset + minlen <= next_offset.
+ *
+ * Every match stores its size, sum of sizes must not exceed target_offset.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */ +int xt_check_entry_offsets(const void *base, + const char *elems, + unsigned int target_offset, + unsigned int next_offset) +{ + long size_of_base_struct = elems - (const char *)base; + const struct xt_entry_target *t; + const char *e = base; + + /* target start is within the ip/ip6/arpt_entry struct */ + if (target_offset < size_of_base_struct) + return -EINVAL; + + if (target_offset + sizeof(*t) > next_offset) + return -EINVAL; + + t = (void *)(e + target_offset); + if (t->u.target_size < sizeof(*t)) + return -EINVAL; + + if (target_offset + t->u.target_size > next_offset) + return -EINVAL; + + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) + return -EINVAL; + + return xt_check_entry_match(elems, base + target_offset, + __alignof__(struct xt_entry_match)); +} +EXPORT_SYMBOL(xt_check_entry_offsets); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -590,6 +751,80 @@ int xt_check_target(struct xt_tgchk_param *par, } EXPORT_SYMBOL_GPL(xt_check_target); +/** + * xt_copy_counters_from_user - copy counters and metadata from userspace + * + * @user: src pointer to userspace memory + * @len: alleged size of userspace memory + * @info: where to store the xt_counters_info metadata + * @compat: true if the setsockopt call is done by a 32bit task on a 64bit kernel + * + * Copies counter metadata from @user and stores it in @info. + * + * vmallocs memory to hold the counters, then copies the counter data + * from @user to the new memory and returns a pointer to it. + * + * If @compat is true, @info gets converted automatically to the 64bit + * representation. + * + * The metadata associated with the counters is stored in @info. + * + * Return: returns pointer that caller has to test via IS_ERR(). + * If IS_ERR is false, caller has to vfree the pointer.
+ */ +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat) +{ + void *mem; + u64 size; + +#ifdef CONFIG_COMPAT + if (compat) { + /* structures only differ in size due to alignment */ + struct compat_xt_counters_info compat_tmp; + + if (len <= sizeof(compat_tmp)) + return ERR_PTR(-EINVAL); + + len -= sizeof(compat_tmp); + if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + return ERR_PTR(-EFAULT); + + strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + info->num_counters = compat_tmp.num_counters; + user += sizeof(compat_tmp); + } else +#endif + { + if (len <= sizeof(*info)) + return ERR_PTR(-EINVAL); + + len -= sizeof(*info); + if (copy_from_user(info, user, sizeof(*info)) != 0) + return ERR_PTR(-EFAULT); + + info->name[sizeof(info->name) - 1] = '\0'; + user += sizeof(*info); + } + + size = sizeof(struct xt_counters); + size *= info->num_counters; + + if (size != (u64)len) + return ERR_PTR(-EINVAL); + + mem = vmalloc(len); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, user, len) == 0) + return mem; + + vfree(mem); + return ERR_PTR(-EFAULT); +} +EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); + #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) { @@ -605,6 +840,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; int pad, off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; + char name[sizeof(t->u.user.name)]; t = *dstptr; memcpy(t, ct, sizeof(*ct)); @@ -618,6 +854,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; + strlcpy(name, target->name, sizeof(name)); + module_put(target->me); + strncpy(t->u.user.name, name, sizeof(t->u.user.name)); *size += off; *dstptr += tsize; @@ -658,6 +897,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) struct xt_table_info *info = NULL; size_t sz = sizeof(*info) + size; + if (sz < sizeof(*info)) + return NULL; + /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; diff --git a/kernel/net/netlabel/netlabel_kapi.c b/kernel/net/netlabel/netlabel_kapi.c index 28cddc85b..bfa2b6d5b 100644 --- a/kernel/net/netlabel/netlabel_kapi.c +++ b/kernel/net/netlabel/netlabel_kapi.c @@ -824,7 +824,11 @@ socket_setattr_return: */ void netlbl_sock_delattr(struct sock *sk) { - cipso_v4_sock_delattr(sk); + switch (sk->sk_family) { + case AF_INET: + cipso_v4_sock_delattr(sk); + break; + } } /** @@ -987,7 +991,11 @@ req_setattr_return: */ void netlbl_req_delattr(struct request_sock *req) { - cipso_v4_req_delattr(req); + switch (req->rsk_ops->family) { + case AF_INET: + cipso_v4_req_delattr(req); + break; + } } /** diff --git a/kernel/net/netlink/af_netlink.c b/kernel/net/netlink/af_netlink.c index 59651af8c..360700a2f 100644 --- a/kernel/net/netlink/af_netlink.c +++ b/kernel/net/netlink/af_netlink.c @@ -931,7 +931,6 @@ static void netlink_sock_destruct(struct sock *sk) if (nlk->cb_running) { if (nlk->cb.done) nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -960,6 +959,14 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } +static void netlink_sock_destruct_work(struct work_struct *work) +{ + struct netlink_sock *nlk = container_of(work,
struct netlink_sock, + work); + + sk_free(&nlk->sk); +} + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves @@ -1265,8 +1272,18 @@ out_module: static void deferred_put_nlk_sk(struct rcu_head *head) { struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + struct sock *sk = &nlk->sk; + + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } - sock_put(&nlk->sk); + sk_free(sk); } static int netlink_release(struct socket *sock) @@ -1305,7 +1322,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->portid) { + if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, @@ -2557,7 +2574,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* Record the max length of recvmsg() calls for future allocations */ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - 16384); + SKB_WITH_OVERHEAD(32768)); copied = data_skb->len; if (len < copied) { @@ -2784,6 +2801,7 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; + struct module *module; int len, err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2809,14 +2827,13 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); + (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL); + (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM)); } if (!skb) goto errout_skb; @@ -2863,9 +2880,11 @@ static int netlink_dump(struct sock *sk) cb->done(cb); nlk->cb_running = false; + module = cb->module; + skb = cb->skb; mutex_unlock(nlk->cb_mutex); - module_put(cb->module); - consume_skb(cb->skb); + module_put(module); + consume_skb(skb); return 0; errout_skb: diff --git a/kernel/net/netlink/af_netlink.h b/kernel/net/netlink/af_netlink.h index 14437d9b1..df32cb92d 100644 --- a/kernel/net/netlink/af_netlink.h +++ b/kernel/net/netlink/af_netlink.h @@ -3,6 +3,7 @@ #include <linux/rhashtable.h> #include <linux/atomic.h> +#include <linux/workqueue.h> #include <net/sock.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -53,6 +54,7 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; + struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) diff --git a/kernel/net/openvswitch/actions.c b/kernel/net/openvswitch/actions.c index c88d0f2d3..7cb8184ac 100644 --- a/kernel/net/openvswitch/actions.c +++ b/kernel/net/openvswitch/actions.c @@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, - MPLS_HLEN, 0)); + skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; @@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, mask->eth_dst); - ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); @@ -463,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { - set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked, true); memcpy(&flow_key->ipv6.addr.src, masked, sizeof(flow_key->ipv6.addr.src)); @@ -485,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, NULL, &flags) != NEXTHDR_ROUTING); - set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked, recalc_csum); memcpy(&flow_key->ipv6.addr.dst, masked, sizeof(flow_key->ipv6.addr.dst)); @@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk /* Reconstruct the MAC header. */ skb_push(skb, data->l2_len); memcpy(skb->data, &data->l2_data, data->l2_len); - ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); ovs_vport_send(vport, skb); diff --git a/kernel/net/openvswitch/conntrack.c b/kernel/net/openvswitch/conntrack.c index e004067ec..ad58d2a62 100644 --- a/kernel/net/openvswitch/conntrack.c +++ b/kernel/net/openvswitch/conntrack.c @@ -501,7 +501,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, /* The conntrack module expects to be working at L3. */ nh_ofs = skb_network_offset(skb); - skb_pull(skb, nh_ofs); + skb_pull_rcsum(skb, nh_ofs); if (key->ip.frag != OVS_FRAG_TYPE_NONE) { err = handle_fragments(net, key, info->zone.id, skb); @@ -527,6 +527,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, &info->labels.mask); err: skb_push(skb, nh_ofs); + skb_postpush_rcsum(skb, skb->data, nh_ofs); if (err) kfree_skb(skb); return err; diff --git a/kernel/net/openvswitch/vport-netdev.c b/kernel/net/openvswitch/vport-netdev.c index 6b0190b98..76fcaf1fd 100644 --- a/kernel/net/openvswitch/vport-netdev.c +++ b/kernel/net/openvswitch/vport-netdev.c @@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: diff --git a/kernel/net/openvswitch/vport-vxlan.c b/kernel/net/openvswitch/vport-vxlan.c index d933cb89e..5eb769434 100644 --- a/kernel/net/openvswitch/vport-vxlan.c +++ b/kernel/net/openvswitch/vport-vxlan.c @@ -91,6 +91,8 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) struct vxlan_config conf = { .no_share = true, .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX, + /* Don't restrict the packets that can be sent by MTU */ + .mtu = IP_MAX_MTU, }; if (!options) { diff --git a/kernel/net/openvswitch/vport.h b/kernel/net/openvswitch/vport.h index 8ea3a9698..6e2b62f9d 100644 --- a/kernel/net/openvswitch/vport.h +++ b/kernel/net/openvswitch/vport.h @@ -184,13 +184,6 @@ static inline struct vport *vport_from_priv(void *priv) int ovs_vport_receive(struct vport *, struct sk_buff *, const struct ip_tunnel_info *); -static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, - const void 
*start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); -} - static inline const char *ovs_vport_name(struct vport *vport) { return vport->dev->name; diff --git a/kernel/net/packet/af_packet.c b/kernel/net/packet/af_packet.c index 2c23bae0e..590449d00 100644 --- a/kernel/net/packet/af_packet.c +++ b/kernel/net/packet/af_packet.c @@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } @@ -1342,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb_get_hash(skb), num); + return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1917,6 +1916,10 @@ retry: goto retry; } + if (!dev_validate_header(dev, skb->data, len)) { + err = -EINVAL; + goto out_unlock; + } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; @@ -2327,18 +2330,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static bool ll_header_truncated(const struct net_device *dev, int len) -{ - /* net device doesn't like empty head */ - if (unlikely(len < dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", - current->comm, len, dev->hard_header_len); - return true; - } - - return false; -} - static void tpacket_set_protocol(const struct net_device *dev, struct sk_buff *skb) { @@ -2421,19 +2412,19 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, if (unlikely(err < 0)) return -EINVAL; } else if (dev->hard_header_len) { - if (ll_header_truncated(dev, tp_len)) - return -EINVAL; + int hdrlen = min_t(int, dev->hard_header_len, tp_len); skb_push(skb, dev->hard_header_len); - err = skb_store_bits(skb, 0, data, - dev->hard_header_len); + err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; + if (!dev_validate_header(dev, skb->data, hdrlen)) + return -EINVAL; if (!skb->protocol) tpacket_set_protocol(dev, skb); - data += dev->hard_header_len; - to_write -= dev->hard_header_len; + data += hdrlen; + to_write -= hdrlen; } offset = offset_in_page(data); @@ -2647,7 +2638,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) int vnet_hdr_len; struct packet_sock *po = pkt_sk(sk); unsigned short gso_type = 0; - int hlen, tlen; + int hlen, tlen, linear; int extra_len = 0; ssize_t n; @@ -2751,8 +2742,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, - __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len), + linear = 
__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); + linear = max(linear, min_t(int, len, dev->hard_header_len)); + skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; @@ -2764,9 +2756,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; - } else { - if (ll_header_truncated(dev, len)) - goto out_free; } /* Returns -EFAULT on error */ @@ -2774,6 +2763,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; + if (sock->type == SOCK_RAW && + !dev_validate_header(dev, skb->data, len)) { + err = -EINVAL; + goto out_free; + } + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len) && @@ -3442,6 +3437,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); + memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; @@ -3578,19 +3574,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: - po->tp_version = val; - return 0; + break; default: return -EINVAL; } + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_version = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_RESERVE: { @@ -3860,6 +3862,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); + fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); @@ -4072,6 +4075,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; + lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { WARN(1, "Tx-ring is not supported.\n"); @@ -4153,7 +4157,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; } - lock_sock(sk); /* Detach socket from network */ spin_lock(&po->bind_lock); @@ -4202,11 +4205,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } - release_sock(sk); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: + release_sock(sk); return err; } diff --git a/kernel/net/rds/recv.c b/kernel/net/rds/recv.c index a00462b0d..0514af3ab 100644 --- a/kernel/net/rds/recv.c +++ b/kernel/net/rds/recv.c @@ -545,5 +545,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); } diff --git a/kernel/net/rds/tcp.c b/kernel/net/rds/tcp.c index 9d6ddbacd..18e50a8fc 100644 --- a/kernel/net/rds/tcp.c +++ b/kernel/net/rds/tcp.c @@ -421,7 +421,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -433,8 +433,9 @@ static int rds_tcp_init(void) out_recv: 
rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/kernel/net/sched/act_csum.c b/kernel/net/sched/act_csum.c index b07c535ba..eeb3eb3ea 100644 --- a/kernel/net/sched/act_csum.c +++ b/kernel/net/sched/act_csum.c @@ -105,9 +105,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, int hl = ihl + jhl; if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || - (skb_cloned(skb) && - !skb_clone_writable(skb, hl + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, hl + ntkoff)) return NULL; else return (void *)(skb_network_header(skb) + ihl); @@ -365,9 +363,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff)) goto fail; ip_send_check(ip_hdr(skb)); diff --git a/kernel/net/sched/act_mirred.c b/kernel/net/sched/act_mirred.c index 32fcdecdb..e384d6aef 100644 --- a/kernel/net/sched/act_mirred.c +++ b/kernel/net/sched/act_mirred.c @@ -170,7 +170,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) - skb_push(skb2, skb->mac_len); + skb_push_rcsum(skb2, skb->mac_len); } /* mirror is always swallowed */ diff --git a/kernel/net/sched/act_nat.c b/kernel/net/sched/act_nat.c index b7c4ead8b..27607b863 100644 --- a/kernel/net/sched/act_nat.c +++ b/kernel/net/sched/act_nat.c @@ -126,9 +126,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, addr = iph->daddr; if (!((old_addr ^ addr) & mask)) { - if (skb_cloned(skb) && - !skb_clone_writable(skb, sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, sizeof(*iph) + noff)) goto drop; new_addr &= mask; @@ -156,9 +154,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct tcphdr *tcph; if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff)) goto drop; tcph = (void *)(skb_network_header(skb) + ihl); @@ -171,9 +167,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct udphdr *udph; if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || - (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + skb_try_make_writable(skb, ihl + sizeof(*udph) + noff)) goto drop; udph = (void *)(skb_network_header(skb) + ihl); @@ -213,10 +207,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, if ((old_addr ^ addr) & mask) break; - if (skb_cloned(skb) && - !skb_clone_writable(skb, ihl + sizeof(*icmph) + - sizeof(*iph) + noff) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_try_make_writable(skb, ihl + sizeof(*icmph) + + sizeof(*iph) + noff)) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); diff --git a/kernel/net/sched/act_pedit.c b/kernel/net/sched/act_pedit.c index e38a7701f..c3434e902 100644 --- a/kernel/net/sched/act_pedit.c +++ b/kernel/net/sched/act_pedit.c @@ -104,6 +104,17 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) kfree(keys); } +static bool offset_valid(struct sk_buff *skb, int offset) +{ + if (offset 
> 0 && offset > skb->len) + return false; + + if (offset < 0 && -offset > skb_headroom(skb)) + return false; + + return true; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -130,6 +141,11 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, if (tkey->offmask) { char *d, _d; + if (!offset_valid(skb, off + tkey->at)) { + pr_info("tc filter pedit 'at' offset %d out of bounds\n", + off + tkey->at); + goto bad; + } d = skb_header_pointer(skb, off + tkey->at, 1, &_d); if (!d) @@ -142,10 +158,10 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, " offset must be on 32 bit boundaries\n"); goto bad; } - if (offset > 0 && offset > skb->len) { - pr_info("tc filter pedit" - " offset %d can't exceed pkt length %d\n", - offset, skb->len); + + if (!offset_valid(skb, off + offset)) { + pr_info("tc filter pedit offset %d out of bounds\n", + offset); goto bad; } diff --git a/kernel/net/sched/act_vlan.c b/kernel/net/sched/act_vlan.c index 796785e0b..d7edba453 100644 --- a/kernel/net/sched/act_vlan.c +++ b/kernel/net/sched/act_vlan.c @@ -33,6 +33,12 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, bstats_update(&v->tcf_bstats, skb); action = v->tcf_action; + /* Ensure 'data' points at mac_header prior calling vlan manipulating + * functions. + */ + if (skb_at_tc_ingress(skb)) + skb_push_rcsum(skb, skb->mac_len); + switch (v->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); @@ -54,6 +60,9 @@ drop: action = TC_ACT_SHOT; v->tcf_qstats.drops++; unlock: + if (skb_at_tc_ingress(skb)) + skb_pull_rcsum(skb, skb->mac_len); + spin_unlock(&v->tcf_lock); return action; } diff --git a/kernel/net/sched/cls_api.c b/kernel/net/sched/cls_api.c index a75864d93..20b2f867c 100644 --- a/kernel/net/sched/cls_api.c +++ b/kernel/net/sched/cls_api.c @@ -137,13 +137,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; @@ -315,7 +317,8 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, + t->tcm_handle, RTM_DELTFILTER); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } diff --git a/kernel/net/sched/cls_basic.c b/kernel/net/sched/cls_basic.c index 0b8c3ace6..1bf1f4517 100644 --- a/kernel/net/sched/cls_basic.c +++ b/kernel/net/sched/cls_basic.c @@ -62,9 +62,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; - if (head == NULL) - return 0UL; - list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { l = (unsigned long) f; @@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_proto *tp, bool force) tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/kernel/net/sched/cls_bpf.c b/kernel/net/sched/cls_bpf.c index 5faaa5425..3eef0215e 100644 --- a/kernel/net/sched/cls_bpf.c +++ b/kernel/net/sched/cls_bpf.c @@ -199,7 +199,6 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) call_rcu(&prog->rcu, __cls_bpf_delete_prog); } - 
RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } @@ -210,9 +209,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) struct cls_bpf_prog *prog; unsigned long ret = 0UL; - if (head == NULL) - return 0UL; - list_for_each_entry(prog, &head->plist, link) { if (prog->handle == handle) { ret = (unsigned long) prog; diff --git a/kernel/net/sched/cls_cgroup.c b/kernel/net/sched/cls_cgroup.c index 4c85bd3a7..c104c2019 100644 --- a/kernel/net/sched/cls_cgroup.c +++ b/kernel/net/sched/cls_cgroup.c @@ -130,11 +130,10 @@ static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) if (!force) return false; - - if (head) { - RCU_INIT_POINTER(tp->root, NULL); + /* Head can still be NULL due to cls_cgroup_init(). */ + if (head) call_rcu(&head->rcu, cls_cgroup_destroy_rcu); - } + return true; } diff --git a/kernel/net/sched/cls_flow.c b/kernel/net/sched/cls_flow.c index fbfec6a18..d7ba2b4ff 100644 --- a/kernel/net/sched/cls_flow.c +++ b/kernel/net/sched/cls_flow.c @@ -583,7 +583,6 @@ static bool flow_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/kernel/net/sched/cls_flower.c b/kernel/net/sched/cls_flower.c index 95b021243..e5a58c827 100644 --- a/kernel/net/sched/cls_flower.c +++ b/kernel/net/sched/cls_flower.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/rhashtable.h> +#include <linux/workqueue.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -55,7 +56,10 @@ struct cls_fl_head { bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct cls_fl_filter { @@ -165,6 +169,24 @@ static void fl_destroy_filter(struct rcu_head *head) kfree(f); } +static void fl_destroy_sleepable(struct work_struct *work) +{ + struct cls_fl_head *head = container_of(work, struct cls_fl_head, + work); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree(head); + module_put(THIS_MODULE); +} + +static void fl_destroy_rcu(struct rcu_head *rcu) +{ + struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu); + + INIT_WORK(&head->work, fl_destroy_sleepable); + schedule_work(&head->work); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -177,10 +199,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, fl_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); - if (head->mask_assigned) - rhashtable_destroy(&head->ht); - kfree_rcu(head, rcu); + + __module_get(THIS_MODULE); + call_rcu(&head->rcu, fl_destroy_rcu); return true; } diff --git a/kernel/net/sched/cls_rsvp.h b/kernel/net/sched/cls_rsvp.h index f9c9fc075..9992dfac6 100644 --- a/kernel/net/sched/cls_rsvp.h +++ b/kernel/net/sched/cls_rsvp.h @@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; nhptr = ip_hdr(skb); #endif - + if (unlikely(!head)) + return -1; restart: #if RSVP_DST_LEN == 4 diff --git a/kernel/net/sched/cls_tcindex.c b/kernel/net/sched/cls_tcindex.c index 944c8ff45..403746b20 100644 --- a/kernel/net/sched/cls_tcindex.c +++ b/kernel/net/sched/cls_tcindex.c @@ -503,7 +503,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - RCU_INIT_POINTER(tp->root, NULL); call_rcu(&p->rcu, __tcindex_destroy); return true; } diff --git
a/kernel/net/sched/sch_api.c b/kernel/net/sched/sch_api.c index af1acf009..95b560f0b 100644 --- a/kernel/net/sched/sch_api.c +++ b/kernel/net/sched/sch_api.c @@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return 0; } -void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, + unsigned int len) { const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; int drops; - if (n == 0) + if (n == 0 && len == 0) return; drops = max_t(int, n, 0); rcu_read_lock(); @@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) cops->put(sch, cl); } sch->q.qlen -= n; + sch->qstats.backlog -= len; __qdisc_qstats_drop(sch, drops); } rcu_read_unlock(); } -EXPORT_SYMBOL(qdisc_tree_decrease_qlen); +EXPORT_SYMBOL(qdisc_tree_reduce_backlog); static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, diff --git a/kernel/net/sched/sch_cbq.c b/kernel/net/sched/sch_cbq.c index c538d9e4a..baafddf22 100644 --- a/kernel/net/sched/sch_cbq.c +++ b/kernel/net/sched/sch_cbq.c @@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new->reshape_fail = cbq_reshape_fail; #endif } - sch_tree_lock(sch); - *old = cl->q; - cl->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->q); return 0; } @@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen; + unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; @@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); qlen = cl->q->q.qlen; + backlog = cl->q->qstats.backlog; qdisc_reset(cl->q); - qdisc_tree_decrease_qlen(cl->q, qlen); + qdisc_tree_reduce_backlog(cl->q, qlen, backlog); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/kernel/net/sched/sch_choke.c b/kernel/net/sched/sch_choke.c index 5ffb8b833..0a08c860e 100644 --- a/kernel/net/sched/sch_choke.c +++ b/kernel/net/sched/sch_choke.c @@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) choke_zap_tail_holes(q); qdisc_qstats_backlog_dec(sch, skb); + qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_drop(skb, sch); - qdisc_tree_decrease_qlen(sch, 1); --sch->q.qlen; } @@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) old = q->tab; if (old) { unsigned int oqlen = sch->q.qlen, tail = 0; + unsigned dropped = 0; while (q->head != q->tail) { struct sk_buff *skb = q->tab[q->head]; @@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) ntab[tail++] = skb; continue; } + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); --sch->q.qlen; qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); q->head = 0; q->tail = tail; } diff --git a/kernel/net/sched/sch_codel.c b/kernel/net/sched/sch_codel.c index 535007d5f..9b7e2980e 100644 --- a/kernel/net/sched/sch_codel.c +++ b/kernel/net/sched/sch_codel.c @@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); - /* We cant call qdisc_tree_decrease_qlen() if our 
qlen is 0, + /* We can't call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round. */ if (q->stats.drop_count && sch->q.qlen) { - qdisc_tree_decrease_qlen(sch, q->stats.drop_count); + qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; + q->stats.drop_len = 0; } if (skb) qdisc_bstats_update(sch, skb); @@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) { struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_CODEL_MAX + 1]; - unsigned int qlen; + unsigned int qlen, dropped = 0; int err; if (!opt) @@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = __skb_dequeue(&sch->q); + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); sch_tree_unlock(sch); return 0; diff --git a/kernel/net/sched/sch_drr.c b/kernel/net/sched/sch_drr.c index f26bdea87..d6e3ad43c 100644 --- a/kernel/net/sched/sch_drr.c +++ b/kernel/net/sched/sch_drr.c @@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) static void drr_purge_queue(struct drr_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { @@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - drr_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/kernel/net/sched/sch_dsmark.c b/kernel/net/sched/sch_dsmark.c index f357f34d0..d0dff0cd8 100644 --- a/kernel/net/sched/sch_dsmark.c +++ b/kernel/net/sched/sch_dsmark.c @@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - *old = p->q; - p->q = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &p->q); return 0; } @@ -264,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -286,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) return NULL; qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); @@ -401,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); + sch->qstats.backlog = 0; sch->q.qlen = 0; } diff --git a/kernel/net/sched/sch_fifo.c b/kernel/net/sched/sch_fifo.c index 2177eac0a..2e4bd2c0a 100644 --- a/kernel/net/sched/sch_fifo.c +++ b/kernel/net/sched/sch_fifo.c @@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q);
qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } diff --git a/kernel/net/sched/sch_fq.c b/kernel/net/sched/sch_fq.c index 109b23227..3c6a47d66 100644 --- a/kernel/net/sched/sch_fq.c +++ b/kernel/net/sched/sch_fq.c @@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) struct fq_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_MAX + 1]; int err, drop_count = 0; + unsigned drop_len = 0; u32 fq_log; if (!opt) @@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!skb) break; + drop_len += qdisc_pkt_len(skb); kfree_skb(skb); drop_count++; } - qdisc_tree_decrease_qlen(sch, drop_count); + qdisc_tree_reduce_backlog(sch, drop_count, drop_len); sch_tree_unlock(sch); return err; diff --git a/kernel/net/sched/sch_fq_codel.c b/kernel/net/sched/sch_fq_codel.c index 4c834e93d..d3fc8f9dd 100644 --- a/kernel/net/sched/sch_fq_codel.c +++ b/kernel/net/sched/sch_fq_codel.c @@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - unsigned int idx; + unsigned int idx, prev_backlog; struct fq_codel_flow *flow; int uninitialized_var(ret); @@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; + prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet * from this flow. @@ -211,7 +212,7 */ if (ret == idx) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } @@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct fq_codel_flow *flow; struct list_head *head; u32 prev_drop_count, prev_ecn_mark; + unsigned int prev_backlog; begin: head = &q->new_flows; @@ -259,6 +261,7 @@ begin: prev_drop_count = q->cstats.drop_count; prev_ecn_mark = q->cstats.ecn_mark; + prev_backlog = sch->qstats.backlog; skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, dequeue); @@ -276,12 +279,14 @@ begin: } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, + /* We can't call qdisc_tree_reduce_backlog() if our qlen is 0, * or HTB crashes. Defer it for next round.
*/ if (q->cstats.drop_count && sch->q.qlen) { - qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, + q->cstats.drop_len); q->cstats.drop_count = 0; + q->cstats.drop_len = 0; } return skb; } @@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_codel_dequeue(sch); + q->cstats.drop_len += qdisc_pkt_len(skb); kfree_skb(skb); q->cstats.drop_count++; } - qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); + qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; + q->cstats.drop_len = 0; sch_tree_unlock(sch); return 0; diff --git a/kernel/net/sched/sch_generic.c b/kernel/net/sched/sch_generic.c index 47ef1b11b..00b81cab2 100644 --- a/kernel/net/sched/sch_generic.c +++ b/kernel/net/sched/sch_generic.c @@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, if (validate) skb = validate_xmit_skb_list(skb, dev); - if (skb) { + if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); + } else { + spin_lock(root_lock); + return qdisc_qlen(q); } spin_lock(root_lock); diff --git a/kernel/net/sched/sch_hfsc.c b/kernel/net/sched/sch_hfsc.c index b7ebe2c87..d783d7cc3 100644 --- a/kernel/net/sched/sch_hfsc.c +++ b/kernel/net/sched/sch_hfsc.c @@ -895,9 +895,10 @@ static void hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static void @@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; } - sch_tree_lock(sch); - hfsc_purge_queue(sch, cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/kernel/net/sched/sch_hhf.c b/kernel/net/sched/sch_hhf.c index 86b04e31e..13d6f83ec 100644 --- a/kernel/net/sched/sch_hhf.c +++ b/kernel/net/sched/sch_hhf.c @@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct hhf_sched_data *q = qdisc_priv(sch); enum wdrr_bucket_idx idx; struct wdrr_bucket *bucket; + unsigned int prev_backlog; idx = hhf_classify(skb, sch); @@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; + prev_backlog = sch->qstats.backlog; q->drop_overlimit++; /* Return Congestion Notification only if we dropped a packet from this * bucket. @@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this. 
*/ - qdisc_tree_decrease_qlen(sch, 1); + qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); return NET_XMIT_SUCCESS; } @@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) { struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_HHF_MAX + 1]; - unsigned int qlen; + unsigned int qlen, prev_backlog; int err; u64 non_hh_quantum; u32 new_quantum = q->quantum; @@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) } qlen = sch->q.qlen; + prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { struct sk_buff *skb = hhf_dequeue(sch); kfree_skb(skb); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, + prev_backlog - sch->qstats.backlog); sch_tree_unlock(sch); return 0; diff --git a/kernel/net/sched/sch_htb.c b/kernel/net/sched/sch_htb.c index 15ccd7f8f..87b02ed3d 100644 --- a/kernel/net/sched/sch_htb.c +++ b/kernel/net/sched/sch_htb.c @@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) htb_activate(q, cl); } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) ok: qdisc_bstats_update(sch, skb); qdisc_unthrottled(sch); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch) unsigned int len; if (cl->un.leaf.q->ops->drop && (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { + sch->qstats.backlog -= len; sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); @@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch) } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; - } } qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; + sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); for (i = 0; i < TC_HTB_NUMPRIO; i++) @@ -1163,14 +1166,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->common.classid)) == NULL) return -ENOBUFS; - sch_tree_lock(sch); - *old = cl->un.leaf.q; - cl->un.leaf.q = new; - if (*old != NULL) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->un.leaf.q); return 0; } @@ -1272,7 +1268,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - unsigned int qlen; struct Qdisc *new_q = NULL; int last_child = 0; @@ -1292,9 +1287,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); if (!cl->level) { - qlen = cl->un.leaf.q->q.qlen; + unsigned int qlen = cl->un.leaf.q->q.qlen; + unsigned int backlog = cl->un.leaf.q->qstats.backlog; + qdisc_reset(cl->un.leaf.q); - qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog); } /* delete from hash and active; remainder in destroy_class */ @@ -1428,10 +1425,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; + unsigned int backlog = parent->un.leaf.q->qstats.backlog; /* turn parent into inner node */ qdisc_reset(parent->un.leaf.q); - qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); + qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog); 
qdisc_destroy(parent->un.leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/kernel/net/sched/sch_multiq.c b/kernel/net/sched/sch_multiq.c index 4e904ca0a..bcdd54bb1 100644 --- a/kernel/net/sched/sch_multiq.c +++ b/kernel/net/sched/sch_multiq.c @@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; - qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); qdisc_destroy(child); } } @@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) q->queues[i] = child; if (old != &noop_qdisc) { - qdisc_tree_decrease_qlen(old, - old->q.qlen); + qdisc_tree_reduce_backlog(old, + old->q.qlen, + old->qstats.backlog); qdisc_destroy(old); } sch_tree_unlock(sch); @@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->queues[band]; - q->queues[band] = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->queues[band]); return 0; } diff --git a/kernel/net/sched/sch_netem.c b/kernel/net/sched/sch_netem.c index 5abd1d9de..b7c29d5b6 100644 --- a/kernel/net/sched/sch_netem.c +++ b/kernel/net/sched/sch_netem.c @@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) sch->q.qlen++; } +/* netem can't properly corrupt a megapacket (like we get from GSO), so + * when we statistically choose to corrupt one, we instead segment it, returning + * the first packet to be corrupted, and re-enqueue the remaining frames + */ +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct sk_buff *segs; + netdev_features_t features = netif_skb_features(skb); + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) { + qdisc_reshape_fail(skb, sch); + return NULL; + } + consume_skb(skb); + return segs; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. */ @@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; + struct sk_buff *segs = NULL; + unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); + int nb = 0; int count = 1; + int rc = NET_XMIT_SUCCESS; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) @@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it.
*/ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (skb_is_gso(skb)) { + segs = netem_segment(skb, sch); + if (!segs) + return NET_XMIT_DROP; + } else { + segs = skb; + } + + skb = segs; + segs = segs->next; + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) - return qdisc_drop(skb, sch); + skb_checksum_help(skb))) { + rc = qdisc_drop(skb, sch); + goto finish_segs; + } skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); @@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.requeues++; } +finish_segs: + if (segs) { + while (segs) { + skb2 = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + last_len = segs->len; + rc = qdisc_enqueue(segs, sch); + if (rc != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(rc)) + qdisc_qstats_drop(sch); + } else { + nb++; + len += last_len; + } + segs = skb2; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + } return NET_XMIT_SUCCESS; } @@ -593,13 +650,14 @@ deliver: #endif if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); int err = qdisc_enqueue(skb, q->qdisc); - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_decrease_qlen(sch, 1); - } + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } @@ -1037,15 +1095,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct netem_sched_data *q = qdisc_priv(sch); - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - if (*old) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - } - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/kernel/net/sched/sch_pie.c b/kernel/net/sched/sch_pie.c index b783a446d..71ae3b962 100644 --- a/kernel/net/sched/sch_pie.c +++ b/kernel/net/sched/sch_pie.c @@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) { struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_PIE_MAX + 1]; - unsigned int qlen; + unsigned int qlen, dropped = 0; int err; if (!opt) @@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = __skb_dequeue(&sch->q); + dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch); } - qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); sch_tree_unlock(sch); return 0; diff --git a/kernel/net/sched/sch_prio.c b/kernel/net/sched/sch_prio.c index ba6487f27..fee1b1550 100644 --- a/kernel/net/sched/sch_prio.c +++ b/kernel/net/sched/sch_prio.c @@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { - qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); qdisc_destroy(child); } } @@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) q->queues[i] = child; if (old != &noop_qdisc) { - qdisc_tree_decrease_qlen(old, - old->q.qlen); + qdisc_tree_reduce_backlog(old, + old->q.qlen, + old->qstats.backlog); qdisc_destroy(old); } sch_tree_unlock(sch); @@ -268,13 +269,7 @@ static int 
prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->queues[band]; - q->queues[band] = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); - + *old = qdisc_replace(sch, new, &q->queues[band]); return 0; } diff --git a/kernel/net/sched/sch_qfq.c b/kernel/net/sched/sch_qfq.c index 3dc3a6e56..8d2d8d953 100644 --- a/kernel/net/sched/sch_qfq.c +++ b/kernel/net/sched/sch_qfq.c @@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) static void qfq_purge_queue(struct qfq_class *cl) { unsigned int len = cl->qdisc->q.qlen; + unsigned int backlog = cl->qdisc->qstats.backlog; qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); + qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); } static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { @@ -617,11 +618,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, new = &noop_qdisc; } - sch_tree_lock(sch); - qfq_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } diff --git a/kernel/net/sched/sch_red.c b/kernel/net/sched/sch_red.c index 6c0534cc7..8c0508c0e 100644 --- a/kernel/net/sched/sch_red.c +++ b/kernel/net/sched/sch_red.c @@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; } @@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/kernel/net/sched/sch_sfb.c b/kernel/net/sched/sch_sfb.c index 5bbb6332e..c69611640 100644 --- a/kernel/net/sched/sch_sfb.c +++ b/kernel/net/sched/sch_sfb.c @@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); - qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, + q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; @@ -606,12 +607,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (new == NULL) new = &noop_qdisc; - sch_tree_lock(sch); - *old = q->qdisc; - q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &q->qdisc); return 0; } diff --git a/kernel/net/sched/sch_sfq.c b/kernel/net/sched/sch_sfq.c index 3abab534e..498f0a2cb 100644 --- a/kernel/net/sched/sch_sfq.c +++ b/kernel/net/sched/sch_sfq.c @@ -346,7 +346,7 @@ static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; + unsigned int hash, dropped; sfq_index x, qlen; struct sfq_slot *slot; int uninitialized_var(ret); @@ -461,7 +461,7 @@ enqueue: return NET_XMIT_SUCCESS; qlen = slot->qlen; - sfq_drop(sch); + dropped = sfq_drop(sch); /* Return Congestion Notification only if we dropped a packet * from this flow. 
diff --git a/kernel/net/sched/sch_sfq.c b/kernel/net/sched/sch_sfq.c
index 3abab534e..498f0a2cb 100644
--- a/kernel/net/sched/sch_sfq.c
+++ b/kernel/net/sched/sch_sfq.c
@@ -346,7 +346,7 @@ static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned int hash;
+	unsigned int hash, dropped;
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
@@ -461,7 +461,7 @@ enqueue:
 		return NET_XMIT_SUCCESS;
 
 	qlen = slot->qlen;
-	sfq_drop(sch);
+	dropped = sfq_drop(sch);
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
 	 */
@@ -469,7 +469,7 @@ enqueue:
 		return NET_XMIT_CN;
 
 	/* As we dropped a packet, better let upper stack know this */
-	qdisc_tree_decrease_qlen(sch, 1);
+	qdisc_tree_reduce_backlog(sch, 1, dropped);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch)
 	struct sfq_slot *slot;
 	struct sk_buff_head list;
 	int dropped = 0;
+	unsigned int drop_len = 0;
 
 	__skb_queue_head_init(&list);
 
@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch)
 		if (x >= SFQ_MAX_FLOWS) {
 drop:
 			qdisc_qstats_backlog_dec(sch, skb);
+			drop_len += qdisc_pkt_len(skb);
 			kfree_skb(skb);
 			dropped++;
 			continue;
@@ -594,7 +596,7 @@ drop:
 		}
 	}
 	sch->q.qlen -= dropped;
-	qdisc_tree_decrease_qlen(sch, dropped);
+	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
 }
 
 static void sfq_perturbation(unsigned long arg)
@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
-	unsigned int qlen;
+	unsigned int qlen, dropped = 0;
 	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
-		sfq_drop(sch);
-	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+		dropped += sfq_drop(sch);
+	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
 
 	del_timer(&q->perturb_timer);
 	if (q->perturb_period) {
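The sch_tbf.c hunk below (tbf_segment(), like netem's finish_segs above) enqueues GSO segments one at a time after the tree was already charged for a single large skb, hence the correction qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len), where either delta may be negative. A worked example with hypothetical numbers:

    /* Worked example of the "1 - nb" correction; the numbers are made up. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int prev_len = 4500;   /* bytes the tree was charged */
            unsigned int nb = 2;            /* segments actually enqueued */
            unsigned int len = 3000;        /* their byte total (one dropped) */

            int pkt_adjust  = 1 - (int)nb;               /* -1 packet  */
            int byte_adjust = (int)prev_len - (int)len;  /* +1500 bytes */

            printf("qlen %+d, backlog %+d\n", pkt_adjust, byte_adjust);
            return 0;
    }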
diff --git a/kernel/net/sched/sch_tbf.c b/kernel/net/sched/sch_tbf.c
index a4afde14e..c2fbde742 100644
--- a/kernel/net/sched/sch_tbf.c
+++ b/kernel/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *segs, *nskb;
 	netdev_features_t features = netif_skb_features(skb);
+	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
 	int ret, nb;
 
 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
 		nskb = segs->next;
 		segs->next = NULL;
 		qdisc_skb_cb(segs)->pkt_len = segs->len;
+		len += segs->len;
 		ret = qdisc_enqueue(segs, q->qdisc);
 		if (ret != NET_XMIT_SUCCESS) {
 			if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
 	}
 	sch->q.qlen += nb;
 	if (nb > 1)
-		qdisc_tree_decrease_qlen(sch, 1 - nb);
+		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
 	consume_skb(skb);
 	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
 }
 
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 	if (child) {
-		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+					  q->qdisc->qstats.backlog);
 		qdisc_destroy(q->qdisc);
 		q->qdisc = child;
 	}
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (new == NULL)
 		new = &noop_qdisc;
 
-	sch_tree_lock(sch);
-	*old = q->qdisc;
-	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
+	*old = qdisc_replace(sch, new, &q->qdisc);
 	return 0;
 }
 
diff --git a/kernel/net/sctp/ipv6.c b/kernel/net/sctp/ipv6.c
index ec529121f..ce46f1c7f 100644
--- a/kernel/net/sctp/ipv6.c
+++ b/kernel/net/sctp/ipv6.c
@@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
 		}
 		return 0;
 	}
+	if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+		return 0;
 	if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
 		return 0;
 	/* If this is a linklocal address, compare the scope_id. */
 
diff --git a/kernel/net/sctp/sm_statefuns.c b/kernel/net/sctp/sm_statefuns.c
index 22c2bf367..29c7c43de 100644
--- a/kernel/net/sctp/sm_statefuns.c
+++ b/kernel/net/sctp/sm_statefuns.c
@@ -3426,6 +3426,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
+		/* Report violation if chunk len overflows */
+		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		if (ch_end > skb_tail_pointer(skb))
+			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+						  commands);
+
 		/* Now that we know we at least have a chunk header,
 		 * do things that are type appropriate.
 		 */
@@ -3457,12 +3463,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 			}
 		}
 
-		/* Report violation if chunk len overflows */
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
-		if (ch_end > skb_tail_pointer(skb))
-			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-						  commands);
-
 		ch = (sctp_chunkhdr_t *) ch_end;
 	} while (ch_end < skb_tail_pointer(skb));
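The sm_statefuns.c hunk above moves the chunk-length overflow check in sctp_sf_ootb() ahead of any use of the chunk, so a forged ch->length can no longer steer the walk past the end of the skb. The general shape of that defensive TLV walk, in a self-contained sketch (hypothetical types, not the SCTP structures):

    /* Hypothetical TLV walk: bounds-check the declared length before use. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct chunk_hdr {
            uint8_t  type;
            uint8_t  flags;
            uint16_t length;        /* host order here, for simplicity */
    };

    #define WORD_ROUND(n) (((n) + 3U) & ~3U)

    static int walk_chunks(const uint8_t *buf, size_t buflen)
    {
            const uint8_t *p = buf, *tail = buf + buflen;

            while (p + sizeof(struct chunk_hdr) <= tail) {
                    struct chunk_hdr ch;
                    const uint8_t *ch_end;

                    memcpy(&ch, p, sizeof(ch));     /* avoid alignment traps */
                    ch_end = p + WORD_ROUND(ch.length);

                    /* report violation if chunk len overflows, before use */
                    if (ch.length < sizeof(ch) || ch_end > tail)
                            return -1;

                    /* ... type-appropriate processing would go here ... */
                    p = ch_end;
            }
            return 0;
    }

    int main(void)
    {
            uint8_t buf[8] = { 0 };
            struct chunk_hdr lie = { 1, 0, 200 };   /* claims 200 bytes */

            memcpy(buf, &lie, sizeof(lie));
            printf("%d\n", walk_chunks(buf, sizeof(buf)));  /* prints -1 */
            return 0;
    }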
diff --git a/kernel/net/sctp/socket.c b/kernel/net/sctp/socket.c
index be1489fc3..138f2d667 100644
--- a/kernel/net/sctp/socket.c
+++ b/kernel/net/sctp/socket.c
@@ -1212,9 +1212,12 @@ static int __sctp_connect(struct sock *sk,
 
 	timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
 
-	err = sctp_wait_for_connect(asoc, &timeo);
-	if ((err == 0 || err == -EINPROGRESS) && assoc_id)
+	if (assoc_id)
 		*assoc_id = asoc->assoc_id;
+	err = sctp_wait_for_connect(asoc, &timeo);
+	/* Note: the asoc may be freed after the return of
+	 * sctp_wait_for_connect.
+	 */
 
 	/* Don't free association on exit.
 	 */
 	asoc = NULL;
 
@@ -4371,7 +4374,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk,
 static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
 				  int __user *optlen)
 {
-	if (len <= 0)
+	if (len == 0)
 		return -EINVAL;
 	if (len > sizeof(struct sctp_event_subscribe))
 		len = sizeof(struct sctp_event_subscribe);
@@ -5972,6 +5975,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
+	if (len < 0)
+		return -EINVAL;
+
 	lock_sock(sk);
 
 	switch (optname) {
@@ -6954,7 +6960,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		 */
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
-		BUG_ON(sk != asoc->base.sk);
+		if (sk != asoc->base.sk)
+			goto do_error;
 		lock_sock(sk);
 
 		*timeo_p = current_timeo;
 
diff --git a/kernel/net/socket.c b/kernel/net/socket.c
index d730ef9df..0090225ee 100644
--- a/kernel/net/socket.c
+++ b/kernel/net/socket.c
@@ -2041,6 +2041,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 		if (err)
 			break;
 		++datagrams;
+		if (msg_data_left(&msg_sys))
+			break;
 	}
 
 	fput_light(sock->file, fput_needed);
@@ -2238,31 +2240,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 			break;
 	}
 
-out_put:
-	fput_light(sock->file, fput_needed);
-
 	if (err == 0)
-		return datagrams;
+		goto out_put;
 
-	if (datagrams != 0) {
+	if (datagrams == 0) {
+		datagrams = err;
+		goto out_put;
+	}
+
+	/*
+	 * We may return less entries than requested (vlen) if the
+	 * sock is non block and there aren't enough datagrams...
+	 */
+	if (err != -EAGAIN) {
 		/*
-		 * We may return less entries than requested (vlen) if the
-		 * sock is non block and there aren't enough datagrams...
+		 * ... or if recvmsg returns an error after we
+		 * received some datagrams, where we record the
+		 * error to return on the next call or if the
+		 * app asks about it using getsockopt(SO_ERROR).
 		 */
-		if (err != -EAGAIN) {
-			/*
-			 * ... or if recvmsg returns an error after we
-			 * received some datagrams, where we record the
-			 * error to return on the next call or if the
-			 * app asks about it using getsockopt(SO_ERROR).
-			 */
-			sock->sk->sk_err = -err;
-		}
-
-		return datagrams;
+		sock->sk->sk_err = -err;
 	}
+out_put:
+	fput_light(sock->file, fput_needed);
 
-	return err;
+	return datagrams;
 }
 
 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 
diff --git a/kernel/net/sunrpc/auth_gss/auth_gss.c b/kernel/net/sunrpc/auth_gss/auth_gss.c
index 799e65b94..06095cc88 100644
--- a/kernel/net/sunrpc/auth_gss/auth_gss.c
+++ b/kernel/net/sunrpc/auth_gss/auth_gss.c
@@ -340,12 +340,14 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
 {
 	struct gss_upcall_msg *pos;
 	list_for_each_entry(pos, &pipe->in_downcall, list) {
 		if (!uid_eq(pos->uid, uid))
 			continue;
+		if (auth && pos->auth->service != auth->service)
+			continue;
 		atomic_inc(&pos->count);
 		dprintk("RPC: %s found msg %p\n", __func__, pos);
 		return pos;
@@ -365,7 +367,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg)
 	struct gss_upcall_msg *old;
 
 	spin_lock(&pipe->lock);
-	old = __gss_find_upcall(pipe, gss_msg->uid);
+	old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
 	if (old == NULL) {
 		atomic_inc(&gss_msg->count);
 		list_add(&gss_msg->list, &pipe->in_downcall);
@@ -714,7 +716,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	err = -ENOENT;
 	/* Find a matching upcall */
 	spin_lock(&pipe->lock);
-	gss_msg = __gss_find_upcall(pipe, uid);
+	gss_msg = __gss_find_upcall(pipe, uid, NULL);
 	if (gss_msg == NULL) {
 		spin_unlock(&pipe->lock);
 		goto err_put_ctx;
 
diff --git a/kernel/net/sunrpc/auth_gss/gss_rpc_xdr.c b/kernel/net/sunrpc/auth_gss/gss_rpc_xdr.c
index eeeba5ade..2410d557a 100644
--- a/kernel/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/kernel/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
 	if (!oa->data)
 		return -ENOMEM;
 
-	creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
+	creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
 	if (!creds) {
 		kfree(oa->data);
 		return -ENOMEM;
 
diff --git a/kernel/net/sunrpc/auth_gss/svcauth_gss.c b/kernel/net/sunrpc/auth_gss/svcauth_gss.c
index 1095be9c8..033fec307 100644
--- a/kernel/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/kernel/net/sunrpc/auth_gss/svcauth_gss.c
@@ -857,8 +857,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 		goto out;
 	if (svc_getnl(&buf->head[0]) != seq)
 		goto out;
-	/* trim off the mic at the end before returning */
-	xdr_buf_trim(buf, mic.len + 4);
+	/* trim off the mic and padding at the end before returning */
+	xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
 	stat = 0;
 out:
 	kfree(mic.data);
@@ -1481,7 +1481,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 	case RPC_GSS_PROC_DESTROY:
 		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 			goto auth_err;
-		rsci->h.expiry_time = get_seconds();
+		rsci->h.expiry_time = seconds_since_boot();
 		set_bit(CACHE_NEGATIVE, &rsci->h.flags);
 		if (resv->iov_len + 4 > PAGE_SIZE)
 			goto drop;
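In the svcauth_gss.c hunk above, the verifier MIC is an XDR opaque, and XDR pads opaque data out to a four-byte ("quad") boundary; trimming only mic.len + 4 left the pad bytes behind. The rounding presumably done by round_up_to_quad(), modeled in a few lines (a sketch, not the kernel helper itself):

    /* Sketch of quad (4-byte) rounding as XDR padding requires. */
    #include <stdio.h>

    static unsigned int round_up_to_quad(unsigned int val)
    {
            return (val + 3) & ~3U;
    }

    int main(void)
    {
            unsigned int len;

            for (len = 20; len <= 24; len++)
                    printf("mic.len=%u -> trim %u+4 bytes\n",
                           len, round_up_to_quad(len));
            return 0;
    }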
diff --git a/kernel/net/sunrpc/cache.c b/kernel/net/sunrpc/cache.c
index 21e203531..63fb5ee21 100644
--- a/kernel/net/sunrpc/cache.c
+++ b/kernel/net/sunrpc/cache.c
@@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 	}
 
 	crq->q.reader = 0;
-	crq->item = cache_get(h);
 	crq->buf = buf;
 	crq->len = 0;
 	crq->readers = 0;
 	spin_lock(&queue_lock);
-	if (test_bit(CACHE_PENDING, &h->flags))
+	if (test_bit(CACHE_PENDING, &h->flags)) {
+		crq->item = cache_get(h);
 		list_add_tail(&crq->q.list, &detail->queue);
-	else
+	} else
 		/* Lost a race, no longer PENDING, so don't enqueue */
 		ret = -EAGAIN;
 	spin_unlock(&queue_lock);
 
diff --git a/kernel/net/sunrpc/clnt.c b/kernel/net/sunrpc/clnt.c
index 23608eb0d..f28aeb2cf 100644
--- a/kernel/net/sunrpc/clnt.c
+++ b/kernel/net/sunrpc/clnt.c
@@ -337,6 +337,11 @@ out:
 
 static DEFINE_IDA(rpc_clids);
 
+void rpc_cleanup_clids(void)
+{
+	ida_destroy(&rpc_clids);
+}
+
 static int rpc_alloc_clid(struct rpc_clnt *clnt)
 {
 	int clid;
@@ -442,7 +447,7 @@ out_no_rpciod:
 	return ERR_PTR(err);
 }
 
-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
 					struct rpc_xprt *xprt)
 {
 	struct rpc_clnt *clnt = NULL;
@@ -474,7 +479,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
 
 	return clnt;
 }
-EXPORT_SYMBOL_GPL(rpc_create_xprt);
 
 /**
  * rpc_create - create an RPC client and transport with one call
@@ -500,6 +504,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	};
 	char servername[48];
 
+	if (args->bc_xprt) {
+		WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+		xprt = args->bc_xprt->xpt_bc_xprt;
+		if (xprt) {
+			xprt_get(xprt);
+			return rpc_create_xprt(args, xprt);
+		}
+	}
+
 	if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
 		xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
 	if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
 
diff --git a/kernel/net/sunrpc/sunrpc_syms.c b/kernel/net/sunrpc/sunrpc_syms.c
index ee5d3d253..3142f38d1 100644
--- a/kernel/net/sunrpc/sunrpc_syms.c
+++ b/kernel/net/sunrpc/sunrpc_syms.c
@@ -119,6 +119,7 @@ out:
 static void __exit
 cleanup_sunrpc(void)
 {
+	rpc_cleanup_clids();
 	rpcauth_remove_module();
 	cleanup_socket_xprt();
 	svc_cleanup_xprt_sock();
 
diff --git a/kernel/net/sunrpc/svc.c b/kernel/net/sunrpc/svc.c
index cc9852897..c5b0cb4f4 100644
--- a/kernel/net/sunrpc/svc.c
+++ b/kernel/net/sunrpc/svc.c
@@ -1188,11 +1188,17 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
 
 		/* Encode reply */
-		if (test_bit(RQ_DROPME, &rqstp->rq_flags)) {
+		if (*statp == rpc_drop_reply ||
+		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 			if (procp->pc_release)
 				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
 			goto dropit;
 		}
+		if (*statp == rpc_autherr_badcred) {
+			if (procp->pc_release)
+				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+			goto err_bad_auth;
+		}
 		if (*statp == rpc_success &&
 		    (xdr = procp->pc_encode) &&
 		    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
 
diff --git a/kernel/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/kernel/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ff4f01e52..d4e0d648b 100644
--- a/kernel/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/kernel/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -346,8 +346,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	atomic_inc(&rdma_stat_read);
 	return ret;
  err:
-	ib_dma_unmap_sg(xprt->sc_cm_id->device,
-			frmr->sg, frmr->sg_nents, frmr->direction);
 	svc_rdma_put_context(ctxt, 0);
 	svc_rdma_put_frmr(xprt, frmr);
 	return ret;
 
diff --git a/kernel/net/sunrpc/xprtsock.c b/kernel/net/sunrpc/xprtsock.c
index 027c9ef8a..27b6f55fa 100644
--- a/kernel/net/sunrpc/xprtsock.c
+++ b/kernel/net/sunrpc/xprtsock.c
@@ -474,7 +474,16 @@ static int xs_nospace(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 
 	/* Race breaker in case memory is freed before above code is called */
-	sk->sk_write_space(sk);
+	if (ret == -EAGAIN) {
+		struct socket_wq *wq;
+
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+		rcu_read_unlock();
+
+		sk->sk_write_space(sk);
+	}
 	return ret;
 }
 
@@ -2286,6 +2295,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		/* SYN_SENT! */
 		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
 			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+		break;
+	case -EADDRNOTAVAIL:
+		/* Source port number is unavailable. Try a new one! */
+		transport->srcport = 0;
 	}
 out:
 	return ret;
 
diff --git a/kernel/net/switchdev/switchdev.c b/kernel/net/switchdev/switchdev.c
index d5d7132ac..1b5886617 100644
--- a/kernel/net/switchdev/switchdev.c
+++ b/kernel/net/switchdev/switchdev.c
@@ -1169,6 +1169,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
 		.dst = dst,
 		.dst_len = dst_len,
+		.fi = fi,
 		.tos = tos,
 		.type = type,
 		.nlflags = nlflags,
@@ -1177,8 +1178,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 	struct net_device *dev;
 	int err = 0;
 
-	memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
-
 	/* Don't offload route if using custom ip rules or if
 	 * IPv4 FIB offloading has been disabled completely.
 	 */
@@ -1222,6 +1221,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
 		.dst = dst,
 		.dst_len = dst_len,
+		.fi = fi,
 		.tos = tos,
 		.type = type,
 		.nlflags = 0,
@@ -1230,8 +1230,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 	struct net_device *dev;
 	int err = 0;
 
-	memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
-
 	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
 		return 0;
 
diff --git a/kernel/net/sysctl_net.c b/kernel/net/sysctl_net.c
index ed98c1fc3..46a71c701 100644
--- a/kernel/net/sysctl_net.c
+++ b/kernel/net/sysctl_net.c
@@ -46,7 +46,7 @@ static int net_ctl_permissions(struct ctl_table_header *head,
 	kgid_t root_gid = make_kgid(net->user_ns, 0);
 
 	/* Allow network administrator to have same access as root.
 	 */
-	if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
+	if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) ||
 	    uid_eq(root_uid, current_euid())) {
 		int mode = (table->mode >> 6) & 7;
 		return (mode << 6) | (mode << 3) | mode;
 
diff --git a/kernel/net/tipc/link.c b/kernel/net/tipc/link.c
index 91aea071a..72268eac4 100644
--- a/kernel/net/tipc/link.c
+++ b/kernel/net/tipc/link.c
@@ -1262,6 +1262,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		/* fall thru' */
 
 	case ACTIVATE_MSG:
+		skb_linearize(skb);
+		hdr = buf_msg(skb);
 
 		/* Complete own link name with peer's interface name */
 		if_name = strrchr(l->name, ':') + 1;
 
diff --git a/kernel/net/tipc/name_distr.c b/kernel/net/tipc/name_distr.c
index c07612bab..f51c8bdbe 100644
--- a/kernel/net/tipc/name_distr.c
+++ b/kernel/net/tipc/name_distr.c
@@ -397,6 +397,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
 
 	spin_lock_bh(&tn->nametbl_lock);
 	for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
+		skb_linearize(skb);
 		msg = buf_msg(skb);
 		mtype = msg_type(msg);
 		item = (struct distr_item *)msg_data(msg);
 
diff --git a/kernel/net/tipc/netlink_compat.c b/kernel/net/tipc/netlink_compat.c
index 1eadc95e1..a0c90572d 100644
--- a/kernel/net/tipc/netlink_compat.c
+++ b/kernel/net/tipc/netlink_compat.c
@@ -574,7 +574,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 
 	link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
 	link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
-	strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]));
+	nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
+		    TIPC_MAX_LINK_NAME);
 
 	return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,
 			    &link_info, sizeof(link_info));
@@ -802,7 +803,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
 		goto out;
 
 	tipc_tlv_sprintf(msg->rep, "%-10u %s",
-			 nla_get_u32(publ[TIPC_NLA_PUBL_REF]),
+			 nla_get_u32(publ[TIPC_NLA_PUBL_KEY]),
 			 scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]);
 out:
 	tipc_tlv_sprintf(msg->rep, "\n");
 
diff --git a/kernel/net/tipc/socket.c b/kernel/net/tipc/socket.c
index b53246fb0..b26b7a127 100644
--- a/kernel/net/tipc/socket.c
+++ b/kernel/net/tipc/socket.c
@@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct iov_iter save = msg->msg_iter;
 	uint mtu;
 	int rc;
@@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
 	msg_set_nameupper(mhdr, seq->upper);
 	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
 
+	skb_queue_head_init(&pktchain);
+
 new_mtu:
 	mtu = tipc_bcast_get_mtu(net);
-	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
 
 	do {
-		rc = tipc_bcast_xmit(net, pktchain);
+		rc = tipc_bcast_xmit(net, &pktchain);
 		if (likely(!rc))
 			return dsz;
 
@@ -704,7 +706,7 @@ new_mtu:
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			msg->msg_iter = save;
 			goto new_mtu;
@@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
 	u32 dnode, dport;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct sk_buff *skb;
 	struct tipc_name_seq *seq;
 	struct iov_iter save;
@@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
 	}
 
+	skb_queue_head_init(&pktchain);
 	save = m->msg_iter;
 new_mtu:
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
 	if (rc < 0)
 		return rc;
 
 	do {
-		skb = skb_peek(pktchain);
+		skb = skb_peek(&pktchain);
 		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-		rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
 		if (likely(!rc)) {
 			if (sock->state != SS_READY)
 				sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@ new_mtu:
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			m->msg_iter = save;
 			goto new_mtu;
@@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	u32 portid = tsk->portid;
 	int rc = -EINVAL;
@@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 
 	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	dnode = tsk_peer_node(tsk);
+	skb_queue_head_init(&pktchain);
 
 next:
 	save = m->msg_iter;
 	mtu = tsk->max_pkt;
 	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-	rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
+
 	do {
 		if (likely(!tsk_conn_cong(tsk))) {
-			rc = tipc_node_xmit(net, pktchain, dnode, portid);
+			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
 			if (likely(!rc)) {
 				tsk->sent_unacked++;
 				sent += send;
@@ -1063,7 +1068,7 @@ next:
 			goto next;
 		}
 		if (rc == -EMSGSIZE) {
-			__skb_queue_purge(pktchain);
+			__skb_queue_purge(&pktchain);
 			tsk->max_pkt = tipc_node_get_mtu(net, dnode,
 							 portid);
 			m->msg_iter = save;
@@ -1077,7 +1082,7 @@ next:
 		rc = tipc_wait_for_sndpkt(sock, &timeo);
 	} while (!rc);
 
-	__skb_queue_purge(pktchain);
+	__skb_queue_purge(&pktchain);
 	return sent ? sent : rc;
 }
 
@@ -2106,7 +2111,8 @@ restart:
 					     TIPC_CONN_MSG, SHORT_H_SIZE,
 					     0, dnode, onode, dport, oport,
 					     TIPC_CONN_SHUTDOWN);
-			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+			if (skb)
+				tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
 		}
 		tsk->connected = 0;
 		sock->state = SS_DISCONNECTING;
@@ -2809,6 +2815,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (err)
 			return err;
 
+		if (!attrs[TIPC_NLA_SOCK])
+			return -EINVAL;
+
 		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
 				       attrs[TIPC_NLA_SOCK],
 				       tipc_nl_sock_policy);
 
diff --git a/kernel/net/tipc/subscr.c b/kernel/net/tipc/subscr.c
index 69ee2eeef..f9ff73a8d 100644
--- a/kernel/net/tipc/subscr.c
+++ b/kernel/net/tipc/subscr.c
@@ -296,7 +296,8 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
 	if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
 		return tipc_conn_terminate(tn->topsrv, subscrb->conid);
 
-	tipc_nametbl_subscribe(sub);
+	if (sub)
+		tipc_nametbl_subscribe(sub);
 }
 
 /* Handle one request to establish a new subscriber */
 
diff --git a/kernel/net/tipc/udp_media.c b/kernel/net/tipc/udp_media.c
index 70c03271b..6af78c627 100644
--- a/kernel/net/tipc/udp_media.c
+++ b/kernel/net/tipc/udp_media.c
@@ -48,7 +48,6 @@
 #include <linux/tipc_netlink.h>
 #include "core.h"
 #include "bearer.h"
-#include "msg.h"
 
 /* IANA assigned UDP port */
 #define UDP_PORT_DEFAULT	6118
@@ -224,10 +223,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_bearer *ub;
 	struct tipc_bearer *b;
-	int usr = msg_user(buf_msg(skb));
-
-	if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR))
-		skb_linearize(skb);
 
 	ub = rcu_dereference_sk_user_data(sk);
 	if (!ub) {
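The tipc/socket.c hunks above stop building outgoing chains on the shared sk->sk_write_queue and use an on-stack sk_buff_head instead: the queue is initialized before first use and purged on every path that abandons it. A toy model of that lifecycle (illustration only, not the kernel types):

    /* Toy model of the init-use-purge lifecycle of the local pktchain. */
    #include <stdio.h>

    struct toy_skb  { struct toy_skb *next; };
    struct toy_head { struct toy_skb *first; };

    static void queue_init(struct toy_head *q)  { q->first = NULL; }

    static void queue_purge(struct toy_head *q) /* __skb_queue_purge() */
    {
            while (q->first)
                    q->first = q->first->next;  /* freeing elided */
    }

    static int try_xmit(struct toy_head *q)
    {
            (void)q;
            return -1;      /* pretend the link rejected the chain */
    }

    int main(void)
    {
            struct toy_skb a = { NULL };
            struct toy_head pktchain;

            queue_init(&pktchain);          /* skb_queue_head_init() */
            pktchain.first = &a;
            if (try_xmit(&pktchain) < 0)
                    queue_purge(&pktchain); /* nothing leaks on error */
            printf("empty=%d\n", pktchain.first == NULL);
            return 0;
    }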
diff --git a/kernel/net/unix/af_unix.c b/kernel/net/unix/af_unix.c
index 898a53a56..b2e934ff2 100644
--- a/kernel/net/unix/af_unix.c
+++ b/kernel/net/unix/af_unix.c
@@ -315,7 +315,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->path.dentry;
 
-		if (dentry && d_backing_inode(dentry) == i) {
+		if (dentry && d_real_inode(dentry) == i) {
 			sock_hold(s);
 			goto found;
 		}
@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	if (mutex_lock_interruptible(&u->readlock))
+	if (mutex_lock_interruptible(&u->iolock))
 		return -EINTR;
 
 	sk->sk_peek_off = val;
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 
 	return 0;
 }
@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	spin_lock_init(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
-	mutex_init(&u->readlock); /* single task reading lock */
+	mutex_init(&u->iolock); /* single task reading lock */
+	mutex_init(&u->bindlock); /* single task binding lock */
 	init_waitqueue_head(&u->peer_wait);
 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
@@ -847,7 +848,7 @@ static int unix_autobind(struct socket *sock)
 	int err;
 	unsigned int retries = 0;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
 		return err;
 
@@ -894,7 +895,7 @@ retry:
 	spin_unlock(&unix_table_lock);
 	err = 0;
 
-out:	mutex_unlock(&u->readlock);
+out:	mutex_unlock(&u->bindlock);
 	return err;
 }
 
@@ -911,7 +912,7 @@ static struct sock *unix_find_other(struct net *net,
 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 		if (err)
 			goto fail;
-		inode = d_backing_inode(path.dentry);
+		inode = d_real_inode(path.dentry);
 		err = inode_permission(inode, MAY_WRITE);
 		if (err)
 			goto put_fail;
@@ -953,20 +954,32 @@ fail:
 	return NULL;
 }
 
-static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
-		      struct path *res)
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 {
-	int err;
+	struct dentry *dentry;
+	struct path path;
+	int err = 0;
+
+	/*
+	 * Get the parent directory, calculate the hash for last
+	 * component.
+	 */
+	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		return err;
 
-	err = security_path_mknod(path, dentry, mode, 0);
+	/*
+	 * All right, let's create it.
+	 */
+	err = security_path_mknod(&path, dentry, mode, 0);
 	if (!err) {
-		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
+		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 		if (!err) {
-			res->mnt = mntget(path->mnt);
+			res->mnt = mntget(path.mnt);
 			res->dentry = dget(dentry);
 		}
 	}
-
+	done_path_create(&path, dentry);
 	return err;
 }
 
@@ -977,12 +990,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	char *sun_path = sunaddr->sun_path;
-	int err, name_err;
+	int err;
 	unsigned int hash;
 	struct unix_address *addr;
 	struct hlist_head *list;
-	struct path path;
-	struct dentry *dentry;
+	struct path path = { NULL, NULL };
 
 	err = -EINVAL;
 	if (sunaddr->sun_family != AF_UNIX)
@@ -998,34 +1010,25 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	addr_len = err;
 
-	name_err = 0;
-	dentry = NULL;
 	if (sun_path[0]) {
-		/* Get the parent directory, calculate the hash for last
-		 * component.
-		 */
-		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-
-		if (IS_ERR(dentry)) {
-			/* delay report until after 'already bound' check */
-			name_err = PTR_ERR(dentry);
-			dentry = NULL;
+		umode_t mode = S_IFSOCK |
+		       (SOCK_INODE(sock)->i_mode & ~current_umask());
+		err = unix_mknod(sun_path, mode, &path);
+		if (err) {
+			if (err == -EEXIST)
+				err = -EADDRINUSE;
+			goto out;
 		}
 	}
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
-		goto out_path;
+		goto out_put;
 
 	err = -EINVAL;
 	if (u->addr)
 		goto out_up;
 
-	if (name_err) {
-		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
-		goto out_up;
-	}
-
 	err = -ENOMEM;
 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 	if (!addr)
@@ -1036,21 +1039,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	addr->hash = hash ^ sk->sk_type;
 	atomic_set(&addr->refcnt, 1);
 
-	if (dentry) {
-		struct path u_path;
-		umode_t mode = S_IFSOCK |
-		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(dentry, &path, mode, &u_path);
-		if (err) {
-			if (err == -EEXIST)
-				err = -EADDRINUSE;
-			unix_release_addr(addr);
-			goto out_up;
-		}
+	if (sun_path[0]) {
 		addr->hash = UNIX_HASH_SIZE;
-		hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
-		u->path = u_path;
+		u->path = path;
 		list = &unix_socket_table[hash];
 	} else {
 		spin_lock(&unix_table_lock);
@@ -1072,11 +1065,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
-	mutex_unlock(&u->readlock);
-out_path:
-	if (dentry)
-		done_path_create(&path, dentry);
-
+	mutex_unlock(&u->bindlock);
+out_put:
+	if (err)
+		path_put(&path);
 out:
 	return err;
 }
@@ -1971,17 +1963,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	if (false) {
 alloc_skb:
 		unix_state_unlock(other);
-		mutex_unlock(&unix_sk(other)->readlock);
+		mutex_unlock(&unix_sk(other)->iolock);
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
 			goto err;
 	}
 
-	/* we must acquire readlock as we modify already present
+	/* we must acquire iolock as we modify already present
 	 * skbs in the sk_receive_queue and mess with skb->len
 	 */
-	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
 		goto err;
@@ -2048,7 +2040,7 @@ alloc_skb:
 	}
 
 	unix_state_unlock(other);
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 
 	other->sk_data_ready(other);
 	scm_destroy(&scm);
@@ -2057,7 +2049,7 @@ alloc_skb:
 err_state_unlock:
 	unix_state_unlock(other);
 err_unlock:
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 err:
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
@@ -2122,7 +2114,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 	if (flags&MSG_OOB)
 		goto out;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->iolock);
 	if (unlikely(err)) {
 		/* recvmsg() in non blocking mode is supposed to return -EAGAIN
 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
@@ -2198,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 out_free:
 	skb_free_datagram(sk, skb);
 out_unlock:
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 out:
 	return err;
 }
@@ -2207,7 +2199,8 @@ out:
 
 /*
  *	Sleep until more data has arrived. But check for races..
 */
 static long unix_stream_data_wait(struct sock *sk, long timeo,
-				  struct sk_buff *last, unsigned int last_len)
+				  struct sk_buff *last, unsigned int last_len,
+				  bool freezable)
 {
 	struct sk_buff *tail;
 	DEFINE_WAIT(wait);
@@ -2228,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		unix_state_unlock(sk);
-		timeo = freezable_schedule_timeout(timeo);
+		if (freezable)
+			timeo = freezable_schedule_timeout(timeo);
+		else
+			timeo = schedule_timeout(timeo);
 		unix_state_lock(sk);
 
 		if (sock_flag(sk, SOCK_DEAD))
@@ -2258,7 +2254,8 @@ struct unix_stream_read_state {
 	unsigned int splice_flags;
 };
 
-static int unix_stream_read_generic(struct unix_stream_read_state *state)
+static int unix_stream_read_generic(struct unix_stream_read_state *state,
+				    bool freezable)
 {
 	struct scm_cookie scm;
 	struct socket *sock = state->socket;
@@ -2293,7 +2290,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
 	 */
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	if (flags & MSG_PEEK)
 		skip = sk_peek_offset(sk, flags);
@@ -2334,10 +2331,10 @@ again:
 				break;
 			}
 
-			mutex_unlock(&u->readlock);
+			mutex_unlock(&u->iolock);
 
 			timeo = unix_stream_data_wait(sk, timeo, last,
-						      last_len);
+						      last_len, freezable);
 
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeo);
@@ -2345,7 +2342,7 @@ again:
 				goto out;
 			}
 
-			mutex_lock(&u->readlock);
+			mutex_lock(&u->iolock);
 			continue;
 unlock:
 			unix_state_unlock(sk);
@@ -2448,7 +2445,7 @@ unlock:
 		}
 	} while (size);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	if (state->msg)
 		scm_recv(sock, state->msg, &scm, flags);
 	else
@@ -2479,7 +2476,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
 		.flags = flags
 	};
 
-	return unix_stream_read_generic(&state);
+	return unix_stream_read_generic(&state, true);
 }
 
 static ssize_t skb_unix_socket_splice(struct sock *sk,
@@ -2489,9 +2486,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
 	int ret;
 	struct unix_sock *u = unix_sk(sk);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	ret = splice_to_pipe(pipe, spd);
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	return ret;
 }
@@ -2525,7 +2522,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
 	    flags & SPLICE_F_NONBLOCK)
 		state.flags = MSG_DONTWAIT;
 
-	return unix_stream_read_generic(&state);
+	return unix_stream_read_generic(&state, false);
 }
 
 static int unix_shutdown(struct socket *sock, int mode)
 
diff --git a/kernel/net/vmw_vsock/af_vsock.c b/kernel/net/vmw_vsock/af_vsock.c
index 7fd1220fb..9b5bd6d14 100644
--- a/kernel/net/vmw_vsock/af_vsock.c
+++ b/kernel/net/vmw_vsock/af_vsock.c
@@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	else if (sk->sk_shutdown & RCV_SHUTDOWN)
 		err = 0;
 
-	if (copied > 0) {
-		/* We only do these additional bookkeeping/notification steps
-		 * if we actually copied something out of the queue pair
-		 * instead of just peeking ahead.
-		 */
-
-		if (!(flags & MSG_PEEK)) {
-			/* If the other side has shutdown for sending and there
-			 * is nothing more to read, then modify the socket
-			 * state.
-			 */
-			if (vsk->peer_shutdown & SEND_SHUTDOWN) {
-				if (vsock_stream_has_data(vsk) <= 0) {
-					sk->sk_state = SS_UNCONNECTED;
-					sock_set_flag(sk, SOCK_DONE);
-					sk->sk_state_change(sk);
-				}
-			}
-		}
+	if (copied > 0)
 		err = copied;
-	}
 
 out_wait:
 	finish_wait(sk_sleep(sk), &wait);
 
diff --git a/kernel/net/wireless/core.h b/kernel/net/wireless/core.h
index a618b4b86..47ea169aa 100644
--- a/kernel/net/wireless/core.h
+++ b/kernel/net/wireless/core.h
@@ -72,6 +72,7 @@ struct cfg80211_registered_device {
 	struct list_head bss_list;
 	struct rb_root bss_tree;
 	u32 bss_generation;
+	u32 bss_entries;
 	struct cfg80211_scan_request *scan_req; /* protected by RTNL */
 	struct sk_buff *scan_msg;
 	struct cfg80211_sched_scan_request __rcu *sched_scan_req;
@@ -397,6 +398,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev);
 void cfg80211_sme_deauth(struct wireless_dev *wdev);
 void cfg80211_sme_auth_timeout(struct wireless_dev *wdev);
 void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev);
+void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev);
 
 /* internal helpers */
 bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher);
 
diff --git a/kernel/net/wireless/mlme.c b/kernel/net/wireless/mlme.c
index fb44fa3bf..c0e02f72e 100644
--- a/kernel/net/wireless/mlme.c
+++ b/kernel/net/wireless/mlme.c
@@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss)
 }
 EXPORT_SYMBOL(cfg80211_assoc_timeout);
 
+void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+
+	cfg80211_sme_abandon_assoc(wdev);
+
+	cfg80211_unhold_bss(bss_from_pub(bss));
+	cfg80211_put_bss(wiphy, bss);
+}
+EXPORT_SYMBOL(cfg80211_abandon_assoc);
+
 void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 
diff --git a/kernel/net/wireless/nl80211.c b/kernel/net/wireless/nl80211.c
index 75b0d23ee..1f0de6d74 100644
--- a/kernel/net/wireless/nl80211.c
+++ b/kernel/net/wireless/nl80211.c
@@ -6628,7 +6628,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 		params.n_counter_offsets_presp = len / sizeof(u16);
 
 		if (rdev->wiphy.max_num_csa_counters &&
-		    (params.n_counter_offsets_beacon >
+		    (params.n_counter_offsets_presp >
 		     rdev->wiphy.max_num_csa_counters))
 			return -EINVAL;
 
@@ -13161,20 +13161,24 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 	struct wireless_dev *wdev;
 	struct cfg80211_beacon_registration *reg, *tmp;
 
-	if (state != NETLINK_URELEASE)
+	if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
 		return NOTIFY_DONE;
 
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
 		bool schedule_destroy_work = false;
-		bool schedule_scan_stop = false;
 		struct cfg80211_sched_scan_request *sched_scan_req =
 			rcu_dereference(rdev->sched_scan_req);
 
 		if (sched_scan_req && notify->portid &&
-		    sched_scan_req->owner_nlportid == notify->portid)
-			schedule_scan_stop = true;
+		    sched_scan_req->owner_nlportid == notify->portid) {
+			sched_scan_req->owner_nlportid = 0;
+
+			if (rdev->ops->sched_scan_stop &&
+			    rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+				schedule_work(&rdev->sched_scan_stop_wk);
+		}
 
 		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) {
 			cfg80211_mlme_unregister_socket(wdev, notify->portid);
@@ -13205,12 +13209,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 				spin_unlock(&rdev->destroy_list_lock);
 				schedule_work(&rdev->destroy_work);
 			}
-		} else if (schedule_scan_stop) {
-			sched_scan_req->owner_nlportid = 0;
-
-			if (rdev->ops->sched_scan_stop &&
-			    rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
-				schedule_work(&rdev->sched_scan_stop_wk);
 		}
 	}
 
diff --git a/kernel/net/wireless/scan.c b/kernel/net/wireless/scan.c
index 14d5369eb..8dde12a11 100644
--- a/kernel/net/wireless/scan.c
+++ b/kernel/net/wireless/scan.c
@@ -56,6 +56,19 @@
  * also linked into the probe response struct.
  */
 
+/*
+ * Limit the number of BSS entries stored in mac80211. Each one is
+ * a bit over 4k at most, so this limits to roughly 4-5M of memory.
+ * If somebody wants to really attack this though, they'd likely
+ * use small beacons, and only one type of frame, limiting each of
+ * the entries to a much smaller size (in order to generate more
+ * entries in total, so overhead is bigger.)
+ */
+static int bss_entries_limit = 1000;
+module_param(bss_entries_limit, int, 0644);
+MODULE_PARM_DESC(bss_entries_limit,
+		 "limit to number of scan BSS entries (per wiphy, default 1000)");
+
 #define IEEE80211_SCAN_RESULT_EXPIRE	(30 * HZ)
 
 static void bss_free(struct cfg80211_internal_bss *bss)
@@ -136,6 +149,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
 
 	list_del_init(&bss->list);
 	rb_erase(&bss->rbn, &rdev->bss_tree);
+	rdev->bss_entries--;
+	WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list),
+		  "rdev bss entries[%d]/list[empty:%d] corruption\n",
+		  rdev->bss_entries, list_empty(&rdev->bss_list));
 	bss_ref_put(rdev, bss);
 	return true;
 }
@@ -162,6 +179,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
 		rdev->bss_generation++;
 }
 
+static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev)
+{
+	struct cfg80211_internal_bss *bss, *oldest = NULL;
+	bool ret;
+
+	lockdep_assert_held(&rdev->bss_lock);
+
+	list_for_each_entry(bss, &rdev->bss_list, list) {
+		if (atomic_read(&bss->hold))
+			continue;
+
+		if (!list_empty(&bss->hidden_list) &&
+		    !bss->pub.hidden_beacon_bss)
+			continue;
+
+		if (oldest && time_before(oldest->ts, bss->ts))
+			continue;
+		oldest = bss;
+	}
+
+	if (WARN_ON(!oldest))
+		return false;
+
+	/*
+	 * The callers make sure to increase rdev->bss_generation if anything
+	 * gets removed (and a new entry added), so there's no need to also do
+	 * it here.
+	 */
+
+	ret = __cfg80211_unlink_bss(rdev, oldest);
+	WARN_ON(!ret);
+	return ret;
+}
+
 void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
 			   bool send_message)
 {
@@ -687,6 +738,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 	const u8 *ie;
 	int i, ssidlen;
 	u8 fold = 0;
+	u32 n_entries = 0;
 
 	ies = rcu_access_pointer(new->pub.beacon_ies);
 	if (WARN_ON(!ies))
@@ -710,6 +762,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 
 	/* This is the bad part ...
 	 */
 	list_for_each_entry(bss, &rdev->bss_list, list) {
+		/*
+		 * we're iterating all the entries anyway, so take the
+		 * opportunity to validate the list length accounting
+		 */
+		n_entries++;
+
 		if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid))
 			continue;
 		if (bss->pub.channel != new->pub.channel)
@@ -738,6 +796,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 			   new->pub.beacon_ies);
 	}
 
+	WARN_ONCE(n_entries != rdev->bss_entries,
+		  "rdev bss entries[%d]/list[len:%d] corruption\n",
+		  rdev->bss_entries, n_entries);
+
 	return true;
 }
 
@@ -890,7 +952,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 			}
 		}
 
+		if (rdev->bss_entries >= bss_entries_limit &&
+		    !cfg80211_bss_expire_oldest(rdev)) {
+			kfree(new);
+			goto drop;
+		}
+
 		list_add_tail(&new->list, &rdev->bss_list);
+		rdev->bss_entries++;
 		rb_insert_bss(rdev, new);
 		found = new;
 	}
 
diff --git a/kernel/net/wireless/sme.c b/kernel/net/wireless/sme.c
index 8020b5b09..18b4a652c 100644
--- a/kernel/net/wireless/sme.c
+++ b/kernel/net/wireless/sme.c
@@ -39,6 +39,7 @@ struct cfg80211_conn {
 		CFG80211_CONN_ASSOCIATING,
 		CFG80211_CONN_ASSOC_FAILED,
 		CFG80211_CONN_DEAUTH,
+		CFG80211_CONN_ABANDON,
 		CFG80211_CONN_CONNECTED,
 	} state;
 	u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
@@ -204,6 +205,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 		cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 				     NULL, 0,
 				     WLAN_REASON_DEAUTH_LEAVING, false);
+		/* fall through */
+	case CFG80211_CONN_ABANDON:
 		/* free directly, disconnected event already sent */
 		cfg80211_sme_free(wdev);
 		return 0;
@@ -423,6 +426,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
 	schedule_work(&rdev->conn_work);
 }
 
+void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+
+	if (!wdev->conn)
+		return;
+
+	wdev->conn->state = CFG80211_CONN_ABANDON;
+	schedule_work(&rdev->conn_work);
+}
+
 static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev,
 				     const u8 *ies, size_t ies_len,
 				     const u8 **out_ies, size_t *out_ies_len)
 
diff --git a/kernel/net/x25/x25_facilities.c b/kernel/net/x25/x25_facilities.c
index 7ecd04c21..997ff7b25 100644
--- a/kernel/net/x25/x25_facilities.c
+++ b/kernel/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
 
 	memset(&theirs, 0, sizeof(theirs));
 	memcpy(new, ours, sizeof(*new));
+	memset(dte, 0, sizeof(*dte));
 
 	len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
 	if (len < 0)
 
diff --git a/kernel/net/xfrm/xfrm_input.c b/kernel/net/xfrm/xfrm_input.c
index ad7f5b3f9..1c4ad477c 100644
--- a/kernel/net/xfrm/xfrm_input.c
+++ b/kernel/net/xfrm/xfrm_input.c
@@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
 
 		skb_dst_force(skb);
+		dev_hold(skb->dev);
 
 		nexthdr = x->type->input(x, skb);
 
 		if (nexthdr == -EINPROGRESS)
 			return 0;
 resume:
+		dev_put(skb->dev);
+
 		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG) {
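For reference, the scan.c changes above cap the stored scan results at bss_entries_limit (default 1000; at the "bit over 4k" per entry cited in the comment, that is the quoted 4-5M of memory) and, once the cap is hit, evict the oldest entry that is neither held nor the parent of hidden entries. The selection scan, reduced to a stand-alone sketch (toy types, not the cfg80211 structures):

    /* Hypothetical sketch of the oldest-first eviction scan. */
    #include <stdio.h>

    struct toy_bss {
            unsigned long ts;       /* last-seen time */
            int hold;               /* referenced, must not be evicted */
            struct toy_bss *next;
    };

    static struct toy_bss *find_oldest(struct toy_bss *list)
    {
            struct toy_bss *bss, *oldest = NULL;

            for (bss = list; bss; bss = bss->next) {
                    if (bss->hold)
                            continue;
                    if (!oldest || bss->ts < oldest->ts)
                            oldest = bss;
            }
            return oldest;  /* NULL means nothing is evictable */
    }

    int main(void)
    {
            struct toy_bss c = { 30, 0, NULL }, b = { 10, 1, &c }, a = { 20, 0, &b };
            struct toy_bss *o = find_oldest(&a);

            printf("evict ts=%lu\n", o ? o->ts : 0);        /* 20: b is held */
            return 0;
    }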