diff options
Diffstat (limited to 'kernel/net/tipc')
37 files changed, 16372 insertions, 0 deletions
diff --git a/kernel/net/tipc/Kconfig b/kernel/net/tipc/Kconfig new file mode 100644 index 000000000..c25a3a149 --- /dev/null +++ b/kernel/net/tipc/Kconfig @@ -0,0 +1,36 @@ +# +# TIPC configuration +# + +menuconfig TIPC + tristate "The TIPC Protocol" + depends on INET + ---help--- + The Transparent Inter Process Communication (TIPC) protocol is + specially designed for intra cluster communication. This protocol + originates from Ericsson where it has been used in carrier grade + cluster applications for many years. + + For more information about TIPC, see http://tipc.sourceforge.net. + + This protocol support is also available as a module ( = code which + can be inserted in and removed from the running kernel whenever you + want). The module will be called tipc. If you want to compile it + as a module, say M here and read <file:Documentation/kbuild/modules.txt>. + + If in doubt, say N. + +config TIPC_MEDIA_IB + bool "InfiniBand media type support" + depends on TIPC && INFINIBAND_IPOIB + help + Saying Y here will enable support for running TIPC on + IP-over-InfiniBand devices. 
+config TIPC_MEDIA_UDP
+	bool "IP/UDP media type support"
+	depends on TIPC
+	select NET_UDP_TUNNEL
+	default y
+	help
+	  Saying Y here will enable support for running TIPC over IP/UDP
diff --git a/kernel/net/tipc/Makefile b/kernel/net/tipc/Makefile
new file mode 100644
index 000000000..57e460be4
--- /dev/null
+++ b/kernel/net/tipc/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux TIPC layer
+#
+
+obj-$(CONFIG_TIPC) := tipc.o
+
+# Each object is listed exactly once.
+tipc-y	+= addr.o bcast.o bearer.o \
+	   core.o link.o discover.o msg.o \
+	   name_distr.o subscr.o name_table.o net.o \
+	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
+	   server.o
+
+tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
+tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
+tipc-$(CONFIG_SYSCTL)		+= sysctl.o
diff --git a/kernel/net/tipc/addr.c b/kernel/net/tipc/addr.c
new file mode 100644
index 000000000..ba7daa864
--- /dev/null
+++ b/kernel/net/tipc/addr.c
@@ -0,0 +1,153 @@
+/*
+ * net/tipc/addr.c: TIPC address utility routines
+ *
+ * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include "addr.h" +#include "core.h" + +u32 tipc_own_addr(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->own_addr; +} + +/** + * in_own_cluster - test for cluster inclusion; <0.0.0> always matches + */ +int in_own_cluster(struct net *net, u32 addr) +{ + return in_own_cluster_exact(net, addr) || !addr; +} + +int in_own_cluster_exact(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return !((addr ^ tn->own_addr) >> 12); +} + +/** + * in_own_node - test for node inclusion; <0.0.0> always matches + */ +int in_own_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return (addr == tn->own_addr) || !addr; +} + +/** + * addr_domain - convert 2-bit scope value to equivalent message lookup domain + * + * Needed when address of a named message must be looked up a second time + * after a network hop. 
+ */ +u32 addr_domain(struct net *net, u32 sc) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (likely(sc == TIPC_NODE_SCOPE)) + return tn->own_addr; + if (sc == TIPC_CLUSTER_SCOPE) + return tipc_cluster_mask(tn->own_addr); + return tipc_zone_mask(tn->own_addr); +} + +/** + * tipc_addr_domain_valid - validates a network domain address + * + * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, + * where Z, C, and N are non-zero. + * + * Returns 1 if domain address is valid, otherwise 0 + */ +int tipc_addr_domain_valid(u32 addr) +{ + u32 n = tipc_node(addr); + u32 c = tipc_cluster(addr); + u32 z = tipc_zone(addr); + + if (n && (!z || !c)) + return 0; + if (c && !z) + return 0; + return 1; +} + +/** + * tipc_addr_node_valid - validates a proposed network address for this node + * + * Accepts <Z.C.N>, where Z, C, and N are non-zero. + * + * Returns 1 if address can be used, otherwise 0 + */ +int tipc_addr_node_valid(u32 addr) +{ + return tipc_addr_domain_valid(addr) && tipc_node(addr); +} + +int tipc_in_scope(u32 domain, u32 addr) +{ + if (!domain || (domain == addr)) + return 1; + if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ + return 1; + if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */ + return 1; + return 0; +} + +/** + * tipc_addr_scope - convert message lookup domain to a 2-bit scope value + */ +int tipc_addr_scope(u32 domain) +{ + if (likely(!domain)) + return TIPC_ZONE_SCOPE; + if (tipc_node(domain)) + return TIPC_NODE_SCOPE; + if (tipc_cluster(domain)) + return TIPC_CLUSTER_SCOPE; + return TIPC_ZONE_SCOPE; +} + +char *tipc_addr_string_fill(char *string, u32 addr) +{ + snprintf(string, 16, "<%u.%u.%u>", + tipc_zone(addr), tipc_cluster(addr), tipc_node(addr)); + return string; +} diff --git a/kernel/net/tipc/addr.h b/kernel/net/tipc/addr.h new file mode 100644 index 000000000..7ba6d5c8a --- /dev/null +++ b/kernel/net/tipc/addr.h @@ -0,0 +1,68 @@ +/* + * net/tipc/addr.h: Include file for TIPC address utility routines + * + * 
Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2004-2005, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_ADDR_H +#define _TIPC_ADDR_H + +#include <linux/types.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +#define TIPC_ZONE_MASK 0xff000000u +#define TIPC_CLUSTER_MASK 0xfffff000u + +static inline u32 tipc_zone_mask(u32 addr) +{ + return addr & TIPC_ZONE_MASK; +} + +static inline u32 tipc_cluster_mask(u32 addr) +{ + return addr & TIPC_CLUSTER_MASK; +} + +u32 tipc_own_addr(struct net *net); +int in_own_cluster(struct net *net, u32 addr); +int in_own_cluster_exact(struct net *net, u32 addr); +int in_own_node(struct net *net, u32 addr); +u32 addr_domain(struct net *net, u32 sc); +int tipc_addr_domain_valid(u32); +int tipc_addr_node_valid(u32 addr); +int tipc_in_scope(u32 domain, u32 addr); +int tipc_addr_scope(u32 domain); +char *tipc_addr_string_fill(char *string, u32 addr); +#endif diff --git a/kernel/net/tipc/bcast.c b/kernel/net/tipc/bcast.c new file mode 100644 index 000000000..c5cbdcb1f --- /dev/null +++ b/kernel/net/tipc/bcast.c @@ -0,0 +1,986 @@ +/* + * net/tipc/bcast.c: TIPC broadcast code + * + * Copyright (c) 2004-2006, 2014-2015, Ericsson AB + * Copyright (c) 2004, Intel Corporation. + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "socket.h" +#include "msg.h" +#include "bcast.h" +#include "name_distr.h" +#include "core.h" + +#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ +#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ + +const char tipc_bclink_name[] = "broadcast-link"; + +static void tipc_nmap_diff(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff); +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); + +static void tipc_bclink_lock(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->bclink->lock); +} + +static void tipc_bclink_unlock(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_unlock_bh(&tn->bclink->lock); +} + +void tipc_bclink_input(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, 
&tn->bclink->inputq); +} + +uint tipc_bclink_get_mtu(void) +{ + return MAX_PKT_DEFAULT_MCAST; +} + +static u32 bcbuf_acks(struct sk_buff *buf) +{ + return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; +} + +static void bcbuf_set_acks(struct sk_buff *buf, u32 acks) +{ + TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks; +} + +static void bcbuf_decr_acks(struct sk_buff *buf) +{ + bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); +} + +void tipc_bclink_add_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_add(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); +} + +void tipc_bclink_remove_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); +} + +static void bclink_set_last_sent(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct sk_buff *skb = skb_peek(&bcl->backlogq); + + if (skb) + bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1); + else + bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); +} + +u32 tipc_bclink_get_last_sent(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bcl->fsm_msg_cnt; +} + +static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) +{ + node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? 
+ seqno : node->bclink.last_sent; +} + +/** + * tipc_bclink_retransmit_to - get most recent node to request retransmission + * + * Called with bclink_lock locked + */ +struct tipc_node *tipc_bclink_retransmit_to(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bclink->retransmit_to; +} + +/** + * bclink_retransmit_pkt - retransmit broadcast packets + * @after: sequence number of last packet to *not* retransmit + * @to: sequence number of last packet to retransmit + * + * Called with bclink_lock locked + */ +static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) +{ + struct sk_buff *skb; + struct tipc_link *bcl = tn->bcl; + + skb_queue_walk(&bcl->transmq, skb) { + if (more(buf_seqno(skb), after)) { + tipc_link_retransmit(bcl, skb, mod(to - after)); + break; + } + } +} + +/** + * tipc_bclink_wakeup_users - wake up pending users + * + * Called with no locks taken + */ +void tipc_bclink_wakeup_users(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_sk_rcv(net, &tn->bclink->link.wakeupq); +} + +/** + * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets + * @n_ptr: node that sent acknowledgement info + * @acked: broadcast sequence # that has been acknowledged + * + * Node is locked, bclink_lock unlocked. 
+ */ +void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) +{ + struct sk_buff *skb, *tmp; + unsigned int released = 0; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (unlikely(!n_ptr->bclink.recv_permitted)) + return; + + tipc_bclink_lock(net); + + /* Bail out if tx queue is empty (no clean up is required) */ + skb = skb_peek(&tn->bcl->transmq); + if (!skb) + goto exit; + + /* Determine which messages need to be acknowledged */ + if (acked == INVALID_LINK_SEQ) { + /* + * Contact with specified node has been lost, so need to + * acknowledge sent messages only (if other nodes still exist) + * or both sent and unsent messages (otherwise) + */ + if (tn->bclink->bcast_nodes.count) + acked = tn->bcl->fsm_msg_cnt; + else + acked = tn->bcl->next_out_no; + } else { + /* + * Bail out if specified sequence number does not correspond + * to a message that has been sent and not yet acknowledged + */ + if (less(acked, buf_seqno(skb)) || + less(tn->bcl->fsm_msg_cnt, acked) || + less_eq(acked, n_ptr->bclink.acked)) + goto exit; + } + + /* Skip over packets that node has previously acknowledged */ + skb_queue_walk(&tn->bcl->transmq, skb) { + if (more(buf_seqno(skb), n_ptr->bclink.acked)) + break; + } + + /* Update packets that node is now acknowledging */ + skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + bcbuf_decr_acks(skb); + bclink_set_last_sent(net); + if (bcbuf_acks(skb) == 0) { + __skb_unlink(skb, &tn->bcl->transmq); + kfree_skb(skb); + released = 1; + } + } + n_ptr->bclink.acked = acked; + + /* Try resolving broadcast link congestion, if necessary */ + if (unlikely(skb_peek(&tn->bcl->backlogq))) { + tipc_link_push_packets(tn->bcl); + bclink_set_last_sent(net); + } + if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) + n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; +exit: + tipc_bclink_unlock(net); +} + +/** + * tipc_bclink_update_link_state - update 
broadcast link state + * + * RCU and node lock set + */ +void tipc_bclink_update_link_state(struct tipc_node *n_ptr, + u32 last_sent) +{ + struct sk_buff *buf; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + /* Ignore "stale" link state info */ + if (less_eq(last_sent, n_ptr->bclink.last_in)) + return; + + /* Update link synchronization state; quit if in sync */ + bclink_update_last_sent(n_ptr, last_sent); + + if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) + return; + + /* Update out-of-sync state; quit if loss is still unconfirmed */ + if ((++n_ptr->bclink.oos_state) == 1) { + if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2)) + return; + n_ptr->bclink.oos_state++; + } + + /* Don't NACK if one has been recently sent (or seen) */ + if (n_ptr->bclink.oos_state & 0x1) + return; + + /* Send NACK */ + buf = tipc_buf_acquire(INT_H_SIZE); + if (buf) { + struct tipc_msg *msg = buf_msg(buf); + struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); + u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; + + tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, + INT_H_SIZE, n_ptr->addr); + msg_set_non_seq(msg, 1); + msg_set_mc_netid(msg, tn->net_id); + msg_set_bcast_ack(msg, n_ptr->bclink.last_in); + msg_set_bcgap_after(msg, n_ptr->bclink.last_in); + msg_set_bcgap_to(msg, to); + + tipc_bclink_lock(net); + tipc_bearer_send(net, MAX_BEARERS, buf, NULL); + tn->bcl->stats.sent_nacks++; + tipc_bclink_unlock(net); + kfree_skb(buf); + + n_ptr->bclink.oos_state++; + } +} + +/** + * bclink_peek_nack - monitor retransmission requests sent by other nodes + * + * Delay any upcoming NACK by this node if another node has already + * requested the first message this node is going to ask for. 
+ */ +static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) +{ + struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); + + if (unlikely(!n_ptr)) + return; + + tipc_node_lock(n_ptr); + if (n_ptr->bclink.recv_permitted && + (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && + (n_ptr->bclink.last_in == msg_bcgap_after(msg))) + n_ptr->bclink.oos_state = 2; + tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); +} + +/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster + * and to identified node local sockets + * @net: the applicable net namespace + * @list: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_bclink *bclink = tn->bclink; + int rc = 0; + int bc = 0; + struct sk_buff *skb; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; + + /* Prepare clone of message for local node */ + skb = tipc_msg_reassemble(list); + if (unlikely(!skb)) { + __skb_queue_purge(list); + return -EHOSTUNREACH; + } + /* Broadcast to all nodes */ + if (likely(bclink)) { + tipc_bclink_lock(net); + if (likely(bclink->bcast_nodes.count)) { + rc = __tipc_link_xmit(net, bcl, list); + if (likely(!rc)) { + u32 len = skb_queue_len(&bcl->transmq); + + bclink_set_last_sent(net); + bcl->stats.queue_sz_counts++; + bcl->stats.accu_queue_sz += len; + } + bc = 1; + } + tipc_bclink_unlock(net); + } + + if (unlikely(!bc)) + __skb_queue_purge(list); + + if (unlikely(rc)) { + kfree_skb(skb); + return rc; + } + /* Deliver message clone */ + __skb_queue_head_init(&arrvq); + skb_queue_head_init(&inputq); + __skb_queue_tail(&arrvq, skb); + tipc_sk_mcast_rcv(net, &arrvq, &inputq); + return rc; +} + +/** + * bclink_accept_pkt - accept an incoming, in-sequence 
broadcast packet + * + * Called with both sending node's lock and bclink_lock taken. + */ +static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) +{ + struct tipc_net *tn = net_generic(node->net, tipc_net_id); + + bclink_update_last_sent(node, seqno); + node->bclink.last_in = seqno; + node->bclink.oos_state = 0; + tn->bcl->stats.recv_info++; + + /* + * Unicast an ACK periodically, ensuring that + * all nodes in the cluster don't ACK at the same time + */ + if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { + tipc_link_proto_xmit(node->active_links[node->addr & 1], + STATE_MSG, 0, 0, 0, 0); + tn->bcl->stats.sent_acks++; + } +} + +/** + * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards + * + * RCU is locked, no other locks set + */ +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_msg *msg = buf_msg(buf); + struct tipc_node *node; + u32 next_in; + u32 seqno; + int deferred = 0; + int pos = 0; + struct sk_buff *iskb; + struct sk_buff_head *arrvq, *inputq; + + /* Screen out unwanted broadcast messages */ + if (msg_mc_netid(msg) != tn->net_id) + goto exit; + + node = tipc_node_find(net, msg_prevnode(msg)); + if (unlikely(!node)) + goto exit; + + tipc_node_lock(node); + if (unlikely(!node->bclink.recv_permitted)) + goto unlock; + + /* Handle broadcast protocol message */ + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { + if (msg_type(msg) != STATE_MSG) + goto unlock; + if (msg_destnode(msg) == tn->own_addr) { + tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); + tipc_bclink_lock(net); + bcl->stats.recv_nacks++; + tn->bclink->retransmit_to = node; + bclink_retransmit_pkt(tn, msg_bcgap_after(msg), + msg_bcgap_to(msg)); + tipc_bclink_unlock(net); + tipc_node_unlock(node); + } else { + tipc_node_unlock(node); + bclink_peek_nack(net, msg); + } + tipc_node_put(node); + goto exit; + } + + /* Handle in-sequence broadcast 
message */ + seqno = msg_seqno(msg); + next_in = mod(node->bclink.last_in + 1); + arrvq = &tn->bclink->arrvq; + inputq = &tn->bclink->inputq; + + if (likely(seqno == next_in)) { +receive: + /* Deliver message to destination */ + if (likely(msg_isdata(msg))) { + tipc_bclink_lock(net); + bclink_accept_pkt(node, seqno); + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, buf); + spin_unlock_bh(&inputq->lock); + node->action_flags |= TIPC_BCAST_MSG_EVT; + tipc_bclink_unlock(net); + tipc_node_unlock(node); + } else if (msg_user(msg) == MSG_BUNDLER) { + tipc_bclink_lock(net); + bclink_accept_pkt(node, seqno); + bcl->stats.recv_bundles++; + bcl->stats.recv_bundled += msg_msgcnt(msg); + pos = 0; + while (tipc_msg_extract(buf, &iskb, &pos)) { + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, iskb); + spin_unlock_bh(&inputq->lock); + } + node->action_flags |= TIPC_BCAST_MSG_EVT; + tipc_bclink_unlock(net); + tipc_node_unlock(node); + } else if (msg_user(msg) == MSG_FRAGMENTER) { + tipc_bclink_lock(net); + bclink_accept_pkt(node, seqno); + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) { + tipc_bclink_unlock(net); + goto unlock; + } + bcl->stats.recv_fragments++; + if (buf) { + bcl->stats.recv_fragmented++; + msg = buf_msg(buf); + tipc_bclink_unlock(net); + goto receive; + } + tipc_bclink_unlock(net); + tipc_node_unlock(node); + } else { + tipc_bclink_lock(net); + bclink_accept_pkt(node, seqno); + tipc_bclink_unlock(net); + tipc_node_unlock(node); + kfree_skb(buf); + } + buf = NULL; + + /* Determine new synchronization state */ + tipc_node_lock(node); + if (unlikely(!tipc_node_is_up(node))) + goto unlock; + + if (node->bclink.last_in == node->bclink.last_sent) + goto unlock; + + if (skb_queue_empty(&node->bclink.deferdq)) { + node->bclink.oos_state = 1; + goto unlock; + } + + msg = buf_msg(skb_peek(&node->bclink.deferdq)); + seqno = msg_seqno(msg); + next_in = mod(next_in + 1); + if (seqno != next_in) + goto unlock; 
+ + /* Take in-sequence message from deferred queue & deliver it */ + buf = __skb_dequeue(&node->bclink.deferdq); + goto receive; + } + + /* Handle out-of-sequence broadcast message */ + if (less(next_in, seqno)) { + deferred = tipc_link_defer_pkt(&node->bclink.deferdq, + buf); + bclink_update_last_sent(node, seqno); + buf = NULL; + } + + tipc_bclink_lock(net); + + if (deferred) + bcl->stats.deferred_recv++; + else + bcl->stats.duplicates++; + + tipc_bclink_unlock(net); + +unlock: + tipc_node_unlock(node); + tipc_node_put(node); +exit: + kfree_skb(buf); +} + +u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) +{ + return (n_ptr->bclink.recv_permitted && + (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); +} + + +/** + * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer + * + * Send packet over as many bearers as necessary to reach all nodes + * that have joined the broadcast link. + * + * Returns 0 (packet sent successfully) under all circumstances, + * since the broadcast link's pseudo-bearer never blocks + */ +static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, + struct tipc_bearer *unused1, + struct tipc_media_addr *unused2) +{ + int bp_index; + struct tipc_msg *msg = buf_msg(buf); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bclink *bclink = tn->bclink; + + /* Prepare broadcast link message for reliable transmission, + * if first time trying to send it; + * preparation is skipped for broadcast link protocol messages + * since they are sent in an unreliable manner and don't need it + */ + if (likely(!msg_non_seq(buf_msg(buf)))) { + bcbuf_set_acks(buf, bclink->bcast_nodes.count); + msg_set_non_seq(msg, 1); + msg_set_mc_netid(msg, tn->net_id); + tn->bcl->stats.sent_info++; + if (WARN_ON(!bclink->bcast_nodes.count)) { + dump_stack(); + return 0; + } + } + + /* Send buffer over bearers until all targets reached */ + bcbearer->remains = 
bclink->bcast_nodes; + + for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { + struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; + struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; + struct tipc_bearer *bp[2] = {p, s}; + struct tipc_bearer *b = bp[msg_link_selector(msg)]; + struct sk_buff *tbuf; + + if (!p) + break; /* No more bearers to try */ + if (!b) + b = p; + tipc_nmap_diff(&bcbearer->remains, &b->nodes, + &bcbearer->remains_new); + if (bcbearer->remains_new.count == bcbearer->remains.count) + continue; /* Nothing added by bearer pair */ + + if (bp_index == 0) { + /* Use original buffer for first bearer */ + tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); + } else { + /* Avoid concurrent buffer access */ + tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); + if (!tbuf) + break; + tipc_bearer_send(net, b->identity, tbuf, + &b->bcast_addr); + kfree_skb(tbuf); /* Bearer keeps a clone */ + } + if (bcbearer->remains_new.count == 0) + break; /* All targets reached */ + + bcbearer->remains = bcbearer->remains_new; + } + + return 0; +} + +/** + * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer + */ +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; + struct tipc_bcbearer_pair *bp_curr; + struct tipc_bearer *b; + int b_index; + int pri; + + tipc_bclink_lock(net); + + if (action) + tipc_nmap_add(nm_ptr, node); + else + tipc_nmap_remove(nm_ptr, node); + + /* Group bearers by priority (can assume max of two per priority) */ + memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + + rcu_read_lock(); + for (b_index = 0; b_index < MAX_BEARERS; b_index++) { + b = rcu_dereference_rtnl(tn->bearer_list[b_index]); + if (!b || !b->nodes.count) + continue; + + if (!bp_temp[b->priority].primary) + 
bp_temp[b->priority].primary = b; + else + bp_temp[b->priority].secondary = b; + } + rcu_read_unlock(); + + /* Create array of bearer pairs for broadcasting */ + bp_curr = bcbearer->bpairs; + memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs)); + + for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) { + + if (!bp_temp[pri].primary) + continue; + + bp_curr->primary = bp_temp[pri].primary; + + if (bp_temp[pri].secondary) { + if (tipc_nmap_equal(&bp_temp[pri].primary->nodes, + &bp_temp[pri].secondary->nodes)) { + bp_curr->secondary = bp_temp[pri].secondary; + } else { + bp_curr++; + bp_curr->primary = bp_temp[pri].secondary; + } + } + + bp_curr++; + } + + tipc_bclink_unlock(net); +} + +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) +{ + int i; + struct nlattr *nest; + + struct nla_map { + __u32 key; + __u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? 
+			(stats->accu_queue_sz / stats->queue_sz_counts) : 0}
+	};
+
+	nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS);
+	if (!nest)
+		return -EMSGSIZE;
+
+	for (i = 0; i < ARRAY_SIZE(map); i++)
+		if (nla_put_u32(skb, map[i].key, map[i].val))
+			goto msg_full;
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+msg_full:
+	nla_nest_cancel(skb, nest);
+
+	return -EMSGSIZE;
+}
+
+/**
+ * tipc_nl_add_bc_link - append a broadcast link entry to a netlink message
+ * @net: the applicable net namespace
+ * @msg: netlink message descriptor (skb, portid, seq) to append to
+ *
+ * Emits one TIPC_NL_LINK_GET message carrying the link-up and broadcast
+ * flags, the link name, the next rx/tx sequence numbers, the window
+ * property and the link statistics. The broadcast link lock is held while
+ * the link fields are read and is released on every return path.
+ *
+ * Returns 0 on success or when this namespace has no broadcast link,
+ * -EMSGSIZE if the header or any attribute could not be added (a header
+ * that was already written is cancelled in that case).
+ */
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
+{
+	int err;
+	void *hdr;
+	struct nlattr *attrs;
+	struct nlattr *prop;
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_link *bcl = tn->bcl;
+
+	if (!bcl)
+		return 0;
+
+	tipc_bclink_lock(net);
+
+	hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+			  NLM_F_MULTI, TIPC_NL_LINK_GET);
+	if (!hdr) {
+		/* No header to cancel yet, but the lock must not leak */
+		tipc_bclink_unlock(net);
+		return -EMSGSIZE;
+	}
+
+	attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
+	if (!attrs)
+		goto msg_full;
+
+	/* The broadcast link is always up */
+	if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
+		goto attr_msg_full;
+
+	if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST))
+		goto attr_msg_full;
+	if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name))
+		goto attr_msg_full;
+	if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->next_in_no))
+		goto attr_msg_full;
+	if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->next_out_no))
+		goto attr_msg_full;
+
+	prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP);
+	if (!prop)
+		goto attr_msg_full;
+	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
+		goto prop_msg_full;
+	nla_nest_end(msg->skb, prop);
+
+	err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats);
+	if (err)
+		goto attr_msg_full;
+
+	tipc_bclink_unlock(net);
+	nla_nest_end(msg->skb, attrs);
+	genlmsg_end(msg->skb, hdr);
+
+	return 0;
+
+prop_msg_full:
+	nla_nest_cancel(msg->skb, prop);
+attr_msg_full:
+	nla_nest_cancel(msg->skb, attrs);
+msg_full:
+	tipc_bclink_unlock(net);
+	genlmsg_cancel(msg->skb, hdr);
+
+	return -EMSGSIZE;
+}
+
+int tipc_bclink_reset_stats(struct net *net)
+{
+	struct tipc_net
*tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + + if (!bcl) + return -ENOPROTOOPT; + + tipc_bclink_lock(net); + memset(&bcl->stats, 0, sizeof(bcl->stats)); + tipc_bclink_unlock(net); + return 0; +} + +int tipc_bclink_set_queue_limits(struct net *net, u32 limit) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + + if (!bcl) + return -ENOPROTOOPT; + if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) + return -EINVAL; + + tipc_bclink_lock(net); + tipc_link_set_queue_limits(bcl, limit); + tipc_bclink_unlock(net); + return 0; +} + +int tipc_bclink_init(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; + + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); + if (!bcbearer) + return -ENOMEM; + + bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); + if (!bclink) { + kfree(bcbearer); + return -ENOMEM; + } + + bcl = &bclink->link; + bcbearer->bearer.media = &bcbearer->media; + bcbearer->media.send_msg = tipc_bcbearer_send; + sprintf(bcbearer->media.name, "tipc-broadcast"); + + spin_lock_init(&bclink->lock); + __skb_queue_head_init(&bcl->transmq); + __skb_queue_head_init(&bcl->backlogq); + __skb_queue_head_init(&bcl->deferdq); + skb_queue_head_init(&bcl->wakeupq); + bcl->next_out_no = 1; + spin_lock_init(&bclink->node.lock); + __skb_queue_head_init(&bclink->arrvq); + skb_queue_head_init(&bclink->inputq); + bcl->owner = &bclink->node; + bcl->owner->net = net; + bcl->mtu = MAX_PKT_DEFAULT_MCAST; + tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); + bcl->bearer_id = MAX_BEARERS; + rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); + bcl->state = WORKING_WORKING; + bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; + msg_set_prevnode(bcl->pmsg, tn->own_addr); + strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + tn->bcbearer = bcbearer; + tn->bclink = bclink; + 
tn->bcl = bcl; + return 0; +} + +void tipc_bclink_stop(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_link_purge_queues(tn->bcl); + tipc_bclink_unlock(net); + + RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); + synchronize_net(); + kfree(tn->bcbearer); + kfree(tn->bclink); +} + +/** + * tipc_nmap_add - add a node to a node map + */ +static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +{ + int n = tipc_node(node); + int w = n / WSIZE; + u32 mask = (1 << (n % WSIZE)); + + if ((nm_ptr->map[w] & mask) == 0) { + nm_ptr->count++; + nm_ptr->map[w] |= mask; + } +} + +/** + * tipc_nmap_remove - remove a node from a node map + */ +static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +{ + int n = tipc_node(node); + int w = n / WSIZE; + u32 mask = (1 << (n % WSIZE)); + + if ((nm_ptr->map[w] & mask) != 0) { + nm_ptr->map[w] &= ~mask; + nm_ptr->count--; + } +} + +/** + * tipc_nmap_diff - find differences between node maps + * @nm_a: input node map A + * @nm_b: input node map B + * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) + */ +static void tipc_nmap_diff(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff) +{ + int stop = ARRAY_SIZE(nm_a->map); + int w; + int b; + u32 map; + + memset(nm_diff, 0, sizeof(*nm_diff)); + for (w = 0; w < stop; w++) { + map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); + nm_diff->map[w] = map; + if (map != 0) { + for (b = 0 ; b < WSIZE; b++) { + if (map & (1 << b)) + nm_diff->count++; + } + } + } +} diff --git a/kernel/net/tipc/bcast.h b/kernel/net/tipc/bcast.h new file mode 100644 index 000000000..4bdc12277 --- /dev/null +++ b/kernel/net/tipc/bcast.h @@ -0,0 +1,136 @@ +/* + * net/tipc/bcast.h: Include file for TIPC broadcast code + * + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_BCAST_H +#define _TIPC_BCAST_H + +#include <linux/tipc_config.h> +#include "link.h" +#include "node.h" + +/** + * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link + * @primary: pointer to primary bearer + * @secondary: pointer to secondary bearer + * + * Bearers must have same priority and same set of reachable destinations + * to be paired. + */ + +struct tipc_bcbearer_pair { + struct tipc_bearer *primary; + struct tipc_bearer *secondary; +}; + +#define BCBEARER MAX_BEARERS + +/** + * struct tipc_bcbearer - bearer used by broadcast link + * @bearer: (non-standard) broadcast bearer structure + * @media: (non-standard) broadcast media structure + * @bpairs: array of bearer pairs + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bclink_lock". + */ +struct tipc_bcbearer { + struct tipc_bearer bearer; + struct tipc_media media; + struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; + struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct tipc_node_map remains; + struct tipc_node_map remains_new; +}; + +/** + * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure + * @link: (non-standard) broadcast link structure + * @node: (non-standard) node structure representing b'cast link's peer node + * @bcast_nodes: map of broadcast-capable nodes + * @retransmit_to: node that most recently requested a retransmit + * + * Handles sequence numbering, fragmentation, bundling, etc. 
+ */ +struct tipc_bclink { + spinlock_t lock; + struct tipc_link link; + struct tipc_node node; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; + struct tipc_node_map bcast_nodes; + struct tipc_node *retransmit_to; +}; + +struct tipc_node; +extern const char tipc_bclink_name[]; + +/** + * tipc_nmap_equal - test for equality of node maps + */ +static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b) +{ + return !memcmp(nm_a, nm_b, sizeof(*nm_a)); +} + +int tipc_bclink_init(struct net *net); +void tipc_bclink_stop(struct net *net); +void tipc_bclink_add_node(struct net *net, u32 addr); +void tipc_bclink_remove_node(struct net *net, u32 addr); +struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); +void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); +u32 tipc_bclink_get_last_sent(struct net *net); +u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); +void tipc_bclink_update_link_state(struct tipc_node *node, + u32 last_sent); +int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_set_queue_limits(struct net *net, u32 limit); +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action); +uint tipc_bclink_get_mtu(void); +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); +void tipc_bclink_wakeup_users(struct net *net); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +void tipc_bclink_input(struct net *net); + +#endif diff --git a/kernel/net/tipc/bearer.c b/kernel/net/tipc/bearer.c new file mode 100644 index 000000000..70e3dacbf --- /dev/null +++ b/kernel/net/tipc/bearer.c @@ -0,0 +1,1025 @@ +/* + * net/tipc/bearer.c: TIPC bearer code + * + * Copyright (c) 1996-2006, 2013-2014, Ericsson AB + * Copyright (c) 2004-2006, 2010-2013, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <net/sock.h> +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "discover.h" +#include "bcast.h" + +#define MAX_ADDR_STR 60 + +static struct tipc_media * const media_info_array[] = { + ð_media_info, +#ifdef CONFIG_TIPC_MEDIA_IB + &ib_media_info, +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, +#endif + NULL +}; + +static const struct nla_policy +tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { + [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_BEARER_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_BEARER_NAME + }, + [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } +}; + +static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { + [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING }, + [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } +}; + +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down); + +/** + * tipc_media_find - locates specified media object by name + */ +struct tipc_media *tipc_media_find(const char *name) +{ + u32 i; + + for (i = 0; media_info_array[i] != NULL; i++) { + if (!strcmp(media_info_array[i]->name, name)) + break; + } + return media_info_array[i]; +} + +/** + * media_find_id - locates specified media object by type identifier + */ +static struct tipc_media *media_find_id(u8 type) +{ + u32 i; + + for (i = 0; media_info_array[i] != NULL; i++) { + if (media_info_array[i]->type_id == type) + break; + } + return media_info_array[i]; +} + +/** + * tipc_media_addr_printf - record media address in print buffer + */ +void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) +{ + char addr_str[MAX_ADDR_STR]; + struct tipc_media *m_ptr; + int ret; + + m_ptr = media_find_id(a->media_id); + + if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + else { + 
u32 i; + + ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); + for (i = 0; i < sizeof(a->value); i++) + ret += scnprintf(buf - ret, len + ret, + "-%02x", a->value[i]); + } +} + +/** + * bearer_name_validate - validate & (optionally) deconstruct bearer name + * @name: ptr to bearer name string + * @name_parts: ptr to area for bearer name components (or NULL if not needed) + * + * Returns 1 if bearer name is valid, otherwise 0. + */ +static int bearer_name_validate(const char *name, + struct tipc_bearer_names *name_parts) +{ + char name_copy[TIPC_MAX_BEARER_NAME]; + char *media_name; + char *if_name; + u32 media_len; + u32 if_len; + + /* copy bearer name & ensure length is OK */ + name_copy[TIPC_MAX_BEARER_NAME - 1] = 0; + /* need above in case non-Posix strncpy() doesn't pad with nulls */ + strncpy(name_copy, name, TIPC_MAX_BEARER_NAME); + if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0) + return 0; + + /* ensure all component parts of bearer name are present */ + media_name = name_copy; + if_name = strchr(media_name, ':'); + if (if_name == NULL) + return 0; + *(if_name++) = 0; + media_len = if_name - media_name; + if_len = strlen(if_name) + 1; + + /* validate component parts of bearer name */ + if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || + (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) + return 0; + + /* return bearer name components, if necessary */ + if (name_parts) { + strcpy(name_parts->media_name, media_name); + strcpy(name_parts->if_name, if_name); + } + return 1; +} + +/** + * tipc_bearer_find - locates bearer object with matching bearer name + */ +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b_ptr; + u32 i; + + for (i = 0; i < MAX_BEARERS; i++) { + b_ptr = rtnl_dereference(tn->bearer_list[i]); + if (b_ptr && (!strcmp(b_ptr->name, name))) + return b_ptr; + } + return NULL; +} + +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, 
u32 dest) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); + tipc_disc_add_dest(b_ptr->link_req); + } + rcu_read_unlock(); +} + +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b_ptr) { + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); + tipc_disc_remove_dest(b_ptr->link_req); + } + rcu_read_unlock(); +} + +/** + * tipc_enable_bearer - enable bearer with the given name + */ +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 priority, + struct nlattr *attr[]) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b_ptr; + struct tipc_media *m_ptr; + struct tipc_bearer_names b_names; + char addr_string[16]; + u32 bearer_id; + u32 with_this_prio; + u32 i; + int res = -EINVAL; + + if (!tn->own_addr) { + pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", + name); + return -ENOPROTOOPT; + } + if (!bearer_name_validate(name, &b_names)) { + pr_warn("Bearer <%s> rejected, illegal name\n", name); + return -EINVAL; + } + if (tipc_addr_domain_valid(disc_domain) && + (disc_domain != tn->own_addr)) { + if (tipc_in_scope(disc_domain, tn->own_addr)) { + disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; + res = 0; /* accept any node in own cluster */ + } else if (in_own_cluster_exact(net, disc_domain)) + res = 0; /* accept specified node in own cluster */ + } + if (res) { + pr_warn("Bearer <%s> rejected, illegal discovery domain\n", + name); + return -EINVAL; + } + if ((priority > TIPC_MAX_LINK_PRI) && + (priority != TIPC_MEDIA_LINK_PRI)) { + pr_warn("Bearer <%s> rejected, illegal priority\n", name); + return -EINVAL; + } 
+ + m_ptr = tipc_media_find(b_names.media_name); + if (!m_ptr) { + pr_warn("Bearer <%s> rejected, media <%s> not registered\n", + name, b_names.media_name); + return -EINVAL; + } + + if (priority == TIPC_MEDIA_LINK_PRI) + priority = m_ptr->priority; + +restart: + bearer_id = MAX_BEARERS; + with_this_prio = 1; + for (i = MAX_BEARERS; i-- != 0; ) { + b_ptr = rtnl_dereference(tn->bearer_list[i]); + if (!b_ptr) { + bearer_id = i; + continue; + } + if (!strcmp(name, b_ptr->name)) { + pr_warn("Bearer <%s> rejected, already enabled\n", + name); + return -EINVAL; + } + if ((b_ptr->priority == priority) && + (++with_this_prio > 2)) { + if (priority-- == 0) { + pr_warn("Bearer <%s> rejected, duplicate priority\n", + name); + return -EINVAL; + } + pr_warn("Bearer <%s> priority adjustment required %u->%u\n", + name, priority + 1, priority); + goto restart; + } + } + if (bearer_id >= MAX_BEARERS) { + pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n", + name, MAX_BEARERS); + return -EINVAL; + } + + b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); + if (!b_ptr) + return -ENOMEM; + + strcpy(b_ptr->name, name); + b_ptr->media = m_ptr; + res = m_ptr->enable_media(net, b_ptr, attr); + if (res) { + pr_warn("Bearer <%s> rejected, enable failure (%d)\n", + name, -res); + return -EINVAL; + } + + b_ptr->identity = bearer_id; + b_ptr->tolerance = m_ptr->tolerance; + b_ptr->window = m_ptr->window; + b_ptr->domain = disc_domain; + b_ptr->net_plane = bearer_id + 'A'; + b_ptr->priority = priority; + + res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); + if (res) { + bearer_disable(net, b_ptr, false); + pr_warn("Bearer <%s> rejected, discovery object creation failed\n", + name); + return -EINVAL; + } + + rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); + + pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", + name, + tipc_addr_string_fill(addr_string, disc_domain), priority); + return res; +} + +/** + * tipc_reset_bearer - Reset all links established over this 
bearer + */ +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) +{ + pr_info("Resetting bearer <%s>\n", b_ptr->name); + tipc_link_reset_list(net, b_ptr->identity); + tipc_disc_reset(net, b_ptr); + return 0; +} + +/** + * bearer_disable + * + * Note: This routine assumes caller holds RTNL lock. + */ +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 i; + + pr_info("Disabling bearer <%s>\n", b_ptr->name); + b_ptr->media->disable_media(b_ptr); + + tipc_link_delete_list(net, b_ptr->identity, shutting_down); + if (b_ptr->link_req) + tipc_disc_delete(b_ptr->link_req); + + for (i = 0; i < MAX_BEARERS; i++) { + if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + RCU_INIT_POINTER(tn->bearer_list[i], NULL); + break; + } + } + kfree_rcu(b_ptr, rcu); +} + +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) +{ + struct net_device *dev; + char *driver_name = strchr((const char *)b->name, ':') + 1; + + /* Find device with specified name */ + dev = dev_get_by_name(net, driver_name); + if (!dev) + return -ENODEV; + + /* Associate TIPC bearer with L2 bearer */ + rcu_assign_pointer(b->media_ptr, dev); + memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); + memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); + b->bcast_addr.media_id = b->media->type_id; + b->bcast_addr.broadcast = 1; + b->mtu = dev->mtu; + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); + rcu_assign_pointer(dev->tipc_ptr, b); + return 0; +} + +/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface + * + * Mark L2 bearer as inactive so that incoming buffers are thrown away, + * then get worker thread to complete bearer cleanup. (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) 
+ */ +void tipc_disable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev; + + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + RCU_INIT_POINTER(b->media_ptr, NULL); + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); + dev_put(dev); +} + +/** + * tipc_l2_send_msg - send a TIPC packet out over an L2 interface + * @buf: the packet to be sent + * @b_ptr: the bearer through which the packet is to be sent + * @dest: peer destination address + */ +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest) +{ + struct sk_buff *clone; + struct net_device *dev; + int delta; + + dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + if (!dev) + return 0; + + clone = skb_clone(buf, GFP_ATOMIC); + if (!clone) + return 0; + + delta = dev->hard_header_len - skb_headroom(buf); + if ((delta > 0) && + pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(clone); + return 0; + } + + skb_reset_network_header(clone); + clone->dev = dev; + clone->protocol = htons(ETH_P_TIPC); + dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, clone->len); + dev_queue_xmit(clone); + return 0; +} + +/* tipc_bearer_send- sends buffer to destination over bearer + * + * IMPORTANT: + * The media send routine must not alter the buffer being passed in + * as it may be needed for later retransmission! 
+ */ +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, + struct tipc_media_addr *dest) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b_ptr)) + b_ptr->media->send_msg(net, buf, b_ptr, dest); + rcu_read_unlock(); +} + +/** + * tipc_l2_rcv_msg - handle incoming TIPC message from an interface + * @buf: the received packet + * @dev: the net device that the packet was received on + * @pt: the packet_type structure which was used to register this handler + * @orig_dev: the original receive net device in case the device is a bond + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using interface multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); + if (likely(b_ptr)) { + if (likely(buf->pkt_type <= PACKET_BROADCAST)) { + buf->next = NULL; + tipc_rcv(dev_net(dev), buf, b_ptr); + rcu_read_unlock(); + return NET_RX_SUCCESS; + } + } + rcu_read_unlock(); + + kfree_skb(buf); + return NET_RX_DROP; +} + +/** + * tipc_l2_device_event - handle device events from network device + * @nb: the context of the notification + * @evt: the type of event + * @ptr: the net device that the event was on + * + * This function is called by the Ethernet driver in case of link + * change event. 
+ */ +static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct tipc_bearer *b_ptr; + + b_ptr = rtnl_dereference(dev->tipc_ptr); + if (!b_ptr) + return NOTIFY_DONE; + + b_ptr->mtu = dev->mtu; + + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev)) + break; + case NETDEV_DOWN: + case NETDEV_CHANGEMTU: + tipc_reset_bearer(net, b_ptr); + break; + case NETDEV_CHANGEADDR: + b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, + (char *)dev->dev_addr); + tipc_reset_bearer(net, b_ptr); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + bearer_disable(dev_net(dev), b_ptr, false); + break; + } + return NOTIFY_OK; +} + +static struct packet_type tipc_packet_type __read_mostly = { + .type = htons(ETH_P_TIPC), + .func = tipc_l2_rcv_msg, +}; + +static struct notifier_block notifier = { + .notifier_call = tipc_l2_device_event, + .priority = 0, +}; + +int tipc_bearer_setup(void) +{ + int err; + + err = register_netdevice_notifier(¬ifier); + if (err) + return err; + dev_add_pack(&tipc_packet_type); + return 0; +} + +void tipc_bearer_cleanup(void) +{ + unregister_netdevice_notifier(¬ifier); + dev_remove_pack(&tipc_packet_type); +} + +void tipc_bearer_stop(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b_ptr; + u32 i; + + for (i = 0; i < MAX_BEARERS; i++) { + b_ptr = rtnl_dereference(tn->bearer_list[i]); + if (b_ptr) { + bearer_disable(net, b_ptr, true); + tn->bearer_list[i] = NULL; + } + } +} + +/* Caller should hold rtnl_lock to protect the bearer */ +static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, + struct tipc_bearer *bearer, int nlflags) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_BEARER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = 
nla_nest_start(msg->skb, TIPC_NLA_BEARER); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (i == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (i = 0; i < MAX_BEARERS; i++) { + bearer = rtnl_dereference(tn->bearer_list[i]); + if (!bearer) + continue; + + err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct sk_buff *rep; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + 
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + bearer = tipc_bearer_find(net, name); + if (!bearer) { + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_bearer(&msg, bearer, 0); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + bearer = tipc_bearer_find(net, name); + if (!bearer) { + rtnl_unlock(); + return -EINVAL; + } + + bearer_disable(net, bearer, false); + rtnl_unlock(); + + return 0; +} + +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 domain; + u32 prio; + + prio = TIPC_MEDIA_LINK_PRI; + domain = tn->own_addr & TIPC_CLUSTER_MASK; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + if (attrs[TIPC_NLA_BEARER_DOMAIN]) + domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); + + if 
(attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + } + + rtnl_lock(); + err = tipc_enable_bearer(net, bearer, domain, prio, attrs); + if (err) { + rtnl_unlock(); + return err; + } + rtnl_unlock(); + + return 0; +} + +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, name); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) { + rtnl_unlock(); + return err; + } + + if (props[TIPC_NLA_PROP_TOL]) + b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if (props[TIPC_NLA_PROP_PRIO]) + b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + } + rtnl_unlock(); + + return 0; +} + +static int __tipc_nl_add_media(struct tipc_nl_msg *msg, + struct tipc_media *media, int nlflags) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_MEDIA_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, 
media->name)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_nl_msg msg; + + if (i == MAX_MEDIA) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (; media_info_array[i] != NULL; i++) { + err = __tipc_nl_add_media(&msg, media_info_array[i], + NLM_F_MULTI); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_nl_msg msg; + struct tipc_media *media; + struct sk_buff *rep; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + media = tipc_media_find(name); + if (!media) { + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_media(&msg, media, 0); + if (err) + 
goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_media *m; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy); + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + rtnl_lock(); + m = tipc_media_find(name); + if (!m) { + rtnl_unlock(); + return -EINVAL; + } + + if (attrs[TIPC_NLA_MEDIA_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], + props); + if (err) { + rtnl_unlock(); + return err; + } + + if (props[TIPC_NLA_PROP_TOL]) + m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if (props[TIPC_NLA_PROP_PRIO]) + m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + } + rtnl_unlock(); + + return 0; +} diff --git a/kernel/net/tipc/bearer.h b/kernel/net/tipc/bearer.h new file mode 100644 index 000000000..5cad243ee --- /dev/null +++ b/kernel/net/tipc/bearer.h @@ -0,0 +1,221 @@ +/* + * net/tipc/bearer.h: Include file for TIPC bearer code + * + * Copyright (c) 1996-2006, 2013-2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_BEARER_H +#define _TIPC_BEARER_H + +#include "netlink.h" +#include <net/genetlink.h> + +#define MAX_BEARERS 2 +#define MAX_MEDIA 3 +#define MAX_NODES 4096 +#define WSIZE 32 + +/* Identifiers associated with TIPC message header media address info + * - address info field is 32 bytes long + * - the field's actual content and length is defined per media + * - remaining unused bytes in the field are set to zero + */ +#define TIPC_MEDIA_INFO_SIZE 32 +#define TIPC_MEDIA_TYPE_OFFSET 3 +#define TIPC_MEDIA_ADDR_OFFSET 4 + +/* + * Identifiers of supported TIPC media types + */ +#define TIPC_MEDIA_TYPE_ETH 1 +#define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 + +/** + * struct tipc_node_map - set of node identifiers + * @count: # of nodes in set + * @map: bitmap of node identifiers that are in the set + */ +struct tipc_node_map { + u32 count; + u32 map[MAX_NODES / WSIZE]; +}; + +/** + * struct tipc_media_addr - destination address used by TIPC bearers + * @value: address info (format defined by media) + * @media_id: TIPC media type identifier + * @broadcast: non-zero if address is a broadcast address + */ +struct tipc_media_addr { + u8 value[TIPC_MEDIA_INFO_SIZE]; + u8 media_id; + u8 broadcast; +}; + +struct tipc_bearer; + +/** + * struct tipc_media - Media specific info exposed to generic bearer layer + * @send_msg: routine which handles buffer transmission + * @enable_media: routine which enables a media + * @disable_media: routine which disables a media + * @addr2str: convert media address format to string + * @addr2msg: convert from media addr format to discovery msg addr format + * @msg2addr: convert from discovery msg addr format to media addr format + * @raw2addr: convert from raw addr format to media addr format + * @priority: default link (and bearer) priority + * @tolerance: default time (in ms) before declaring link failure + * @window: default window (in packets) before declaring link congestion + * @type_id: TIPC media identifier + 
* @hwaddr_len: TIPC media address len + * @name: media name + */ +struct tipc_media { + int (*send_msg)(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + struct nlattr *attr[]); + void (*disable_media)(struct tipc_bearer *b_ptr); + int (*addr2str)(struct tipc_media_addr *addr, + char *strbuf, + int bufsz); + int (*addr2msg)(char *msg, struct tipc_media_addr *addr); + int (*msg2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg); + int (*raw2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *raw); + u32 priority; + u32 tolerance; + u32 window; + u32 type_id; + u32 hwaddr_len; + char name[TIPC_MAX_MEDIA_NAME]; +}; + +/** + * struct tipc_bearer - Generic TIPC bearer structure + * @media_ptr: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) + * @media: ptr to media structure associated with bearer + * @bcast_addr: media address used in broadcasting + * @rcu: rcu struct for tipc_bearer + * @priority: default link priority for bearer + * @window: default window size for bearer + * @tolerance: default link tolerance for bearer + * @domain: network domain to which links can be established + * @identity: array index of this bearer within TIPC bearer array + * @link_req: ptr to (optional) structure making periodic link setup requests + * @net_plane: network plane ('A' through 'H') currently associated with bearer + * @nodes: indicates which nodes in cluster can be reached through bearer + * + * Note: media-specific code is responsible for initialization of the fields + * indicated below when a bearer is enabled; TIPC's generic bearer code takes + * care of initializing all other fields. 
+ */ +struct tipc_bearer { + void __rcu *media_ptr; /* initialized by media */ + u32 mtu; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ + char name[TIPC_MAX_BEARER_NAME]; + struct tipc_media *media; + struct tipc_media_addr bcast_addr; + struct rcu_head rcu; + u32 priority; + u32 window; + u32 tolerance; + u32 domain; + u32 identity; + struct tipc_link_req *link_req; + char net_plane; + struct tipc_node_map nodes; +}; + +/* Media-name and interface-name buffers; presumably the two halves of a + * "media:interface" bearer name - TODO confirm against the name parser + */ +struct tipc_bearer_names { + char media_name[TIPC_MAX_MEDIA_NAME]; + char if_name[TIPC_MAX_IF_NAME]; +}; + +/* + * TIPC routines available to supported media types + */ + +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); + +/* + * Routines made available to TIPC by supported media types + */ +extern struct tipc_media eth_media_info; + +#ifdef CONFIG_TIPC_MEDIA_IB +extern struct tipc_media ib_media_info; +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; +#endif + +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); + +int tipc_media_set_priority(const char *name, u32 new_value); +int tipc_media_set_window(const char *name, u32 new_value); +void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); +void tipc_disable_l2_media(struct tipc_bearer *b); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b,
struct tipc_media_addr *dest); + +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +struct tipc_media *tipc_media_find(const char *name); +int tipc_bearer_setup(void); +void tipc_bearer_cleanup(void); +void tipc_bearer_stop(struct net *net); +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, + struct tipc_media_addr *dest); + +#endif /* _TIPC_BEARER_H */ diff --git a/kernel/net/tipc/core.c b/kernel/net/tipc/core.c new file mode 100644 index 000000000..be1c9fa60 --- /dev/null +++ b/kernel/net/tipc/core.c @@ -0,0 +1,169 @@ +/* + * net/tipc/core.c: TIPC module code + * + * Copyright (c) 2003-2006, 2013, Ericsson AB + * Copyright (c) 2005-2006, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "core.h" +#include "name_table.h" +#include "subscr.h" +#include "bearer.h" +#include "net.h" +#include "socket.h" + +#include <linux/module.h> + +/* configurable TIPC parameters */ +int tipc_net_id __read_mostly; +int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ + +static int __net_init tipc_init_net(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; + + tn->net_id = 4711; + tn->own_addr = 0; + get_random_bytes(&tn->random, sizeof(int)); + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); + + err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + + err = tipc_subscr_start(net); + if (err) + goto out_subscr; + return 0; + +out_subscr: + tipc_nametbl_stop(net); +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: + return err; +} + +static void __net_exit tipc_exit_net(struct net *net) +{ + tipc_subscr_stop(net); + tipc_net_stop(net); + tipc_nametbl_stop(net); + tipc_sk_rht_destroy(net); +} + +static struct pernet_operations tipc_net_ops = { + .init = tipc_init_net, + .exit = 
tipc_exit_net, + .id = &tipc_net_id, + .size = sizeof(struct tipc_net), +}; + +static int __init tipc_init(void) +{ + int err; + + pr_info("Activated (version " TIPC_MOD_VER ")\n"); + + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; + + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + + err = tipc_socket_init(); + if (err) + goto out_socket; + + err = tipc_register_sysctl(); + if (err) + goto out_sysctl; + + err = register_pernet_subsys(&tipc_net_ops); + if (err) + goto out_pernet; + + err = tipc_bearer_setup(); + if (err) + goto out_bearer; + + pr_info("Started in single node mode\n"); + return 0; +out_bearer: + unregister_pernet_subsys(&tipc_net_ops); +out_pernet: + tipc_unregister_sysctl(); +out_sysctl: + tipc_socket_stop(); +out_socket: + tipc_netlink_compat_stop(); +out_netlink_compat: + tipc_netlink_stop(); +out_netlink: + pr_err("Unable to start in single node mode\n"); + return err; +} + +static void __exit tipc_exit(void) +{ + tipc_bearer_cleanup(); + unregister_pernet_subsys(&tipc_net_ops); + tipc_netlink_stop(); + tipc_netlink_compat_stop(); + tipc_socket_stop(); + tipc_unregister_sysctl(); + + pr_info("Deactivated\n"); +} + +module_init(tipc_init); +module_exit(tipc_exit); + +MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(TIPC_MOD_VER); diff --git a/kernel/net/tipc/core.h b/kernel/net/tipc/core.h new file mode 100644 index 000000000..3dc68c7a9 --- /dev/null +++ b/kernel/net/tipc/core.h @@ -0,0 +1,116 @@ +/* + * net/tipc/core.h: Include file for TIPC global declarations + * + * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_CORE_H +#define _TIPC_CORE_H + +#include <linux/tipc.h> +#include <linux/tipc_config.h> +#include <linux/tipc_netlink.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/atomic.h> +#include <asm/hardirq.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/rtnetlink.h> +#include <linux/etherdevice.h> +#include <net/netns/generic.h> +#include <linux/rhashtable.h> + +#include "node.h" +#include "bearer.h" +#include "bcast.h" +#include "netlink.h" +#include "link.h" +#include "node.h" +#include "msg.h" + +#define TIPC_MOD_VER "2.0.0" + +extern int tipc_net_id __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; +extern int sysctl_tipc_named_timeout __read_mostly; + +struct tipc_net { + u32 own_addr; + int net_id; + int random; + + /* Node table and node list */ + spinlock_t node_list_lock; + struct hlist_head node_htable[NODE_HTABLE_SIZE]; + struct list_head node_list; + u32 num_nodes; + u32 num_links; + + /* Bearer list */ + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; + + /* Broadcast link */ + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; + + /* Socket hash table */ + struct rhashtable sk_rht; + + /* Name table */ + spinlock_t nametbl_lock; + struct name_table *nametbl; + + /* Topology subscription server */ + struct tipc_server *topsrv; + atomic_t subscription_count; +}; + +#ifdef CONFIG_SYSCTL +int tipc_register_sysctl(void); +void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif +#endif diff --git a/kernel/net/tipc/discover.c b/kernel/net/tipc/discover.c new file mode 100644 index 000000000..967e292f5 --- /dev/null +++ 
b/kernel/net/tipc/discover.c @@ -0,0 +1,418 @@ +/* + * net/tipc/discover.c + * + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "link.h" +#include "discover.h" + +/* min delay during bearer start up */ +#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_LINK_REQ_INACTIVE 0xffffffff + +/** + * struct tipc_link_req - information about an ongoing link setup request + * @bearer_id: identity of bearer issuing requests + * @net: network namespace instance + * @dest: destination address for request messages + * @domain: network domain to which links can be established + * @num_nodes: number of nodes currently discovered (i.e. with an active link) + * @lock: spinlock for controlling access to requests + * @buf: request message to be (repeatedly) sent + * @timer: timer governing period between requests + * @timer_intv: current interval between requests (in jiffies), or + * TIPC_LINK_REQ_INACTIVE when the timer is not in use + */ +struct tipc_link_req { + u32 bearer_id; + struct tipc_media_addr dest; + struct net *net; + u32 domain; + int num_nodes; + spinlock_t lock; + struct sk_buff *buf; + struct timer_list timer; + unsigned long timer_intv; +}; + +/** + * tipc_disc_init_msg - initialize a link setup message + * @net: the applicable net namespace + * @buf: buffer whose message header is initialized + * @type: message type (request or response) + * @b_ptr: ptr to bearer issuing message + */ +static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, + struct tipc_bearer *b_ptr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_msg *msg; + u32 dest_domain = b_ptr->domain; + + msg = buf_msg(buf); + tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, + MAX_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); + msg_set_node_sig(msg, tn->random); + msg_set_node_capabilities(msg, 0); + msg_set_dest_domain(msg, dest_domain); + msg_set_bc_netid(msg, tn->net_id); + /* Let the media write the bearer's own address into the message */ + b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); +} +
+/** + * disc_dupl_alert - issue node address duplication alert + * @b_ptr: pointer to bearer detecting duplication + * @node_addr: duplicated node address + * @media_addr: media address advertised by duplicated node + */ +static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, + struct tipc_media_addr *media_addr) +{ + char node_addr_str[16]; + char media_addr_str[64]; + + tipc_addr_string_fill(node_addr_str, node_addr); + tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), + media_addr); + pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, + media_addr_str, b_ptr->name); +} + +/** + * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: the applicable net namespace + * @buf: buffer containing message + * @bearer: bearer that message arrived on + */ +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *bearer) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_link *link; + struct tipc_media_addr maddr; + struct sk_buff *rbuf; + struct tipc_msg *msg = buf_msg(buf); + u32 ddom = msg_dest_domain(msg); + u32 onode = msg_prevnode(msg); + u32 net_id = msg_bc_netid(msg); + u32 mtyp = msg_type(msg); + u32 signature = msg_node_sig(msg); + u16 caps = msg_node_capabilities(msg); + bool addr_match = false; + bool sign_match = false; + bool link_up = false; + bool accept_addr = false; + bool accept_sign = false; + bool respond = false; + + bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg)); + kfree_skb(buf); + + /* Ensure message from node is valid and communication is permitted */ + if (net_id != tn->net_id) + return; + if (maddr.broadcast) + return; + if (!tipc_addr_domain_valid(ddom)) + return; + if (!tipc_addr_node_valid(onode)) + return; + + if (in_own_node(net, onode)) { + if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) + disc_dupl_alert(bearer, tn->own_addr, &maddr); + return; + } + if (!tipc_in_scope(ddom, 
tn->own_addr)) + return; + if (!tipc_in_scope(bearer->domain, onode)) + return; + + node = tipc_node_create(net, onode); + if (!node) + return; + tipc_node_lock(node); + node->capabilities = caps; + link = node->links[bearer->identity]; + + /* Prepare to validate requesting node's signature and media address */ + sign_match = (signature == node->signature); + addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr)); + link_up = link && tipc_link_is_up(link); + + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { + /* All is fine. Do nothing. */ + } else if (sign_match && addr_match && !link_up) { + /* Respond. The link will come up in due time */ + respond = true; + } else if (sign_match && !addr_match && link_up) { + /* Peer has changed i/f address without rebooting. + * If so, the link will reset soon, and the next + * discovery will be accepted. So we can ignore it. + * It may also be an cloned or malicious peer having + * chosen the same node address and signature as an + * existing one. + * Ignore requests until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (sign_match && !addr_match && !link_up) { + /* Peer link has changed i/f address without rebooting. + * It may also be a cloned or malicious peer; we can't + * distinguish between the two. + * The signature is correct, so we must accept. + */ + accept_addr = true; + respond = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. Two possibilities: + * - Delayed re-discovery; this link endpoint has already + * reset and re-established contact with the peer, before + * receiving a discovery message from that node. + * (The peer happened to receive one from this node first). + * - The peer came back so fast that our side has not + * discovered it yet. 
Probing from this side will soon + * reset the link, since there can be no working link + * endpoint at the peer end, and the link will re-establish. + * Accept the signature, since it comes from a known peer. + */ + accept_sign = true; + } else if (!sign_match && addr_match && !link_up) { + /* The peer node has rebooted. + * Accept signature, since it is a known peer. + */ + accept_sign = true; + respond = true; + } else if (!sign_match && !addr_match && link_up) { + /* Peer rebooted with new address, or a new/duplicate peer. + * Ignore until the link goes down, if ever. + */ + disc_dupl_alert(bearer, onode, &maddr); + } else if (!sign_match && !addr_match && !link_up) { + /* Peer rebooted with new address, or it is a new peer. + * Accept signature and address. + */ + accept_sign = true; + accept_addr = true; + respond = true; + } + + if (accept_sign) + node->signature = signature; + + if (accept_addr) { + if (!link) + link = tipc_link_create(node, bearer, &maddr); + if (link) { + memcpy(&link->media_addr, &maddr, sizeof(maddr)); + tipc_link_reset(link); + } else { + respond = false; + } + } + + /* Send response, if necessary */ + if (respond && (mtyp == DSC_REQ_MSG)) { + rbuf = tipc_buf_acquire(MAX_H_SIZE); + if (rbuf) { + tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(net, bearer->identity, rbuf, &maddr); + kfree_skb(rbuf); + } + } + tipc_node_unlock(node); + tipc_node_put(node); +} + +/** + * disc_update - update frequency of periodic link setup requests + * @req: ptr to link request structure + * + * Reinitiates discovery process if discovery object has no associated nodes + * and is either not currently searching or is searching at a slow rate + */ +static void disc_update(struct tipc_link_req *req) +{ + if (!req->num_nodes) { + if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || + (req->timer_intv > TIPC_LINK_REQ_FAST)) { + req->timer_intv = TIPC_LINK_REQ_INIT; + mod_timer(&req->timer, jiffies + req->timer_intv); + } + } +} + +/** + * 
tipc_disc_add_dest - increment count of discovered nodes + * @req: ptr to link request structure + */ +void tipc_disc_add_dest(struct tipc_link_req *req) +{ + spin_lock_bh(&req->lock); + req->num_nodes++; + spin_unlock_bh(&req->lock); +} + +/** + * tipc_disc_remove_dest - decrement count of discovered nodes + * @req: ptr to link request structure + * + * Also calls disc_update() so the request rate can be re-evaluated. + */ +void tipc_disc_remove_dest(struct tipc_link_req *req) +{ + spin_lock_bh(&req->lock); + req->num_nodes--; + disc_update(req); + spin_unlock_bh(&req->lock); +} + +/** + * disc_timeout - send a periodic link setup request + * @data: ptr to link request structure + * + * Called whenever a link setup request timer associated with a bearer expires. + */ +static void disc_timeout(unsigned long data) +{ + struct tipc_link_req *req = (struct tipc_link_req *)data; + int max_delay; + + spin_lock_bh(&req->lock); + + /* Stop searching if only desired node has been found */ + if (tipc_node(req->domain) && req->num_nodes) { + req->timer_intv = TIPC_LINK_REQ_INACTIVE; + goto exit; + } + + /* + * Send discovery message, then update discovery timer + * + * Keep doubling time between requests until limit is reached; + * hold at fast polling rate if we don't have any associated nodes, + * otherwise hold at slow polling rate + */ + tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); + + + req->timer_intv *= 2; + if (req->num_nodes) + max_delay = TIPC_LINK_REQ_SLOW; + else + max_delay = TIPC_LINK_REQ_FAST; + if (req->timer_intv > max_delay) + req->timer_intv = max_delay; + + mod_timer(&req->timer, jiffies + req->timer_intv); +exit: + spin_unlock_bh(&req->lock); +} + +/** + * tipc_disc_create - create object to send periodic link setup requests + * @net: the applicable net namespace + * @b_ptr: ptr to bearer issuing requests + * @dest: destination address for request messages + * + * Returns 0 if successful, otherwise -errno.
+ */ +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest) +{ + struct tipc_link_req *req; + + req = kmalloc(sizeof(*req), GFP_ATOMIC); + if (!req) + return -ENOMEM; + req->buf = tipc_buf_acquire(MAX_H_SIZE); + if (!req->buf) { + kfree(req); + return -ENOMEM; + } + + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + memcpy(&req->dest, dest, sizeof(*dest)); + req->net = net; + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; + req->num_nodes = 0; + req->timer_intv = TIPC_LINK_REQ_INIT; + spin_lock_init(&req->lock); + setup_timer(&req->timer, disc_timeout, (unsigned long)req); + mod_timer(&req->timer, jiffies + req->timer_intv); + b_ptr->link_req = req; + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + return 0; +} + +/** + * tipc_disc_delete - destroy object sending periodic link setup requests + * @req: ptr to link request structure + */ +void tipc_disc_delete(struct tipc_link_req *req) +{ + del_timer_sync(&req->timer); + kfree_skb(req->buf); + kfree(req); +} + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace + * @b_ptr: ptr to bearer issuing requests + * @dest_domain: network domain to which links can be established + */ +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) +{ + struct tipc_link_req *req = b_ptr->link_req; + + spin_lock_bh(&req->lock); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + req->net = net; + req->bearer_id = b_ptr->identity; + req->domain = b_ptr->domain; + req->num_nodes = 0; + req->timer_intv = TIPC_LINK_REQ_INIT; + mod_timer(&req->timer, jiffies + req->timer_intv); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + spin_unlock_bh(&req->lock); +} diff --git a/kernel/net/tipc/discover.h b/kernel/net/tipc/discover.h new file mode 100644 index 000000000..c9b12770c --- /dev/null +++ b/kernel/net/tipc/discover.h @@ -0,0 +1,51 @@ +/* + * net/tipc/discover.h + 
* + * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_DISCOVER_H +#define _TIPC_DISCOVER_H + +struct tipc_link_req; + +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest); +void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); +void tipc_disc_add_dest(struct tipc_link_req *req); +void tipc_disc_remove_dest(struct tipc_link_req *req); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); + +#endif diff --git a/kernel/net/tipc/eth_media.c b/kernel/net/tipc/eth_media.c new file mode 100644 index 000000000..f69a2fde9 --- /dev/null +++ b/kernel/net/tipc/eth_media.c @@ -0,0 +1,99 @@ +/* + * net/tipc/eth_media.c: Ethernet bearer support for TIPC + * + * Copyright (c) 2001-2007, 2013-2014, Ericsson AB + * Copyright (c) 2005-2008, 2011-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "bearer.h" + +/* Convert Ethernet address (media address format) to string */ +static int tipc_eth_addr2str(struct tipc_media_addr *addr, + char *strbuf, int bufsz) +{ + if (bufsz < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ + return 1; + + sprintf(strbuf, "%pM", addr->value); + return 0; +} + +/* Convert from media address format to discovery message addr format */ +static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); + return 0; +} + +/* Convert raw mac address format to media addr format */ +static int tipc_eth_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + memset(addr, 0, sizeof(*addr)); + ether_addr_copy(addr->value, msg); + addr->media_id = TIPC_MEDIA_TYPE_ETH; + addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); + return 0; +} + +/* Convert discovery msg addr format to Ethernet media addr format */ +static int tipc_eth_msg2addr(struct tipc_bearer 
*b, + struct tipc_media_addr *addr, + char *msg) +{ + /* Skip past preamble: */ + msg += TIPC_MEDIA_ADDR_OFFSET; + return tipc_eth_raw2addr(b, addr, msg); +} + +/* Ethernet media registration info */ +struct tipc_media eth_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_eth_addr2str, + .addr2msg = tipc_eth_addr2msg, + .msg2addr = tipc_eth_msg2addr, + .raw2addr = tipc_eth_raw2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_ETH, + .hwaddr_len = ETH_ALEN, + .name = "eth" +}; diff --git a/kernel/net/tipc/ib_media.c b/kernel/net/tipc/ib_media.c new file mode 100644 index 000000000..e8c16718e --- /dev/null +++ b/kernel/net/tipc/ib_media.c @@ -0,0 +1,101 @@ +/* + * net/tipc/ib_media.c: Infiniband bearer support for TIPC + * + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + * + * Based on eth_media.c, which carries the following copyright notice: + * + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2008, 2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/if_infiniband.h> +#include "core.h" +#include "bearer.h" + +/* Convert InfiniBand address (media address format) to string */ +static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) +{ + if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ + return 1; + + sprintf(str_buf, "%20phC", a->value); + + return 0; +} + +/* Convert from media address format to discovery message addr format */ +static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + memcpy(msg, addr->value, INFINIBAND_ALEN); + return 0; +} + +/* Convert raw InfiniBand address format to media addr format */ +static int tipc_ib_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + memset(addr, 0, sizeof(*addr)); + memcpy(addr->value, msg, INFINIBAND_ALEN); + addr->media_id = TIPC_MEDIA_TYPE_IB; + addr->broadcast = !memcmp(msg, b->bcast_addr.value, + INFINIBAND_ALEN); + return 0; +} +
+/* Convert discovery msg addr format to InfiniBand media addr format */ +static int tipc_ib_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + return tipc_ib_raw2addr(b, addr, msg); +} + +/* InfiniBand media registration info */ +struct tipc_media ib_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_ib_addr2str, + .addr2msg = tipc_ib_addr2msg, + .msg2addr = tipc_ib_msg2addr, + .raw2addr = tipc_ib_raw2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_IB, + .hwaddr_len = INFINIBAND_ALEN, + .name = "ib" +}; diff --git a/kernel/net/tipc/link.c b/kernel/net/tipc/link.c new file mode 100644 index 000000000..43a515dc9 --- /dev/null +++ b/kernel/net/tipc/link.c @@ -0,0 +1,2272 @@ +/* + * net/tipc/link.c: TIPC link code + * + * Copyright (c) 1996-2007, 2012-2015, Ericsson AB + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "subscr.h" +#include "link.h" +#include "bcast.h" +#include "socket.h" +#include "name_distr.h" +#include "discover.h" +#include "netlink.h" + +#include <linux/pkt_sched.h> + +/* + * Error message prefixes + */ +static const char *link_co_err = "Link changeover error, "; +static const char *link_rst_msg = "Resetting link "; +static const char *link_unk_evt = "Unknown link event "; + +static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_LINK_NAME + }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + +/* Properties valid for media, bearer and link */ +static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { + [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC
}, + [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } +}; + +/* + * Out-of-range value for link session numbers + */ +#define INVALID_SESSION 0x10000 + +/* + * Link state events: + */ +#define STARTING_EVT 856384768 /* link processing trigger */ +#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */ +#define TIMEOUT_EVT 560817u /* link timer expired */ + +/* + * State value stored in 'failover_pkts' + */ +#define FIRST_FAILOVER 0xffffu + +static void link_handle_out_of_seq_msg(struct tipc_link *link, + struct sk_buff *skb); +static void tipc_link_proto_rcv(struct tipc_link *link, + struct sk_buff *skb); +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); +static void link_state_event(struct tipc_link *l_ptr, u32 event); +static void link_reset_statistics(struct tipc_link *l_ptr); +static void link_print(struct tipc_link *l_ptr, const char *str); +static void tipc_link_sync_xmit(struct tipc_link *l); +static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); +static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); +/* + * Simple link routines + */ +static unsigned int align(unsigned int i) +{ + return (i + 3) & ~3u; +} + +static void tipc_link_release(struct kref *kref) +{ + kfree(container_of(kref, struct tipc_link, ref)); +} + +static void tipc_link_get(struct tipc_link *l_ptr) +{ + kref_get(&l_ptr->ref); +} + +static void tipc_link_put(struct tipc_link *l_ptr) +{ + kref_put(&l_ptr->ref, tipc_link_release); +} + +static struct tipc_link *tipc_parallel_link(struct tipc_link *l) +{ + if (l->owner->active_links[0] != l) + return l->owner->active_links[0]; + return l->owner->active_links[1]; +} + +/* + * Simple non-static link routines (i.e. 
referenced outside this file) + */ +int tipc_link_is_up(struct tipc_link *l_ptr) +{ + if (!l_ptr) + return 0; + return link_working_working(l_ptr) || link_working_unknown(l_ptr); +} + +int tipc_link_is_active(struct tipc_link *l_ptr) +{ + return (l_ptr->owner->active_links[0] == l_ptr) || + (l_ptr->owner->active_links[1] == l_ptr); +} + +/** + * link_timeout - handle expiration of link timer + * @data: pointer to link, cast to unsigned long + */ +static void link_timeout(unsigned long data) +{ + struct tipc_link *l_ptr = (struct tipc_link *)data; + struct sk_buff *skb; + + tipc_node_lock(l_ptr->owner); + + /* update counters used in statistical profiling of send traffic */ + l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq); + l_ptr->stats.queue_sz_counts++; + + skb = skb_peek(&l_ptr->transmq); + if (skb) { + struct tipc_msg *msg = buf_msg(skb); + u32 length = msg_size(msg); + + if ((msg_user(msg) == MSG_FRAGMENTER) && + (msg_type(msg) == FIRST_FRAGMENT)) { + length = msg_size(msg_get_wrapped(msg)); + } + if (length) { + l_ptr->stats.msg_lengths_total += length; + l_ptr->stats.msg_length_counts++; + if (length <= 64) + l_ptr->stats.msg_length_profile[0]++; + else if (length <= 256) + l_ptr->stats.msg_length_profile[1]++; + else if (length <= 1024) + l_ptr->stats.msg_length_profile[2]++; + else if (length <= 4096) + l_ptr->stats.msg_length_profile[3]++; + else if (length <= 16384) + l_ptr->stats.msg_length_profile[4]++; + else if (length <= 32768) + l_ptr->stats.msg_length_profile[5]++; + else + l_ptr->stats.msg_length_profile[6]++; + } + } + + /* do all other link processing performed on a periodic basis */ + link_state_event(l_ptr, TIMEOUT_EVT); + + if (skb_queue_len(&l_ptr->backlogq)) + tipc_link_push_packets(l_ptr); + + tipc_node_unlock(l_ptr->owner); + tipc_link_put(l_ptr); +} + +static void link_set_timer(struct tipc_link *link, unsigned long time) +{ + if (!mod_timer(&link->timer, jiffies + time)) + tipc_link_get(link); +} + +/** + * tipc_link_create - create a new link
+ * @n_ptr: pointer to associated node + * @b_ptr: pointer to associated bearer + * @media_addr: media address to use when sending messages over link + * + * Returns pointer to link. + */ +struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, + const struct tipc_media_addr *media_addr) +{ + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + struct tipc_link *l_ptr; + struct tipc_msg *msg; + char *if_name; + char addr_string[16]; + u32 peer = n_ptr->addr; + + if (n_ptr->link_cnt >= MAX_BEARERS) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + pr_err("Attempt to establish %uth link to %s. Max %u allowed.\n", + n_ptr->link_cnt, addr_string, MAX_BEARERS); + return NULL; + } + + if (n_ptr->links[b_ptr->identity]) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + pr_err("Attempt to establish second link on <%s> to %s\n", + b_ptr->name, addr_string); + return NULL; + } + + l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); + if (!l_ptr) { + pr_warn("Link creation failed, no memory\n"); + return NULL; + } + kref_init(&l_ptr->ref); + l_ptr->addr = peer; + if_name = strchr(b_ptr->name, ':') + 1; + sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", + tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), + if_name, + tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); + /* note: peer i/f name is updated by reset/activate message */ + memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); + l_ptr->owner = n_ptr; + l_ptr->checkpoint = 1; + l_ptr->peer_session = INVALID_SESSION; + l_ptr->bearer_id = b_ptr->identity; + link_set_supervision_props(l_ptr, b_ptr->tolerance); + l_ptr->state = RESET_UNKNOWN; + + l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; + msg = l_ptr->pmsg; + tipc_msg_init(tn->own_addr, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, + l_ptr->addr); + msg_set_size(msg, sizeof(l_ptr->proto_msg)); + msg_set_session(msg, (tn->random & 0xffff)); + msg_set_bearer_id(msg, 
b_ptr->identity); + strcpy((char *)msg_data(msg), if_name); + l_ptr->net_plane = b_ptr->net_plane; + l_ptr->advertised_mtu = b_ptr->mtu; + l_ptr->mtu = l_ptr->advertised_mtu; + l_ptr->priority = b_ptr->priority; + tipc_link_set_queue_limits(l_ptr, b_ptr->window); + l_ptr->next_out_no = 1; + __skb_queue_head_init(&l_ptr->transmq); + __skb_queue_head_init(&l_ptr->backlogq); + __skb_queue_head_init(&l_ptr->deferdq); + skb_queue_head_init(&l_ptr->wakeupq); + skb_queue_head_init(&l_ptr->inputq); + skb_queue_head_init(&l_ptr->namedq); + link_reset_statistics(l_ptr); + tipc_node_attach_link(n_ptr, l_ptr); + setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); + link_state_event(l_ptr, STARTING_EVT); + + return l_ptr; +} + +/** + * tipc_link_delete - Delete a link + * @l: link to be deleted + */ +void tipc_link_delete(struct tipc_link *l) +{ + tipc_link_reset(l); + if (del_timer(&l->timer)) + tipc_link_put(l); + l->flags |= LINK_STOPPED; + /* Delete link now, or when timer is finished: */ + tipc_link_reset_fragments(l); + tipc_node_detach_link(l->owner, l); + tipc_link_put(l); +} + +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *link; + struct tipc_node *node; + + rcu_read_lock(); + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_lock(node); + link = node->links[bearer_id]; + if (link) + tipc_link_delete(link); + tipc_node_unlock(node); + } + rcu_read_unlock(); +} + +/** + * link_schedule_user - schedule a message sender for wakeup after congestion + * @link: congested link + * @list: message that was attempted sent + * Create pseudo msg to send back to user when congestion abates + * Only consumes message if there is an error + */ +static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) +{ + struct tipc_msg *msg = buf_msg(skb_peek(list)); + int imp = msg_importance(msg); + u32 oport = 
msg_origport(msg); + u32 addr = link_own_addr(link); + struct sk_buff *skb; + + /* This really cannot happen... */ + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + goto err; + } + /* Non-blocking sender: */ + if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) + return -ELINKCONG; + + /* Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + addr, addr, oport, 0, 0); + if (!skb) + goto err; + TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); + TIPC_SKB_CB(skb)->chain_imp = imp; + skb_queue_tail(&link->wakeupq, skb); + link->stats.link_congs++; + return -ELINKCONG; +err: + __skb_queue_purge(list); + return -ENOBUFS; +} + +/** + * link_prepare_wakeup - prepare users for wakeup after congestion + * @link: congested link + * Move a number of waiting users, as permitted by available space in + * the send queue, from link wait queue to node wait queue for wakeup + */ +void link_prepare_wakeup(struct tipc_link *l) +{ + int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; + int imp, lim; + struct sk_buff *skb, *tmp; + + skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + lim = l->window + l->backlog[imp].limit; + pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; + if ((pnd[imp] + l->backlog[imp].len) >= lim) + break; + skb_unlink(skb, &l->wakeupq); + skb_queue_tail(&l->inputq, skb); + l->owner->inputq = &l->inputq; + l->owner->action_flags |= TIPC_MSG_EVT; + } +} + +/** + * tipc_link_reset_fragments - purge link's inbound message fragments queue + * @l_ptr: pointer to link + */ +void tipc_link_reset_fragments(struct tipc_link *l_ptr) +{ + kfree_skb(l_ptr->reasm_buf); + l_ptr->reasm_buf = NULL; +} + +static void tipc_link_purge_backlog(struct tipc_link *l) +{ + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + 
l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; +} + +/** + * tipc_link_purge_queues - purge all pkt queues associated with link + * @l_ptr: pointer to link + */ +void tipc_link_purge_queues(struct tipc_link *l_ptr) +{ + __skb_queue_purge(&l_ptr->deferdq); + __skb_queue_purge(&l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); + tipc_link_reset_fragments(l_ptr); +} + +void tipc_link_reset(struct tipc_link *l_ptr) +{ + u32 prev_state = l_ptr->state; + int was_active_link = tipc_link_is_active(l_ptr); + struct tipc_node *owner = l_ptr->owner; + struct tipc_link *pl = tipc_parallel_link(l_ptr); + + msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); + + /* Link is down, accept any session */ + l_ptr->peer_session = INVALID_SESSION; + + /* Prepare for renewed mtu size negotiation */ + l_ptr->mtu = l_ptr->advertised_mtu; + + l_ptr->state = RESET_UNKNOWN; + + if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET)) + return; + + tipc_node_link_down(l_ptr->owner, l_ptr); + tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); + + if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { + l_ptr->flags |= LINK_FAILINGOVER; + l_ptr->failover_checkpt = l_ptr->next_in_no; + pl->failover_pkts = FIRST_FAILOVER; + pl->failover_checkpt = l_ptr->next_in_no; + pl->failover_skb = l_ptr->reasm_buf; + } else { + kfree_skb(l_ptr->reasm_buf); + } + /* Clean up all queues, except inputq: */ + __skb_queue_purge(&l_ptr->transmq); + __skb_queue_purge(&l_ptr->deferdq); + if (!owner->inputq) + owner->inputq = &l_ptr->inputq; + skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); + if (!skb_queue_empty(owner->inputq)) + owner->action_flags |= TIPC_MSG_EVT; + tipc_link_purge_backlog(l_ptr); + l_ptr->reasm_buf = NULL; + l_ptr->rcv_unacked = 0; + l_ptr->checkpoint = 1; + l_ptr->next_out_no = 1; + l_ptr->fsm_msg_cnt = 0; + l_ptr->stale_count = 0; + link_reset_statistics(l_ptr); +} + +void 
tipc_link_reset_list(struct net *net, unsigned int bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l_ptr; + struct tipc_node *n_ptr; + + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { + tipc_node_lock(n_ptr); + l_ptr = n_ptr->links[bearer_id]; + if (l_ptr) + tipc_link_reset(l_ptr); + tipc_node_unlock(n_ptr); + } + rcu_read_unlock(); +} + +static void link_activate(struct tipc_link *link) +{ + struct tipc_node *node = link->owner; + + link->next_in_no = 1; + link->stats.recv_info = 1; + tipc_node_link_up(node, link); + tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); +} + +/** + * link_state_event - link finite state machine + * @l_ptr: pointer to link + * @event: state machine event to process + */ +static void link_state_event(struct tipc_link *l_ptr, unsigned int event) +{ + struct tipc_link *other; + unsigned long cont_intv = l_ptr->cont_intv; + + if (l_ptr->flags & LINK_STOPPED) + return; + + if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) + return; /* Not yet. 
*/ + + if (l_ptr->flags & LINK_FAILINGOVER) { + if (event == TIMEOUT_EVT) + link_set_timer(l_ptr, cont_intv); + return; + } + + switch (l_ptr->state) { + case WORKING_WORKING: + switch (event) { + case TRAFFIC_MSG_EVT: + case ACTIVATE_MSG: + break; + case TIMEOUT_EVT: + if (l_ptr->next_in_no != l_ptr->checkpoint) { + l_ptr->checkpoint = l_ptr->next_in_no; + if (tipc_bclink_acks_missing(l_ptr->owner)) { + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + } + link_set_timer(l_ptr, cont_intv); + break; + } + l_ptr->state = WORKING_UNKNOWN; + l_ptr->fsm_msg_cnt = 0; + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv / 4); + break; + case RESET_MSG: + pr_debug("%s<%s>, requested by peer\n", + link_rst_msg, l_ptr->name); + tipc_link_reset(l_ptr); + l_ptr->state = RESET_RESET; + l_ptr->fsm_msg_cnt = 0; + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; + default: + pr_debug("%s%u in WW state\n", link_unk_evt, event); + } + break; + case WORKING_UNKNOWN: + switch (event) { + case TRAFFIC_MSG_EVT: + case ACTIVATE_MSG: + l_ptr->state = WORKING_WORKING; + l_ptr->fsm_msg_cnt = 0; + link_set_timer(l_ptr, cont_intv); + break; + case RESET_MSG: + pr_debug("%s<%s>, requested by peer while probing\n", + link_rst_msg, l_ptr->name); + tipc_link_reset(l_ptr); + l_ptr->state = RESET_RESET; + l_ptr->fsm_msg_cnt = 0; + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; + case TIMEOUT_EVT: + if (l_ptr->next_in_no != l_ptr->checkpoint) { + l_ptr->state = WORKING_WORKING; + l_ptr->fsm_msg_cnt = 0; + l_ptr->checkpoint = l_ptr->next_in_no; + if (tipc_bclink_acks_missing(l_ptr->owner)) { + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + } + link_set_timer(l_ptr, cont_intv); + } else if (l_ptr->fsm_msg_cnt < 
l_ptr->abort_limit) { + tipc_link_proto_xmit(l_ptr, STATE_MSG, + 1, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv / 4); + } else { /* Link has failed */ + pr_debug("%s<%s>, peer not responding\n", + link_rst_msg, l_ptr->name); + tipc_link_reset(l_ptr); + l_ptr->state = RESET_UNKNOWN; + l_ptr->fsm_msg_cnt = 0; + tipc_link_proto_xmit(l_ptr, RESET_MSG, + 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + } + break; + default: + pr_err("%s%u in WU state\n", link_unk_evt, event); + } + break; + case RESET_UNKNOWN: + switch (event) { + case TRAFFIC_MSG_EVT: + break; + case ACTIVATE_MSG: + other = l_ptr->owner->active_links[0]; + if (other && link_working_unknown(other)) + break; + l_ptr->state = WORKING_WORKING; + l_ptr->fsm_msg_cnt = 0; + link_activate(l_ptr); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + if (l_ptr->owner->working_links == 1) + tipc_link_sync_xmit(l_ptr); + link_set_timer(l_ptr, cont_intv); + break; + case RESET_MSG: + l_ptr->state = RESET_RESET; + l_ptr->fsm_msg_cnt = 0; + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 1, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; + case STARTING_EVT: + l_ptr->flags |= LINK_STARTED; + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; + case TIMEOUT_EVT: + tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; + default: + pr_err("%s%u in RU state\n", link_unk_evt, event); + } + break; + case RESET_RESET: + switch (event) { + case TRAFFIC_MSG_EVT: + case ACTIVATE_MSG: + other = l_ptr->owner->active_links[0]; + if (other && link_working_unknown(other)) + break; + l_ptr->state = WORKING_WORKING; + l_ptr->fsm_msg_cnt = 0; + link_activate(l_ptr); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + if (l_ptr->owner->working_links == 1) + tipc_link_sync_xmit(l_ptr); + link_set_timer(l_ptr, cont_intv); + 
break; + case RESET_MSG: + break; + case TIMEOUT_EVT: + tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, + 0, 0, 0, 0); + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; + default: + pr_err("%s%u in RR state\n", link_unk_evt, event); + } + break; + default: + pr_err("Unknown link state %u/%u\n", l_ptr->state, event); + } +} + +/** + * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked + * @link: link to use + * @list: chain of buffers containing message + * + * Consumes the buffer chain, except when returning -ELINKCONG, + * since the caller then may want to make more send attempts. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted + */ +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list) +{ + struct tipc_msg *msg = buf_msg(skb_peek(list)); + unsigned int maxwin = link->window; + unsigned int imp = msg_importance(msg); + uint mtu = link->mtu; + uint ack = mod(link->next_in_no - 1); + uint seqno = link->next_out_no; + uint bc_last_in = link->owner->bclink.last_in; + struct tipc_media_addr *addr = &link->media_addr; + struct sk_buff_head *transmq = &link->transmq; + struct sk_buff_head *backlogq = &link->backlogq; + struct sk_buff *skb, *tmp; + + /* Match backlog limit against msg importance: */ + if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) + return link_schedule_user(link, list); + + if (unlikely(msg_size(msg) > mtu)) { + __skb_queue_purge(list); + return -EMSGSIZE; + } + /* Prepare each packet for sending, and add to relevant queue: */ + skb_queue_walk_safe(list, skb, tmp) { + __skb_unlink(skb, list); + msg = buf_msg(skb); + msg_set_seqno(msg, seqno); + msg_set_ack(msg, ack); + msg_set_bcast_ack(msg, bc_last_in); + + if (likely(skb_queue_len(transmq) < maxwin)) { + __skb_queue_tail(transmq, skb); + tipc_bearer_send(net, link->bearer_id, skb, addr); + link->rcv_unacked = 0; + seqno++; + 
continue; + } + if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) { + link->stats.sent_bundled++; + continue; + } + if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { + link->stats.sent_bundled++; + link->stats.sent_bundles++; + imp = msg_importance(buf_msg(skb)); + } + __skb_queue_tail(backlogq, skb); + link->backlog[imp].len++; + seqno++; + } + link->next_out_no = seqno; + return 0; +} + +static void skb2list(struct sk_buff *skb, struct sk_buff_head *list) +{ + skb_queue_head_init(list); + __skb_queue_tail(list, skb); +} + +static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) +{ + struct sk_buff_head head; + + skb2list(skb, &head); + return __tipc_link_xmit(link->owner->net, link, &head); +} + +/* tipc_link_xmit_skb(): send single buffer to destination + * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. 
+ * TODO: Return real return value, and let callers use + * tipc_wait_for_sendpkt() where applicable + */ +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) +{ + struct sk_buff_head head; + int rc; + + skb2list(skb, &head); + rc = tipc_link_xmit(net, &head, dnode, selector); + if (rc == -ELINKCONG) + kfree_skb(skb); + return 0; +} + +/** + * tipc_link_xmit() is the general link level function for message sending + * @net: the applicable net namespace + * @list: chain of buffers containing message + * @dsz: amount of user data to be sent + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, + u32 selector) +{ + struct tipc_link *link = NULL; + struct tipc_node *node; + int rc = -EHOSTUNREACH; + + node = tipc_node_find(net, dnode); + if (node) { + tipc_node_lock(node); + link = node->active_links[selector & 1]; + if (link) + rc = __tipc_link_xmit(net, link, list); + tipc_node_unlock(node); + tipc_node_put(node); + } + if (link) + return rc; + + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } + + __skb_queue_purge(list); + return rc; +} + +/* + * tipc_link_sync_xmit - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. 
+ * + * Called with node locked + */ +static void tipc_link_sync_xmit(struct tipc_link *link) +{ + struct sk_buff *skb; + struct tipc_msg *msg; + + skb = tipc_buf_acquire(INT_H_SIZE); + if (!skb) + return; + + msg = buf_msg(skb); + tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG, + INT_H_SIZE, link->addr); + msg_set_last_bcast(msg, link->owner->bclink.acked); + __tipc_link_xmit_skb(link, skb); +} + +/* + * tipc_link_sync_rcv - synchronize broadcast link endpoints. + * Receive the sequence number where we should start receiving and + * acking broadcast packets from a newly added peer node, and open + * up for reception of such packets. + * + * Called with node locked + */ +static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + + n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg); + n->bclink.recv_permitted = true; + kfree_skb(buf); +} + +/* + * tipc_link_push_packets - push unsent packets to bearer + * + * Push out the unsent messages of a link where congestion + * has abated. Node is locked. 
+ * + * Called with node locked + */ +void tipc_link_push_packets(struct tipc_link *link) +{ + struct sk_buff *skb; + struct tipc_msg *msg; + unsigned int ack = mod(link->next_in_no - 1); + + while (skb_queue_len(&link->transmq) < link->window) { + skb = __skb_dequeue(&link->backlogq); + if (!skb) + break; + msg = buf_msg(skb); + link->backlog[msg_importance(msg)].len--; + msg_set_ack(msg, ack); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + link->rcv_unacked = 0; + __skb_queue_tail(&link->transmq, skb); + tipc_bearer_send(link->owner->net, link->bearer_id, + skb, &link->media_addr); + } +} + +void tipc_link_reset_all(struct tipc_node *node) +{ + char addr_string[16]; + u32 i; + + tipc_node_lock(node); + + pr_warn("Resetting all links to %s\n", + tipc_addr_string_fill(addr_string, node->addr)); + + for (i = 0; i < MAX_BEARERS; i++) { + if (node->links[i]) { + link_print(node->links[i], "Resetting link\n"); + tipc_link_reset(node->links[i]); + } + } + + tipc_node_unlock(node); +} + +static void link_retransmit_failure(struct tipc_link *l_ptr, + struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct net *net = l_ptr->owner->net; + + pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); + + if (l_ptr->addr) { + /* Handle failure on standard link */ + link_print(l_ptr, "Resetting link\n"); + tipc_link_reset(l_ptr); + + } else { + /* Handle failure on broadcast link */ + struct tipc_node *n_ptr; + char addr_string[16]; + + pr_info("Msg seq number: %u, ", msg_seqno(msg)); + pr_cont("Outstanding acks: %lu\n", + (unsigned long) TIPC_SKB_CB(buf)->handle); + + n_ptr = tipc_bclink_retransmit_to(net); + + tipc_addr_string_fill(addr_string, n_ptr->addr); + pr_info("Broadcast link info for %s\n", addr_string); + pr_info("Reception permitted: %d, Acked: %u\n", + n_ptr->bclink.recv_permitted, + n_ptr->bclink.acked); + pr_info("Last in: %u, Oos state: %u, Last sent: %u\n", + n_ptr->bclink.last_in, + n_ptr->bclink.oos_state, + 
n_ptr->bclink.last_sent); + + n_ptr->action_flags |= TIPC_BCAST_RESET; + l_ptr->stale_count = 0; + } +} + +void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, + u32 retransmits) +{ + struct tipc_msg *msg; + + if (!skb) + return; + + msg = buf_msg(skb); + + /* Detect repeated retransmit failures */ + if (l_ptr->last_retransmitted == msg_seqno(msg)) { + if (++l_ptr->stale_count > 100) { + link_retransmit_failure(l_ptr, skb); + return; + } + } else { + l_ptr->last_retransmitted = msg_seqno(msg); + l_ptr->stale_count = 1; + } + + skb_queue_walk_from(&l_ptr->transmq, skb) { + if (!retransmits) + break; + msg = buf_msg(skb); + msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); + msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, + &l_ptr->media_addr); + retransmits--; + l_ptr->stats.retransmitted++; + } +} + +/* link_synch(): check if all packets arrived before the synch + * point have been consumed + * Returns true if the parallel links are synched, otherwise false + */ +static bool link_synch(struct tipc_link *l) +{ + unsigned int post_synch; + struct tipc_link *pl; + + pl = tipc_parallel_link(l); + if (pl == l) + goto synched; + + /* Was last pre-synch packet added to input queue ? */ + if (less_eq(pl->next_in_no, l->synch_point)) + return false; + + /* Is it still in the input queue ? 
*/ + post_synch = mod(pl->next_in_no - l->synch_point) - 1; + if (skb_queue_len(&pl->inputq) > post_synch) + return false; +synched: + l->flags &= ~LINK_SYNCHING; + return true; +} + +static void link_retrieve_defq(struct tipc_link *link, + struct sk_buff_head *list) +{ + u32 seq_no; + + if (skb_queue_empty(&link->deferdq)) + return; + + seq_no = buf_seqno(skb_peek(&link->deferdq)); + if (seq_no == mod(link->next_in_no)) + skb_queue_splice_tail_init(&link->deferdq, list); +} + +/** + * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @b_ptr: pointer to bearer message arrived on + * + * Invoked with no locks held. Bearer pointer must point to a valid bearer + * structure (i.e. cannot be NULL), but bearer can be inactive. + */ +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sk_buff_head head; + struct tipc_node *n_ptr; + struct tipc_link *l_ptr; + struct sk_buff *skb1, *tmp; + struct tipc_msg *msg; + u32 seq_no; + u32 ackd; + u32 released; + + skb2list(skb, &head); + + while ((skb = __skb_dequeue(&head))) { + /* Ensure message is well-formed */ + if (unlikely(!tipc_msg_validate(skb))) + goto discard; + + /* Handle arrival of a non-unicast link message */ + msg = buf_msg(skb); + if (unlikely(msg_non_seq(msg))) { + if (msg_user(msg) == LINK_CONFIG) + tipc_disc_rcv(net, skb, b_ptr); + else + tipc_bclink_rcv(net, skb); + continue; + } + + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(msg) && + (msg_destnode(msg) != tn->own_addr))) + goto discard; + + /* Locate neighboring node that sent message */ + n_ptr = tipc_node_find(net, msg_prevnode(msg)); + if (unlikely(!n_ptr)) + goto discard; + + tipc_node_lock(n_ptr); + /* Locate unicast link endpoint that should handle message */ + l_ptr = n_ptr->links[b_ptr->identity]; + if (unlikely(!l_ptr)) + goto unlock; 
+ + /* Verify that communication with node is currently allowed */ + if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && + msg_user(msg) == LINK_PROTOCOL && + (msg_type(msg) == RESET_MSG || + msg_type(msg) == ACTIVATE_MSG) && + !msg_redundant_link(msg)) + n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; + + if (tipc_node_blocked(n_ptr)) + goto unlock; + + /* Validate message sequence number info */ + seq_no = msg_seqno(msg); + ackd = msg_ack(msg); + + /* Release acked messages */ + if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg))) + tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); + + released = 0; + skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) { + if (more(buf_seqno(skb1), ackd)) + break; + __skb_unlink(skb1, &l_ptr->transmq); + kfree_skb(skb1); + released = 1; + } + + /* Try sending any messages link endpoint has pending */ + if (unlikely(skb_queue_len(&l_ptr->backlogq))) + tipc_link_push_packets(l_ptr); + + if (released && !skb_queue_empty(&l_ptr->wakeupq)) + link_prepare_wakeup(l_ptr); + + /* Process the incoming packet */ + if (unlikely(!link_working_working(l_ptr))) { + if (msg_user(msg) == LINK_PROTOCOL) { + tipc_link_proto_rcv(l_ptr, skb); + link_retrieve_defq(l_ptr, &head); + skb = NULL; + goto unlock; + } + + /* Traffic message. 
Conditionally activate link */ + link_state_event(l_ptr, TRAFFIC_MSG_EVT); + + if (link_working_working(l_ptr)) { + /* Re-insert buffer in front of queue */ + __skb_queue_head(&head, skb); + skb = NULL; + goto unlock; + } + goto unlock; + } + + /* Link is now in state WORKING_WORKING */ + if (unlikely(seq_no != mod(l_ptr->next_in_no))) { + link_handle_out_of_seq_msg(l_ptr, skb); + link_retrieve_defq(l_ptr, &head); + skb = NULL; + goto unlock; + } + /* Synchronize with parallel link if applicable */ + if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { + if (!link_synch(l_ptr)) + goto unlock; + } + l_ptr->next_in_no++; + if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) + link_retrieve_defq(l_ptr, &head); + if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { + l_ptr->stats.sent_acks++; + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); + } + tipc_link_input(l_ptr, skb); + skb = NULL; +unlock: + tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); +discard: + if (unlikely(skb)) + kfree_skb(skb); + } +} + +/* tipc_data_input - deliver data and name distr msgs to upper layer + * + * Consumes buffer if message is of right type + * Node lock must be held + */ +static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) +{ + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + u32 dport = msg_destport(msg); + + switch (msg_user(msg)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + if (tipc_skb_queue_tail(&link->inputq, skb, dport)) { + node->inputq = &link->inputq; + node->action_flags |= TIPC_MSG_EVT; + } + return true; + case NAME_DISTRIBUTOR: + node->bclink.recv_permitted = true; + node->namedq = &link->namedq; + skb_queue_tail(&link->namedq, skb); + if (skb_queue_len(&link->namedq) == 1) + node->action_flags |= TIPC_NAMED_MSG_EVT; + return true; + case MSG_BUNDLER: + case TUNNEL_PROTOCOL: + case MSG_FRAGMENTER: + 
case BCAST_PROTOCOL: + return false; + default: + pr_warn("Dropping received illegal msg type\n"); + kfree_skb(skb); + return false; + }; +} + +/* tipc_link_input - process packet that has passed link protocol check + * + * Consumes buffer + * Node lock must be held + */ +static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) +{ + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + struct sk_buff *iskb; + int pos = 0; + + if (likely(tipc_data_input(link, skb))) + return; + + switch (msg_user(msg)) { + case TUNNEL_PROTOCOL: + if (msg_dup(msg)) { + link->flags |= LINK_SYNCHING; + link->synch_point = msg_seqno(msg_get_wrapped(msg)); + kfree_skb(skb); + break; + } + if (!tipc_link_failover_rcv(link, &skb)) + break; + if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { + tipc_data_input(link, skb); + break; + } + case MSG_BUNDLER: + link->stats.recv_bundles++; + link->stats.recv_bundled += msg_msgcnt(msg); + + while (tipc_msg_extract(skb, &iskb, &pos)) + tipc_data_input(link, iskb); + break; + case MSG_FRAGMENTER: + link->stats.recv_fragments++; + if (tipc_buf_append(&link->reasm_buf, &skb)) { + link->stats.recv_fragmented++; + tipc_data_input(link, skb); + } else if (!link->reasm_buf) { + tipc_link_reset(link); + } + break; + case BCAST_PROTOCOL: + tipc_link_sync_rcv(node, skb); + break; + default: + break; + }; +} + +/** + * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue + * + * Returns increase in queue length (i.e. 0 or 1) + */ +u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) +{ + struct sk_buff *skb1; + u32 seq_no = buf_seqno(skb); + + /* Empty queue ? */ + if (skb_queue_empty(list)) { + __skb_queue_tail(list, skb); + return 1; + } + + /* Last ? 
*/ + if (less(buf_seqno(skb_peek_tail(list)), seq_no)) { + __skb_queue_tail(list, skb); + return 1; + } + + /* Locate insertion point in queue, then insert; discard if duplicate */ + skb_queue_walk(list, skb1) { + u32 curr_seqno = buf_seqno(skb1); + + if (seq_no == curr_seqno) { + kfree_skb(skb); + return 0; + } + + if (less(seq_no, curr_seqno)) + break; + } + + __skb_queue_before(list, skb1, skb); + return 1; +} + +/* + * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet + */ +static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, + struct sk_buff *buf) +{ + u32 seq_no = buf_seqno(buf); + + if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { + tipc_link_proto_rcv(l_ptr, buf); + return; + } + + /* Record OOS packet arrival (force mismatch on next timeout) */ + l_ptr->checkpoint--; + + /* + * Discard packet if a duplicate; otherwise add it to deferred queue + * and notify peer of gap as per protocol specification + */ + if (less(seq_no, mod(l_ptr->next_in_no))) { + l_ptr->stats.duplicates++; + kfree_skb(buf); + return; + } + + if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { + l_ptr->stats.deferred_recv++; + if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); + } else { + l_ptr->stats.duplicates++; + } +} + +/* + * Send protocol message to the other endpoint. 
+ */ +void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, + u32 gap, u32 tolerance, u32 priority) +{ + struct sk_buff *buf = NULL; + struct tipc_msg *msg = l_ptr->pmsg; + u32 msg_size = sizeof(l_ptr->proto_msg); + int r_flag; + + /* Don't send protocol message during link failover */ + if (l_ptr->flags & LINK_FAILINGOVER) + return; + + /* Abort non-RESET send if communication with node is prohibited */ + if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG)) + return; + + /* Create protocol message with "out-of-sequence" sequence number */ + msg_set_type(msg, msg_typ); + msg_set_net_plane(msg, l_ptr->net_plane); + msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); + msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); + + if (msg_typ == STATE_MSG) { + u32 next_sent = mod(l_ptr->next_out_no); + + if (!tipc_link_is_up(l_ptr)) + return; + if (skb_queue_len(&l_ptr->backlogq)) + next_sent = buf_seqno(skb_peek(&l_ptr->backlogq)); + msg_set_next_sent(msg, next_sent); + if (!skb_queue_empty(&l_ptr->deferdq)) { + u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq)); + gap = mod(rec - mod(l_ptr->next_in_no)); + } + msg_set_seq_gap(msg, gap); + if (gap) + l_ptr->stats.sent_nacks++; + msg_set_link_tolerance(msg, tolerance); + msg_set_linkprio(msg, priority); + msg_set_max_pkt(msg, l_ptr->mtu); + msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); + msg_set_probe(msg, probe_msg != 0); + if (probe_msg) + l_ptr->stats.sent_probes++; + l_ptr->stats.sent_states++; + } else { /* RESET_MSG or ACTIVATE_MSG */ + msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); + msg_set_seq_gap(msg, 0); + msg_set_next_sent(msg, 1); + msg_set_probe(msg, 0); + msg_set_link_tolerance(msg, l_ptr->tolerance); + msg_set_linkprio(msg, l_ptr->priority); + msg_set_max_pkt(msg, l_ptr->advertised_mtu); + } + + r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); + msg_set_redundant_link(msg, r_flag); + msg_set_linkprio(msg, l_ptr->priority); + 
msg_set_size(msg, msg_size); + + msg_set_seqno(msg, mod(l_ptr->next_out_no + (0xffff/2))); + + buf = tipc_buf_acquire(msg_size); + if (!buf) + return; + + skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); + buf->priority = TC_PRIO_CONTROL; + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, + &l_ptr->media_addr); + l_ptr->rcv_unacked = 0; + kfree_skb(buf); +} + +/* + * Receive protocol message : + * Note that network plane id propagates through the network, and may + * change at any time. The node with lowest address rules + */ +static void tipc_link_proto_rcv(struct tipc_link *l_ptr, + struct sk_buff *buf) +{ + u32 rec_gap = 0; + u32 msg_tol; + struct tipc_msg *msg = buf_msg(buf); + + if (l_ptr->flags & LINK_FAILINGOVER) + goto exit; + + if (l_ptr->net_plane != msg_net_plane(msg)) + if (link_own_addr(l_ptr) > msg_prevnode(msg)) + l_ptr->net_plane = msg_net_plane(msg); + + switch (msg_type(msg)) { + + case RESET_MSG: + if (!link_working_unknown(l_ptr) && + (l_ptr->peer_session != INVALID_SESSION)) { + if (less_eq(msg_session(msg), l_ptr->peer_session)) + break; /* duplicate or old reset: ignore */ + } + + if (!msg_redundant_link(msg) && (link_working_working(l_ptr) || + link_working_unknown(l_ptr))) { + /* + * peer has lost contact -- don't allow peer's links + * to reactivate before we recognize loss & clean up + */ + l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN; + } + + link_state_event(l_ptr, RESET_MSG); + + /* fall thru' */ + case ACTIVATE_MSG: + /* Update link settings according other endpoint's values */ + strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg)); + + msg_tol = msg_link_tolerance(msg); + if (msg_tol > l_ptr->tolerance) + link_set_supervision_props(l_ptr, msg_tol); + + if (msg_linkprio(msg) > l_ptr->priority) + l_ptr->priority = msg_linkprio(msg); + + if (l_ptr->mtu > msg_max_pkt(msg)) + l_ptr->mtu = msg_max_pkt(msg); + + /* Synchronize broadcast link info, if not done previously */ + if 
(!tipc_node_is_up(l_ptr->owner)) { + l_ptr->owner->bclink.last_sent = + l_ptr->owner->bclink.last_in = + msg_last_bcast(msg); + l_ptr->owner->bclink.oos_state = 0; + } + + l_ptr->peer_session = msg_session(msg); + l_ptr->peer_bearer_id = msg_bearer_id(msg); + + if (msg_type(msg) == ACTIVATE_MSG) + link_state_event(l_ptr, ACTIVATE_MSG); + break; + case STATE_MSG: + + msg_tol = msg_link_tolerance(msg); + if (msg_tol) + link_set_supervision_props(l_ptr, msg_tol); + + if (msg_linkprio(msg) && + (msg_linkprio(msg) != l_ptr->priority)) { + pr_debug("%s<%s>, priority change %u->%u\n", + link_rst_msg, l_ptr->name, + l_ptr->priority, msg_linkprio(msg)); + l_ptr->priority = msg_linkprio(msg); + tipc_link_reset(l_ptr); /* Enforce change to take effect */ + break; + } + + /* Record reception; force mismatch at next timeout: */ + l_ptr->checkpoint--; + + link_state_event(l_ptr, TRAFFIC_MSG_EVT); + l_ptr->stats.recv_states++; + if (link_reset_unknown(l_ptr)) + break; + + if (less_eq(mod(l_ptr->next_in_no), msg_next_sent(msg))) { + rec_gap = mod(msg_next_sent(msg) - + mod(l_ptr->next_in_no)); + } + + if (msg_probe(msg)) + l_ptr->stats.recv_probes++; + + /* Protocol message before retransmits, reduce loss risk */ + if (l_ptr->owner->bclink.recv_permitted) + tipc_bclink_update_link_state(l_ptr->owner, + msg_last_bcast(msg)); + + if (rec_gap || (msg_probe(msg))) { + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, + rec_gap, 0, 0); + } + if (msg_seq_gap(msg)) { + l_ptr->stats.recv_nacks++; + tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), + msg_seq_gap(msg)); + } + break; + } +exit: + kfree_skb(buf); +} + + +/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to + * a different bearer. Owner node is locked. 
+ */ +static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, + struct tipc_msg *tunnel_hdr, + struct tipc_msg *msg, + u32 selector) +{ + struct tipc_link *tunnel; + struct sk_buff *skb; + u32 length = msg_size(msg); + + tunnel = l_ptr->owner->active_links[selector & 1]; + if (!tipc_link_is_up(tunnel)) { + pr_warn("%stunnel link no longer available\n", link_co_err); + return; + } + msg_set_size(tunnel_hdr, length + INT_H_SIZE); + skb = tipc_buf_acquire(length + INT_H_SIZE); + if (!skb) { + pr_warn("%sunable to send tunnel msg\n", link_co_err); + return; + } + skb_copy_to_linear_data(skb, tunnel_hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(skb, INT_H_SIZE, msg, length); + __tipc_link_xmit_skb(tunnel, skb); +} + + +/* tipc_link_failover_send_queue(): A link has gone down, but a second + * link is still active. We can do failover. Tunnel the failing link's + * whole send queue via the remaining link. This way, we don't lose + * any packets, and sequence order is preserved for subsequent traffic + * sent over the remaining link. Owner node is locked. 
+ */ +void tipc_link_failover_send_queue(struct tipc_link *l_ptr) +{ + int msgcount; + struct tipc_link *tunnel = l_ptr->owner->active_links[0]; + struct tipc_msg tunnel_hdr; + struct sk_buff *skb; + int split_bundles; + + if (!tunnel) + return; + + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, + FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); + skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); + msgcount = skb_queue_len(&l_ptr->transmq); + msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); + msg_set_msgcnt(&tunnel_hdr, msgcount); + + if (skb_queue_empty(&l_ptr->transmq)) { + skb = tipc_buf_acquire(INT_H_SIZE); + if (skb) { + skb_copy_to_linear_data(skb, &tunnel_hdr, INT_H_SIZE); + msg_set_size(&tunnel_hdr, INT_H_SIZE); + __tipc_link_xmit_skb(tunnel, skb); + } else { + pr_warn("%sunable to send changeover msg\n", + link_co_err); + } + return; + } + + split_bundles = (l_ptr->owner->active_links[0] != + l_ptr->owner->active_links[1]); + + skb_queue_walk(&l_ptr->transmq, skb) { + struct tipc_msg *msg = buf_msg(skb); + + if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { + struct tipc_msg *m = msg_get_wrapped(msg); + unchar *pos = (unchar *)m; + + msgcount = msg_msgcnt(msg); + while (msgcount--) { + msg_set_seqno(m, msg_seqno(msg)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, m, + msg_link_selector(m)); + pos += align(msg_size(m)); + m = (struct tipc_msg *)pos; + } + } else { + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, msg, + msg_link_selector(msg)); + } + } +} + +/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a + * duplicate of the first link's send queue via the new link. This way, we + * are guaranteed that currently queued packets from a socket are delivered + * before future traffic from the same socket, even if this is using the + * new link. 
The last arriving copy of each duplicate packet is dropped at + * the receiving end by the regular protocol check, so packet cardinality + * and sequence order is preserved per sender/receiver socket pair. + * Owner node is locked. + */ +void tipc_link_dup_queue_xmit(struct tipc_link *link, + struct tipc_link *tnl) +{ + struct sk_buff *skb; + struct tipc_msg tnl_hdr; + struct sk_buff_head *queue = &link->transmq; + int mcnt; + + tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, + SYNCH_MSG, INT_H_SIZE, link->addr); + mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); + msg_set_msgcnt(&tnl_hdr, mcnt); + msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); + +tunnel_queue: + skb_queue_walk(queue, skb) { + struct sk_buff *outskb; + struct tipc_msg *msg = buf_msg(skb); + u32 len = msg_size(msg); + + msg_set_ack(msg, mod(link->next_in_no - 1)); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + msg_set_size(&tnl_hdr, len + INT_H_SIZE); + outskb = tipc_buf_acquire(len + INT_H_SIZE); + if (outskb == NULL) { + pr_warn("%sunable to send duplicate msg\n", + link_co_err); + return; + } + skb_copy_to_linear_data(outskb, &tnl_hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, + skb->data, len); + __tipc_link_xmit_skb(tnl, outskb); + if (!tipc_link_is_up(link)) + return; + } + if (queue == &link->backlogq) + return; + queue = &link->backlogq; + goto tunnel_queue; +} + +/* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet + * Owner node is locked. 
+ */ +static bool tipc_link_failover_rcv(struct tipc_link *link, + struct sk_buff **skb) +{ + struct tipc_msg *msg = buf_msg(*skb); + struct sk_buff *iskb = NULL; + struct tipc_link *pl = NULL; + int bearer_id = msg_bearer_id(msg); + int pos = 0; + + if (msg_type(msg) != FAILOVER_MSG) { + pr_warn("%sunknown tunnel pkt received\n", link_co_err); + goto exit; + } + if (bearer_id >= MAX_BEARERS) + goto exit; + + if (bearer_id == link->bearer_id) + goto exit; + + pl = link->owner->links[bearer_id]; + if (pl && tipc_link_is_up(pl)) + tipc_link_reset(pl); + + if (link->failover_pkts == FIRST_FAILOVER) + link->failover_pkts = msg_msgcnt(msg); + + /* Should we expect an inner packet? */ + if (!link->failover_pkts) + goto exit; + + if (!tipc_msg_extract(*skb, &iskb, &pos)) { + pr_warn("%sno inner failover pkt\n", link_co_err); + *skb = NULL; + goto exit; + } + link->failover_pkts--; + *skb = NULL; + + /* Was this packet already delivered? */ + if (less(buf_seqno(iskb), link->failover_checkpt)) { + kfree_skb(iskb); + iskb = NULL; + goto exit; + } + if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { + link->stats.recv_fragments++; + tipc_buf_append(&link->failover_skb, &iskb); + } +exit: + if (!link->failover_pkts && pl) + pl->flags &= ~LINK_FAILINGOVER; + kfree_skb(*skb); + *skb = iskb; + return *skb; +} + +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) +{ + unsigned long intv = ((tol / 4) > 500) ? 
500 : tol / 4; + + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) + return; + + l_ptr->tolerance = tol; + l_ptr->cont_intv = msecs_to_jiffies(intv); + l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); +} + +void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) +{ + int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); + + l->window = win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; +} + +/* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * + * Returns pointer to node owning the link, or 0 if no matching link is found. + */ +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l_ptr; + struct tipc_node *n_ptr; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { + tipc_node_lock(n_ptr); + for (i = 0; i < MAX_BEARERS; i++) { + l_ptr = n_ptr->links[i]; + if (l_ptr && !strcmp(l_ptr->name, link_name)) { + *bearer_id = i; + found_node = n_ptr; + break; + } + } + tipc_node_unlock(n_ptr); + if (found_node) + break; + } + rcu_read_unlock(); + + return found_node; +} + +/** + * link_reset_statistics - reset link statistics + * @l_ptr: pointer to link + */ +static void link_reset_statistics(struct tipc_link *l_ptr) +{ + memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); + l_ptr->stats.sent_info = l_ptr->next_out_no; + l_ptr->stats.recv_info = l_ptr->next_in_no; +} + +static void link_print(struct tipc_link 
*l_ptr, const char *str) +{ + struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); + struct tipc_bearer *b_ptr; + + rcu_read_lock(); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); + if (b_ptr) + pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); + rcu_read_unlock(); + + if (link_working_unknown(l_ptr)) + pr_cont(":WU\n"); + else if (link_reset_reset(l_ptr)) + pr_cont(":RR\n"); + else if (link_reset_unknown(l_ptr)) + pr_cont(":RU\n"); + else if (link_working_working(l_ptr)) + pr_cont(":WW\n"); + else + pr_cont("\n"); +} + +/* Parse and validate nested (link) properties valid for media, bearer and link + */ +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) +{ + int err; + + err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (prio > TIPC_MAX_LINK_PRI) + return -EINVAL; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) + return -EINVAL; + } + + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN)) + return -EINVAL; + } + + return 0; +} + +int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + node = tipc_link_find_owner(net, name, &bearer_id); + if 
(!node) + return -EINVAL; + + tipc_node_lock(node); + + link = node->links[bearer_id]; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + link_set_supervision_props(link, tol); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + link->priority = prio; + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_unlock(node); + + return res; +} + +static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) +{ + int i; + struct nlattr *stats; + + struct nla_map { + u32 key; + u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, s->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, s->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled}, + {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ? 
+ s->msg_length_counts : 1}, + {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts}, + {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total}, + {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]}, + {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]}, + {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]}, + {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]}, + {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]}, + {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]}, + {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]}, + {TIPC_NLA_STATS_RX_STATES, s->recv_states}, + {TIPC_NLA_STATS_RX_PROBES, s->recv_probes}, + {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv}, + {TIPC_NLA_STATS_TX_STATES, s->sent_states}, + {TIPC_NLA_STATS_TX_PROBES, s->sent_probes}, + {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, s->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, s->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, s->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ? 
+ (s->accu_queue_sz / s->queue_sz_counts) : 0} + }; + + stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!stats) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, stats); + + return 0; +msg_full: + nla_nest_cancel(skb, stats); + + return -EMSGSIZE; +} + +/* Caller should hold appropriate locks to protect the link */ +static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, + tipc_cluster_mask(tn->own_addr))) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->next_out_no)) + goto attr_msg_full; + + if (tipc_link_is_up(link)) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + if (tipc_link_is_active(link)) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, + link->window)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + 
nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_stats(msg->skb, &link->stats); + if (err) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i]) + continue; + + err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. 
+ */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} + +int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct sk_buff *ans_skb; + struct tipc_nl_msg msg; + struct tipc_link *link; + struct tipc_node *node; + char *name; + int bearer_id; + int err; + + if (!info->attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; + + ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!ans_skb) + return -ENOMEM; + + msg.skb = ans_skb; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + if (err) + goto err_out; + + tipc_node_unlock(node); + + return genlmsg_reply(ans_skb, info); + +err_out: + tipc_node_unlock(node); + nlmsg_free(ans_skb); + + return err; +} + +int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if 
(!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(net); + if (err) + return err; + return 0; + } + + node = tipc_link_find_owner(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_lock(node); + + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); + return -EINVAL; + } + + link_reset_statistics(link); + + tipc_node_unlock(node); + + return 0; +} diff --git a/kernel/net/tipc/link.h b/kernel/net/tipc/link.h new file mode 100644 index 000000000..b5b4e3554 --- /dev/null +++ b/kernel/net/tipc/link.h @@ -0,0 +1,311 @@ +/* + * net/tipc/link.h: Include file for TIPC link code + * + * Copyright (c) 1995-2006, 2013-2014, Ericsson AB + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_LINK_H +#define _TIPC_LINK_H + +#include <net/genetlink.h> +#include "msg.h" +#include "node.h" + +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + +/* Out-of-range value for link sequence numbers + */ +#define INVALID_LINK_SEQ 0x10000 + +/* Link working states + */ +#define WORKING_WORKING 560810u +#define WORKING_UNKNOWN 560811u +#define RESET_UNKNOWN 560812u +#define RESET_RESET 560813u + +/* Link endpoint execution states + */ +#define LINK_STARTED 0x0001 +#define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 +#define LINK_FAILINGOVER 0x0008 + +/* Starting value for maximum packet size negotiation on unicast links + * (unless bearer MTU is less) + */ +#define MAX_PKT_DEFAULT 1500 + +struct tipc_stats { + u32 sent_info; /* used in counting # sent packets */ + u32 recv_info; /* used in counting # recv'd packets */ + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 
recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. length profiling */ +}; + +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @media_addr: media address to use when sending messages over link + * @timer: link timer + * @owner: pointer to peer node + * @refcnt: reference counter for permanent references (owner node & timer) + * @flags: execution state flags for link endpoint instance + * @checkpoint: reference point for triggering link continuity checking + * @peer_session: link session # being used by peer end of link + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @cont_intv: link continuity testing interval + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state + * @proto_msg: template for control messages generated by link + * @pmsg: convenience pointer to "proto_msg" field + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) + * @exp_msg_count: # of tunnelled messages expected during link changeover + * @reset_checkpoint: seq # of last acknowledged message at time of link reset + * @mtu: current maximum packet size for this link + * 
@advertised_mtu: advertised own mtu when link is being established + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent + * @next_out_no: next sequence number to use for outbound messages + * @last_retransmitted: sequence number of most recently retransmitted message + * @stale_count: # of identical retransmit requests made by peer + * @next_in_no: next sequence number to expect for inbound messages + * @deferred_queue: deferred queue saved OOS b'cast message received from node + * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @next_out: ptr to first unsent outbound message in queue + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @long_msg_seq_no: next identifier to use for outbound fragmented messages + * @reasm_buf: head of partially reassembled inbound message fragments + * @stats: collects statistics regarding link activity + */ +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct tipc_media_addr media_addr; + struct timer_list timer; + struct tipc_node *owner; + struct kref ref; + + /* Management and link supervision data */ + unsigned int flags; + u32 checkpoint; + u32 peer_session; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + unsigned long cont_intv; + u32 abort_limit; + int state; + u32 fsm_msg_cnt; + struct { + unchar hdr[INT_H_SIZE]; + unchar body[TIPC_MAX_IF_NAME]; + } proto_msg; + struct tipc_msg *pmsg; + u32 priority; + char net_plane; + u16 synch_point; + + /* Failover */ + u16 failover_pkts; + u16 failover_checkpt; + struct sk_buff *failover_skb; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; + u32 next_out_no; + u32 
window; + u32 last_retransmitted; + u32 stale_count; + + /* Reception */ + u32 next_in_no; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head inputq; + struct sk_buff_head namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + + /* Statistics */ + struct tipc_stats stats; +}; + +struct tipc_port; + +struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, + const struct tipc_media_addr *media_addr); +void tipc_link_delete(struct tipc_link *link); +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down); +void tipc_link_failover_send_queue(struct tipc_link *l_ptr); +void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_reset_fragments(struct tipc_link *l_ptr); +int tipc_link_is_up(struct tipc_link *l_ptr); +int tipc_link_is_active(struct tipc_link *l_ptr); +void tipc_link_purge_queues(struct tipc_link *l_ptr); +void tipc_link_reset_all(struct tipc_node *node); +void tipc_link_reset(struct tipc_link *l_ptr); +void tipc_link_reset_list(struct net *net, unsigned int bearer_id); +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, + u32 selector); +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list); +void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, + u32 gap, u32 tolerance, u32 priority); +void tipc_link_push_packets(struct tipc_link *l_ptr); +u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); +void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); +void tipc_link_retransmit(struct tipc_link *l_ptr, + struct sk_buff *start, u32 retransmits); +struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, + const struct sk_buff *skb); + +int 
tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); +void link_prepare_wakeup(struct tipc_link *l); + +/* + * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) + */ +static inline u32 buf_seqno(struct sk_buff *buf) +{ + return msg_seqno(buf_msg(buf)); +} + +static inline u32 mod(u32 x) +{ + return x & 0xffffu; +} + +static inline int less_eq(u32 left, u32 right) +{ + return mod(right - left) < 32768u; +} + +static inline int more(u32 left, u32 right) +{ + return !less_eq(left, right); +} + +static inline int less(u32 left, u32 right) +{ + return less_eq(left, right) && (mod(right) != mod(left)); +} + +static inline u32 lesser(u32 left, u32 right) +{ + return less_eq(left, right) ? left : right; +} + +static inline u32 link_own_addr(struct tipc_link *l) +{ + return msg_prevnode(l->pmsg); +} + +/* + * Link status checking routines + */ +static inline int link_working_working(struct tipc_link *l_ptr) +{ + return l_ptr->state == WORKING_WORKING; +} + +static inline int link_working_unknown(struct tipc_link *l_ptr) +{ + return l_ptr->state == WORKING_UNKNOWN; +} + +static inline int link_reset_unknown(struct tipc_link *l_ptr) +{ + return l_ptr->state == RESET_UNKNOWN; +} + +static inline int link_reset_reset(struct tipc_link *l_ptr) +{ + return l_ptr->state == RESET_RESET; +} + +#endif diff --git a/kernel/net/tipc/msg.c b/kernel/net/tipc/msg.c new file mode 100644 index 000000000..c3e96e815 --- /dev/null +++ b/kernel/net/tipc/msg.c @@ -0,0 +1,574 @@ +/* + * net/tipc/msg.c: TIPC message header routines + * + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <net/sock.h> +#include "core.h" +#include "msg.h" +#include "addr.h" +#include "name_table.h" + +#define MAX_FORWARD_SIZE 1024 + +static unsigned int align(unsigned int i) +{ + return (i + 3) & ~3u; +} + +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. + */ +struct sk_buff *tipc_buf_acquire(u32 size) +{ + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + +void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 dnode) +{ + memset(m, 0, hsize); + msg_set_version(m); + msg_set_user(m, user); + msg_set_hdr_sz(m, hsize); + msg_set_size(m, hsize); + msg_set_prevnode(m, own_node); + msg_set_type(m, type); + if (hsize > SHORT_H_SIZE) { + msg_set_orignode(m, own_node); + msg_set_destnode(m, dnode); + } +} + +struct sk_buff *tipc_msg_create(uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) +{ + struct tipc_msg *msg; + struct sk_buff *buf; + + buf = tipc_buf_acquire(hdr_sz + data_sz); + if (unlikely(!buf)) + return NULL; + + msg = buf_msg(buf); + tipc_msg_init(onode, msg, user, type, hdr_sz, dnode); + msg_set_size(msg, hdr_sz + data_sz); + msg_set_origport(msg, oport); + msg_set_destport(msg, dport); + msg_set_errcode(msg, errcode); + if (hdr_sz > SHORT_H_SIZE) { + msg_set_orignode(msg, onode); + msg_set_destnode(msg, dnode); + } + return buf; +} + +/* tipc_buf_append(): Append a buffer to the fragment list of another buffer + * @*headbuf: in: NULL for first frag, otherwise value returned from prev 
call + * out: set when successful non-complete reassembly, otherwise NULL + * @*buf: in: the buffer to append. Always defined + * out: head buf after successful complete reassembly, otherwise NULL + * Returns 1 when reassembly complete, otherwise 0 + */ +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) +{ + struct sk_buff *head = *headbuf; + struct sk_buff *frag = *buf; + struct sk_buff *tail; + struct tipc_msg *msg; + u32 fragid; + int delta; + bool headstolen; + + if (!frag) + goto err; + + msg = buf_msg(frag); + fragid = msg_type(msg); + frag->next = NULL; + skb_pull(frag, msg_hdr_sz(msg)); + + if (fragid == FIRST_FRAGMENT) { + if (unlikely(head)) + goto err; + if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + goto err; + head = *headbuf = frag; + skb_frag_list_init(head); + TIPC_SKB_CB(head)->tail = NULL; + *buf = NULL; + return 0; + } + + if (!head) + goto err; + + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { + kfree_skb_partial(frag, headstolen); + } else { + tail = TIPC_SKB_CB(head)->tail; + if (!skb_has_frag_list(head)) + skb_shinfo(head)->frag_list = frag; + else + tail->next = frag; + head->truesize += frag->truesize; + head->data_len += frag->len; + head->len += frag->len; + TIPC_SKB_CB(head)->tail = frag; + } + + if (fragid == LAST_FRAGMENT) { + TIPC_SKB_CB(head)->validated = false; + if (unlikely(!tipc_msg_validate(head))) + goto err; + *buf = head; + TIPC_SKB_CB(head)->tail = NULL; + *headbuf = NULL; + return 1; + } + *buf = NULL; + return 0; +err: + pr_warn_ratelimited("Unable to build fragment list\n"); + kfree_skb(*buf); + kfree_skb(*headbuf); + *buf = *headbuf = NULL; + return 0; +} + +/* tipc_msg_validate - validate basic format of received message + * + * This routine ensures a TIPC message has an acceptable header, and at least + * as much data as the header indicates it should. 
The routine also ensures + * that the entire message header is stored in the main fragment of the message + * buffer, to simplify future access to message header fields. + * + * Note: Having extra info present in the message header or data areas is OK. + * TIPC will ignore the excess, under the assumption that it is optional info + * introduced by a later release of the protocol. + */ +bool tipc_msg_validate(struct sk_buff *skb) +{ + struct tipc_msg *msg; + int msz, hsz; + + if (unlikely(TIPC_SKB_CB(skb)->validated)) + return true; + if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) + return false; + + hsz = msg_hdr_sz(buf_msg(skb)); + if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE)) + return false; + if (unlikely(!pskb_may_pull(skb, hsz))) + return false; + + msg = buf_msg(skb); + if (unlikely(msg_version(msg) != TIPC_VERSION)) + return false; + + msz = msg_size(msg); + if (unlikely(msz < hsz)) + return false; + if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) + return false; + if (unlikely(skb->len < msz)) + return false; + + TIPC_SKB_CB(skb)->validated = true; + return true; +} + +/** + * tipc_msg_build - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @m: User message + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @list: Buffer or chain of buffers to be returned to caller + * + * Returns message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int pktmax, struct sk_buff_head *list) +{ + int mhsz = msg_hdr_sz(mhdr); + int msz = mhsz + dsz; + int pktno = 1; + int pktsz; + int pktrem = pktmax; + int drem = dsz; + struct tipc_msg pkthdr; + struct sk_buff *skb; + char *pktpos; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? 
*/ + if (likely(msz <= pktmax)) { + skb = tipc_buf_acquire(msz); + if (unlikely(!skb)) + return -ENOMEM; + skb_orphan(skb); + __skb_queue_tail(list, skb); + skb_copy_to_linear_data(skb, mhdr, mhsz); + pktpos = skb->data + mhsz; + if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz) + return dsz; + rc = -EFAULT; + goto error; + } + + /* Prepare reusable fragment header */ + tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER, + FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + msg_set_importance(&pkthdr, msg_importance(mhdr)); + + /* Prepare first fragment */ + skb = tipc_buf_acquire(pktmax); + if (!skb) + return -ENOMEM; + skb_orphan(skb); + __skb_queue_tail(list, skb); + pktpos = skb->data; + skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; + skb = tipc_buf_acquire(pktsz); + if (!skb) { + rc = -ENOMEM; + goto error; + } + skb_orphan(skb); + __skb_queue_tail(list, skb); + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); + pktpos = skb->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + msg_set_type(buf_msg(skb), LAST_FRAGMENT); + return dsz; +error: + __skb_queue_purge(list); + __skb_queue_head_init(list); + return rc; +} + +/** + * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one + * @bskb: the buffer to append to ("bundle") + * @skb: buffer to be appended + * @mtu: max allowable size for 
the bundle buffer + * Consumes buffer if successful + * Returns true if bundling could be performed, otherwise false + */ +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) +{ + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(skb); + unsigned int bsz; + unsigned int msz = msg_size(msg); + u32 start, pad; + u32 max = mtu - INT_H_SIZE; + + if (likely(msg_user(msg) == MSG_FRAGMENTER)) + return false; + if (!bskb) + return false; + bmsg = buf_msg(bskb); + bsz = msg_size(bmsg); + start = align(bsz); + pad = start - bsz; + + if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL)) + return false; + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + return false; + if (likely(msg_user(bmsg) != MSG_BUNDLER)) + return false; + if (unlikely(skb_tailroom(bskb) < (pad + msz))) + return false; + if (unlikely(max < (start + msz))) + return false; + + skb_put(bskb, pad + msz); + skb_copy_to_linear_data_offset(bskb, start, skb->data, msz); + msg_set_size(bmsg, start + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + kfree_skb(skb); + return true; +} + +/** + * tipc_msg_extract(): extract bundled inner packet from buffer + * @skb: buffer to be extracted from. + * @iskb: extracted inner buffer, to be returned + * @pos: position in outer message of msg to be extracted. 
+ * Returns position of next msg + * Consumes outer buffer when last packet extracted + * Returns true when when there is an extracted buffer, otherwise false + */ +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) +{ + struct tipc_msg *msg; + int imsz, offset; + + *iskb = NULL; + if (unlikely(skb_linearize(skb))) + goto none; + + msg = buf_msg(skb); + offset = msg_hdr_sz(msg) + *pos; + if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE))) + goto none; + + *iskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!*iskb)) + goto none; + skb_pull(*iskb, offset); + imsz = msg_size(buf_msg(*iskb)); + skb_trim(*iskb, imsz); + if (unlikely(!tipc_msg_validate(*iskb))) + goto none; + *pos += align(imsz); + return true; +none: + kfree_skb(skb); + kfree_skb(*iskb); + *iskb = NULL; + return false; +} + +/** + * tipc_msg_make_bundle(): Create bundle buf and append message to its tail + * @list: the buffer chain + * @skb: buffer to be appended and replaced + * @mtu: max allowable size for the bundle buffer, inclusive header + * @dnode: destination node for message. 
(Not always present in header) + * Replaces buffer if successful + * Returns true if success, otherwise false + */ +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) +{ + struct sk_buff *bskb; + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(*skb); + u32 msz = msg_size(msg); + u32 max = mtu - INT_H_SIZE; + + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == TUNNEL_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (msz > (max / 2)) + return false; + + bskb = tipc_buf_acquire(max); + if (!bskb) + return false; + + skb_trim(bskb, INT_H_SIZE); + bmsg = buf_msg(bskb); + tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, + INT_H_SIZE, dnode); + msg_set_seqno(bmsg, msg_seqno(msg)); + msg_set_ack(bmsg, msg_ack(msg)); + msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); + tipc_msg_bundle(bskb, *skb, mtu); + *skb = bskb; + return true; +} + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @buf: buffer containing message to be reversed + * @dnode: return value: node where to send message after reversal + * @err: error code to be set in message + * Consumes buffer if failure + * Returns true if success, otherwise false + */ +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, + int err) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg ohdr; + uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + + if (skb_linearize(buf)) + goto exit; + msg = buf_msg(buf); + if (msg_dest_droppable(msg)) + goto exit; + if (msg_errcode(msg)) + goto exit; + memcpy(&ohdr, msg, msg_hdr_sz(msg)); + msg_set_errcode(msg, err); + msg_set_origport(msg, msg_destport(&ohdr)); + msg_set_destport(msg, msg_origport(&ohdr)); + msg_set_prevnode(msg, own_addr); + if (!msg_short(msg)) { + msg_set_orignode(msg, msg_destnode(&ohdr)); + msg_set_destnode(msg, msg_orignode(&ohdr)); + } + msg_set_size(msg, msg_hdr_sz(msg) + rdsz); + skb_trim(buf, 
msg_size(msg)); + skb_orphan(buf); + *dnode = msg_orignode(&ohdr); + return true; +exit: + kfree_skb(buf); + *dnode = 0; + return false; +} + +/** + * tipc_msg_lookup_dest(): try to find new destination for named message + * @skb: the buffer containing the message. + * @dnode: return value: next-hop node, if destination found + * @err: return value: error code to use, if message to be rejected + * Does not consume buffer + * Returns true if a destination is found, false otherwise + */ +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, + u32 *dnode, int *err) +{ + struct tipc_msg *msg = buf_msg(skb); + u32 dport; + u32 own_addr = tipc_own_addr(net); + + if (!msg_isdata(msg)) + return false; + if (!msg_named(msg)) + return false; + if (msg_errcode(msg)) + return false; + *err = -TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; + if (msg_reroute_cnt(msg)) + return false; + *dnode = addr_domain(net, msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(net, msg_nametype(msg), + msg_nameinst(msg), dnode); + if (!dport) + return false; + msg_incr_reroute_cnt(msg); + if (*dnode != own_addr) + msg_set_prevnode(msg, own_addr); + msg_set_destnode(msg, *dnode); + msg_set_destport(msg, dport); + *err = TIPC_OK; + return true; +} + +/* tipc_msg_reassemble() - clone a buffer chain of fragments and + * reassemble the clones into one message + */ +struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) +{ + struct sk_buff *skb; + struct sk_buff *frag = NULL; + struct sk_buff *head = NULL; + int hdr_sz; + + /* Copy header if single buffer */ + if (skb_queue_len(list) == 1) { + skb = skb_peek(list); + hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); + return __pskb_copy(skb, hdr_sz, GFP_ATOMIC); + } + + /* Clone all fragments and reassemble */ + skb_queue_walk(list, skb) { + frag = skb_clone(skb, GFP_ATOMIC); + if (!frag) + goto error; + frag->next = NULL; + if (tipc_buf_append(&head, &frag)) + break; + if (!head) + goto error; + } + return 
frag; +error: + pr_warn("Failed do clone local mcast rcv buffer\n"); + kfree_skb(head); + return NULL; +} diff --git a/kernel/net/tipc/msg.h b/kernel/net/tipc/msg.h new file mode 100644 index 000000000..e1d3595e2 --- /dev/null +++ b/kernel/net/tipc/msg.h @@ -0,0 +1,873 @@ +/* + * net/tipc/msg.h: Include file for TIPC message header routines + * + * Copyright (c) 2000-2007, 2014-2015 Ericsson AB + * Copyright (c) 2005-2008, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_MSG_H +#define _TIPC_MSG_H + +#include <linux/tipc.h> + +/* + * Constants and routines used to read and write TIPC payload message headers + * + * Note: Some items are also used with TIPC internal message headers + */ +#define TIPC_VERSION 2 +struct plist; + +/* + * Payload message users are defined in TIPC's public API: + * - TIPC_LOW_IMPORTANCE + * - TIPC_MEDIUM_IMPORTANCE + * - TIPC_HIGH_IMPORTANCE + * - TIPC_CRITICAL_IMPORTANCE + */ +#define TIPC_SYSTEM_IMPORTANCE 4 + + +/* + * Payload message types + */ +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 + +/* + * Internal message users + */ +#define BCAST_PROTOCOL 5 +#define MSG_BUNDLER 6 +#define LINK_PROTOCOL 7 +#define CONN_MANAGER 8 +#define TUNNEL_PROTOCOL 10 +#define NAME_DISTRIBUTOR 11 +#define MSG_FRAGMENTER 12 +#define LINK_CONFIG 13 +#define SOCK_WAKEUP 14 /* pseudo user */ + +/* + * Message header sizes + */ +#define SHORT_H_SIZE 24 /* In-cluster basic payload message */ +#define BASIC_H_SIZE 32 /* Basic payload message */ +#define NAMED_H_SIZE 40 /* Named payload message */ +#define MCAST_H_SIZE 44 /* Multicast payload message */ +#define INT_H_SIZE 40 /* Internal messages */ +#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ +#define MAX_H_SIZE 60 /* Largest possible TIPC header size */ + +#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) + +#define TIPC_MEDIA_INFO_OFFSET 5 
+ +/** + * TIPC message buffer code + * + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). + * + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access + */ +#define BUF_HEADROOM (LL_MAX_HEADER + 48) + +struct tipc_skb_cb { + void *handle; + struct sk_buff *tail; + bool validated; + bool wakeup_pending; + bool bundling; + u16 chain_sz; + u16 chain_imp; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) + +struct tipc_msg { + __be32 hdr[15]; +}; + +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} + +static inline u32 msg_word(struct tipc_msg *m, u32 pos) +{ + return ntohl(m->hdr[pos]); +} + +static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) +{ + m->hdr[w] = htonl(val); +} + +static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) +{ + return (msg_word(m, w) >> pos) & mask; +} + +static inline void msg_set_bits(struct tipc_msg *m, u32 w, + u32 pos, u32 mask, u32 val) +{ + val = (val & mask) << pos; + mask = mask << pos; + m->hdr[w] &= ~htonl(mask); + m->hdr[w] |= htonl(val); +} + +static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) +{ + u32 temp = msg->hdr[a]; + + msg->hdr[a] = msg->hdr[b]; + msg->hdr[b] = temp; +} + +/* + * Word 0 + */ +static inline u32 msg_version(struct tipc_msg *m) +{ + return msg_bits(m, 0, 29, 7); +} + +static inline void msg_set_version(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 29, 7, TIPC_VERSION); +} + +static inline u32 msg_user(struct tipc_msg *m) +{ + return msg_bits(m, 0, 25, 0xf); +} + +static inline u32 msg_isdata(struct tipc_msg *m) +{ + return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE; +} + +static inline void msg_set_user(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 0, 25, 0xf, n); +} + +static inline u32 msg_hdr_sz(struct tipc_msg *m) +{ + return 
msg_bits(m, 0, 21, 0xf) << 2; +} + +static inline void msg_set_hdr_sz(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 0, 21, 0xf, n>>2); +} + +static inline u32 msg_size(struct tipc_msg *m) +{ + return msg_bits(m, 0, 0, 0x1ffff); +} + +static inline u32 msg_data_sz(struct tipc_msg *m) +{ + return msg_size(m) - msg_hdr_sz(m); +} + +static inline int msg_non_seq(struct tipc_msg *m) +{ + return msg_bits(m, 0, 20, 1); +} + +static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 0, 20, 1, n); +} + +static inline int msg_dest_droppable(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + +static inline int msg_src_droppable(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 18, 1, d); +} + +static inline void msg_set_size(struct tipc_msg *m, u32 sz) +{ + m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); +} + +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} + +/* + * Word 1 + */ +static inline u32 msg_type(struct tipc_msg *m) +{ + return msg_bits(m, 1, 29, 0x7); +} + +static inline void msg_set_type(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 29, 0x7, n); +} + +static inline u32 msg_named(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_NAMED_MSG; +} + +static inline u32 msg_mcast(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_MCAST_MSG; +} + +static inline u32 msg_connected(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_CONN_MSG; +} + +static inline u32 msg_errcode(struct tipc_msg *m) +{ + return msg_bits(m, 1, 25, 0xf); +} + +static inline void msg_set_errcode(struct tipc_msg *m, u32 err) +{ + msg_set_bits(m, 1, 25, 0xf, err); +} + +static 
inline u32 msg_reroute_cnt(struct tipc_msg *m) +{ + return msg_bits(m, 1, 21, 0xf); +} + +static inline void msg_incr_reroute_cnt(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1); +} + +static inline void msg_reset_reroute_cnt(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 21, 0xf, 0); +} + +static inline u32 msg_lookup_scope(struct tipc_msg *m) +{ + return msg_bits(m, 1, 19, 0x3); +} + +static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 19, 0x3, n); +} + +static inline u32 msg_bcast_ack(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_bcast_ack(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} + + +/* + * Word 2 + */ +static inline u32 msg_ack(struct tipc_msg *m) +{ + return msg_bits(m, 2, 16, 0xffff); +} + +static inline void msg_set_ack(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 2, 16, 0xffff, n); +} + +static inline u32 msg_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 2, 0, 0xffff); +} + +static inline void msg_set_seqno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 2, 0, 0xffff, n); +} + +/* + * Words 3-10 + */ +static inline u32 msg_importance(struct tipc_msg *m) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + return msg_bits(m, 5, 13, 0x7); + if (likely(msg_isdata(m) && !msg_errcode(m))) + return msg_user(m); + return TIPC_SYSTEM_IMPORTANCE; +} + +static inline void msg_set_importance(struct tipc_msg *m, u32 i) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + msg_set_bits(m, 5, 13, 0x7, i); + else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) + msg_set_user(m, i); + else + pr_warn("Trying to set illegal importance in message\n"); +} + +static inline u32 msg_prevnode(struct tipc_msg *m) +{ + return msg_word(m, 3); +} + +static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) +{ + msg_set_word(m, 3, a); +} + +static inline u32 msg_origport(struct tipc_msg *m) +{ + if (msg_user(m) == MSG_FRAGMENTER) + m = 
msg_get_wrapped(m); + return msg_word(m, 4); +} + +static inline void msg_set_origport(struct tipc_msg *m, u32 p) +{ + msg_set_word(m, 4, p); +} + +static inline u32 msg_destport(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + +static inline void msg_set_destport(struct tipc_msg *m, u32 p) +{ + msg_set_word(m, 5, p); +} + +static inline u32 msg_mc_netid(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + +static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) +{ + msg_set_word(m, 5, p); +} + +static inline int msg_short(struct tipc_msg *m) +{ + return msg_hdr_sz(m) == SHORT_H_SIZE; +} + +static inline u32 msg_orignode(struct tipc_msg *m) +{ + if (likely(msg_short(m))) + return msg_prevnode(m); + return msg_word(m, 6); +} + +static inline void msg_set_orignode(struct tipc_msg *m, u32 a) +{ + msg_set_word(m, 6, a); +} + +static inline u32 msg_destnode(struct tipc_msg *m) +{ + return msg_word(m, 7); +} + +static inline void msg_set_destnode(struct tipc_msg *m, u32 a) +{ + msg_set_word(m, 7, a); +} + +static inline u32 msg_nametype(struct tipc_msg *m) +{ + return msg_word(m, 8); +} + +static inline void msg_set_nametype(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 8, n); +} + +static inline u32 msg_nameinst(struct tipc_msg *m) +{ + return msg_word(m, 9); +} + +static inline u32 msg_namelower(struct tipc_msg *m) +{ + return msg_nameinst(m); +} + +static inline void msg_set_namelower(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 9, n); +} + +static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) +{ + msg_set_namelower(m, n); +} + +static inline u32 msg_nameupper(struct tipc_msg *m) +{ + return msg_word(m, 10); +} + +static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 10, n); +} + +/* + * Constants and routines used to read and write TIPC internal message headers + */ + +/* + * Connection management protocol message types + */ +#define CONN_PROBE 0 +#define CONN_PROBE_REPLY 1 +#define CONN_ACK 2 + +/* + * Name 
distributor message types + */ +#define PUBLICATION 0 +#define WITHDRAWAL 1 + +/* + * Segmentation message types + */ +#define FIRST_FRAGMENT 0 +#define FRAGMENT 1 +#define LAST_FRAGMENT 2 + +/* + * Link management protocol message types + */ +#define STATE_MSG 0 +#define RESET_MSG 1 +#define ACTIVATE_MSG 2 + +/* + * Changeover tunnel message types + */ +#define SYNCH_MSG 0 +#define FAILOVER_MSG 1 + +/* + * Config protocol message types + */ +#define DSC_REQ_MSG 0 +#define DSC_RESP_MSG 1 + +/* + * Word 1 + */ +static inline u32 msg_seq_gap(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1fff); +} + +static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 16, 0x1fff, n); +} + +static inline u32 msg_node_sig(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} + +static inline u32 msg_node_capabilities(struct tipc_msg *m) +{ + return msg_bits(m, 1, 15, 0x1fff); +} + +static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 15, 0x1fff, n); +} + +static inline bool msg_dup(struct tipc_msg *m) +{ + if (likely(msg_user(m) != TUNNEL_PROTOCOL)) + return false; + if (msg_type(m) != SYNCH_MSG) + return false; + return true; +} + +/* + * Word 2 + */ +static inline u32 msg_dest_domain(struct tipc_msg *m) +{ + return msg_word(m, 2); +} + +static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 2, n); +} + +static inline u32 msg_bcgap_after(struct tipc_msg *m) +{ + return msg_bits(m, 2, 16, 0xffff); +} + +static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 2, 16, 0xffff, n); +} + +static inline u32 msg_bcgap_to(struct tipc_msg *m) +{ + return msg_bits(m, 2, 0, 0xffff); +} + +static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 2, 0, 0xffff, n); +} + + +/* + * Word 4 + */ +static inline u32 
msg_last_bcast(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + +static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 16, 0xffff, n); +} + +static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 16, 0xffff, n); +} + + +static inline u32 msg_next_sent(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_next_sent(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline u32 msg_bc_netid(struct tipc_msg *m) +{ + return msg_word(m, 4); +} + +static inline void msg_set_bc_netid(struct tipc_msg *m, u32 id) +{ + msg_set_word(m, 4, id); +} + +static inline u32 msg_link_selector(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 1); +} + +static inline void msg_set_link_selector(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 1, n); +} + +/* + * Word 5 + */ +static inline u32 msg_session(struct tipc_msg *m) +{ + return msg_bits(m, 5, 16, 0xffff); +} + +static inline void msg_set_session(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 5, 16, 0xffff, n); +} + +static inline u32 msg_probe(struct tipc_msg *m) +{ + return msg_bits(m, 5, 0, 1); +} + +static inline void msg_set_probe(struct tipc_msg *m, u32 val) +{ + msg_set_bits(m, 5, 0, 1, val); +} + +static inline char msg_net_plane(struct tipc_msg *m) +{ + return msg_bits(m, 5, 1, 7) + 'A'; +} + +static inline void msg_set_net_plane(struct tipc_msg *m, char n) +{ + msg_set_bits(m, 5, 1, 7, (n - 'A')); +} + +static inline u32 msg_linkprio(struct tipc_msg *m) +{ + return msg_bits(m, 5, 4, 0x1f); +} + +static inline void msg_set_linkprio(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 5, 4, 0x1f, n); +} + +static inline u32 msg_bearer_id(struct tipc_msg *m) +{ + return msg_bits(m, 5, 9, 0x7); +} + +static inline void msg_set_bearer_id(struct tipc_msg *m, u32 n) +{ + 
msg_set_bits(m, 5, 9, 0x7, n); +} + +static inline u32 msg_redundant_link(struct tipc_msg *m) +{ + return msg_bits(m, 5, 12, 0x1); +} + +static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) +{ + msg_set_bits(m, 5, 12, 0x1, r); +} + +static inline char *msg_media_addr(struct tipc_msg *m) +{ + return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; +} + +/* + * Word 9 + */ +static inline u32 msg_msgcnt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_msgcnt(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u32 msg_bcast_tag(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u32 msg_max_pkt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff) * 4; +} + +static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 16, 0xffff, (n / 4)); +} + +static inline u32 msg_link_tolerance(struct tipc_msg *m) +{ + return msg_bits(m, 9, 0, 0xffff); +} + +static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 0, 0xffff, n); +} + +struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_validate(struct sk_buff *skb); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, + int err); +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode); +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + 
int offset, int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, + int *err); +struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); + +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in + * Returns pointer to first buffer in list, if any + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + return skb; +} + +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search + * Returns a destination port number, of applicable. + */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_tail(): add buffer to tail of list; + * @list: list to be appended to 
+ * @skb: buffer to append. Always appended + * @dport: the destination port of the buffer + * returns true if dport differs from previous destination + */ +static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, + struct sk_buff *skb, u32 dport) +{ + struct sk_buff *_skb = NULL; + bool rv = false; + + spin_lock_bh(&list->lock); + _skb = skb_peek_tail(list); + if (!_skb || (msg_destport(buf_msg(_skb)) != dport) || + (skb_queue_len(list) > 32)) + rv = true; + __skb_queue_tail(list, skb); + spin_unlock_bh(&list->lock); + return rv; +} + +#endif diff --git a/kernel/net/tipc/name_distr.c b/kernel/net/tipc/name_distr.c new file mode 100644 index 000000000..41e7b7e4d --- /dev/null +++ b/kernel/net/tipc/name_distr.c @@ -0,0 +1,437 @@ +/* + * net/tipc/name_distr.c: TIPC name distribution code + * + * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "link.h" +#include "name_distr.h" + +int sysctl_tipc_named_timeout __read_mostly = 2000; + +/** + * struct tipc_dist_queue - queue holding deferred name table updates + */ +static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue); + +struct distr_queue_item { + struct distr_item i; + u32 dtype; + u32 node; + unsigned long expires; + struct list_head next; +}; + +/** + * publ_to_item - add publication info to a publication message + */ +static void publ_to_item(struct distr_item *i, struct publication *p) +{ + i->type = htonl(p->type); + i->lower = htonl(p->lower); + i->upper = htonl(p->upper); + i->ref = htonl(p->ref); + i->key = htonl(p->key); +} + +/** + * named_prepare_buf - allocate & initialize a publication message + */ +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); + struct tipc_msg *msg; + + if (buf != NULL) { + msg = buf_msg(buf); + tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type, + INT_H_SIZE, dest); + msg_set_size(msg, INT_H_SIZE + size); + } + 
return buf; +} + +void named_cluster_distribute(struct net *net, struct sk_buff *skb) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sk_buff *oskb; + struct tipc_node *node; + u32 dnode; + + rcu_read_lock(); + list_for_each_entry_rcu(node, &tn->node_list, list) { + dnode = node->addr; + if (in_own_node(net, dnode)) + continue; + if (!tipc_node_active_links(node)) + continue; + oskb = pskb_copy(skb, GFP_ATOMIC); + if (!oskb) + break; + msg_set_destnode(buf_msg(oskb), dnode); + tipc_link_xmit_skb(net, oskb, dnode, dnode); + } + rcu_read_unlock(); + + kfree_skb(skb); +} + +/** + * tipc_named_publish - tell other nodes about a new publication by this node + */ +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sk_buff *buf; + struct distr_item *item; + + list_add_tail_rcu(&publ->local_list, + &tn->nametbl->publ_list[publ->scope]); + + if (publ->scope == TIPC_NODE_SCOPE) + return NULL; + + buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); + if (!buf) { + pr_warn("Publication distribution failure\n"); + return NULL; + } + + item = (struct distr_item *)msg_data(buf_msg(buf)); + publ_to_item(item, publ); + return buf; +} + +/** + * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node + */ +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) +{ + struct sk_buff *buf; + struct distr_item *item; + + list_del(&publ->local_list); + + if (publ->scope == TIPC_NODE_SCOPE) + return NULL; + + buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); + if (!buf) { + pr_warn("Withdrawal distribution failure\n"); + return NULL; + } + + item = (struct distr_item *)msg_data(buf_msg(buf)); + publ_to_item(item, publ); + return buf; +} + +/** + * named_distribute - prepare name info for bulk distribution to another node + * @list: list of messages (buffers) to be returned from this function + * @dnode: node to be 
updated + * @pls: linked list of publication items to be packed into buffer chain + */ +static void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head *pls) +{ + struct publication *publ; + struct sk_buff *skb = NULL; + struct distr_item *item = NULL; + uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * + ITEM_SIZE; + uint msg_rem = msg_dsz; + + list_for_each_entry(publ, pls, local_list) { + /* Prepare next buffer: */ + if (!skb) { + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); + if (!skb) { + pr_warn("Bulk publication failure\n"); + return; + } + item = (struct distr_item *)msg_data(buf_msg(skb)); + } + + /* Pack publication into message: */ + publ_to_item(item, publ); + item++; + msg_rem -= ITEM_SIZE; + + /* Append full buffer to list: */ + if (!msg_rem) { + __skb_queue_tail(list, skb); + skb = NULL; + msg_rem = msg_dsz; + } + } + if (skb) { + msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem)); + skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); + __skb_queue_tail(list, skb); + } +} + +/** + * tipc_named_node_up - tell specified node about all publications by this node + */ +void tipc_named_node_up(struct net *net, u32 dnode) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct sk_buff_head head; + + __skb_queue_head_init(&head); + + rcu_read_lock(); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); + rcu_read_unlock(); + + tipc_link_xmit(net, &head, dnode, dnode); +} + +static void tipc_publ_subscribe(struct net *net, struct publication *publ, + u32 addr) +{ + struct tipc_node *node; + + if (in_own_node(net, addr)) + return; + + node = tipc_node_find(net, addr); + if (!node) { + pr_warn("Node subscription rejected, unknown node 0x%x\n", + addr); + return; + } + + tipc_node_lock(node); + list_add_tail(&publ->nodesub_list, &node->publ_list); + 
tipc_node_unlock(node); + tipc_node_put(node); +} + +static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, + u32 addr) +{ + struct tipc_node *node; + + node = tipc_node_find(net, addr); + if (!node) + return; + + tipc_node_lock(node); + list_del_init(&publ->nodesub_list); + tipc_node_unlock(node); + tipc_node_put(node); +} + +/** + * tipc_publ_purge - remove publication associated with a failed node + * + * Invoked for each publication issued by a newly failed node. + * Removes publication structure from name table & deletes it. + */ +static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct publication *p; + + spin_lock_bh(&tn->nametbl_lock); + p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, + publ->node, publ->ref, publ->key); + if (p) + tipc_publ_unsubscribe(net, p, addr); + spin_unlock_bh(&tn->nametbl_lock); + + if (p != publ) { + pr_err("Unable to remove publication from failed node\n" + " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->ref, + publ->key); + } + + kfree_rcu(p, rcu); +} + +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) +{ + struct publication *publ, *tmp; + + list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) + tipc_publ_purge(net, publ, addr); +} + +/** + * tipc_update_nametbl - try to process a nametable update and notify + * subscribers + * + * tipc_nametbl_lock must be held. + * Returns the publication item if successful, otherwise NULL. 
+ */ +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) +{ + struct publication *publ = NULL; + + if (dtype == PUBLICATION) { + publ = tipc_nametbl_insert_publ(net, ntohl(i->type), + ntohl(i->lower), + ntohl(i->upper), + TIPC_CLUSTER_SCOPE, node, + ntohl(i->ref), ntohl(i->key)); + if (publ) { + tipc_publ_subscribe(net, publ, node); + return true; + } + } else if (dtype == WITHDRAWAL) { + publ = tipc_nametbl_remove_publ(net, ntohl(i->type), + ntohl(i->lower), + node, ntohl(i->ref), + ntohl(i->key)); + if (publ) { + tipc_publ_unsubscribe(net, publ, node); + kfree_rcu(publ, rcu); + return true; + } + } else { + pr_warn("Unrecognized name table message received\n"); + } + return false; +} + +/** + * tipc_named_add_backlog - add a failed name table update to the backlog + * + */ +static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) +{ + struct distr_queue_item *e; + unsigned long now = get_jiffies_64(); + + e = kzalloc(sizeof(*e), GFP_ATOMIC); + if (!e) + return; + e->dtype = type; + e->node = node; + e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout); + memcpy(e, i, sizeof(*i)); + list_add_tail(&e->next, &tipc_dist_queue); +} + +/** + * tipc_named_process_backlog - try to process any pending name table updates + * from the network. 
+ */ +void tipc_named_process_backlog(struct net *net) +{ + struct distr_queue_item *e, *tmp; + char addr[16]; + unsigned long now = get_jiffies_64(); + + list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { + if (time_after(e->expires, now)) { + if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) + continue; + } else { + tipc_addr_string_fill(addr, e->node); + pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n", + e->dtype, ntohl(e->i.type), + ntohl(e->i.lower), + ntohl(e->i.upper), + addr, ntohl(e->i.key)); + } + list_del(&e->next); + kfree(e); + } +} + +/** + * tipc_named_rcv - process name table update messages sent by another node + */ +void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_msg *msg; + struct distr_item *item; + uint count; + u32 node; + struct sk_buff *skb; + int mtype; + + spin_lock_bh(&tn->nametbl_lock); + for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + msg = buf_msg(skb); + mtype = msg_type(msg); + item = (struct distr_item *)msg_data(msg); + count = msg_data_sz(msg) / ITEM_SIZE; + node = msg_orignode(msg); + while (count--) { + if (!tipc_update_nametbl(net, item, node, mtype)) + tipc_named_add_backlog(item, mtype, node); + item++; + } + kfree_skb(skb); + tipc_named_process_backlog(net); + } + spin_unlock_bh(&tn->nametbl_lock); +} + +/** + * tipc_named_reinit - re-initialize local publications + * + * This routine is called whenever TIPC networking is enabled. + * All name table entries published by this node are updated to reflect + * the node's new network address. 
+ */ +void tipc_named_reinit(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct publication *publ; + int scope; + + spin_lock_bh(&tn->nametbl_lock); + + for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) + list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], + local_list) + publ->node = tn->own_addr; + + spin_unlock_bh(&tn->nametbl_lock); +} diff --git a/kernel/net/tipc/name_distr.h b/kernel/net/tipc/name_distr.h new file mode 100644 index 000000000..dd2d9fd80 --- /dev/null +++ b/kernel/net/tipc/name_distr.h @@ -0,0 +1,79 @@ +/* + * net/tipc/name_distr.h: Include file for TIPC name distribution code + * + * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2005, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NAME_DISTR_H +#define _TIPC_NAME_DISTR_H + +#include "name_table.h" + +#define ITEM_SIZE sizeof(struct distr_item) + +/** + * struct distr_item - publication info distributed to other nodes + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @ref: publishing port reference + * @key: publication key + * + * ===> All fields are stored in network byte order. <=== + * + * First 3 fields identify (name or) name sequence being published. + * Reference field uniquely identifies port that published name sequence. + * Key field uniquely identifies publication, in the event a port has + * multiple publications of the same name sequence. + * + * Note: There is no field that identifies the publishing node because it is + * the same for all items contained within a publication message. 
 */
struct distr_item {
	__be32 type;	/* name sequence type */
	__be32 lower;	/* name sequence lower bound */
	__be32 upper;	/* name sequence upper bound */
	__be32 ref;	/* publishing port reference */
	__be32 key;	/* publication key */
};

/* Name distribution interface */
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
void named_cluster_distribute(struct net *net, struct sk_buff *buf);
void tipc_named_node_up(struct net *net, u32 dnode);
void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
void tipc_named_reinit(struct net *net);
void tipc_named_process_backlog(struct net *net);
void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);

#endif
diff --git a/kernel/net/tipc/name_table.c b/kernel/net/tipc/name_table.c new file mode 100644 index 000000000..ab0ac62a1 --- /dev/null +++ b/kernel/net/tipc/name_table.c @@ -0,0 +1,1070 @@
/*
 * net/tipc/name_table.c: TIPC name table code
 *
 * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
 * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <net/sock.h>
#include "core.h"
#include "netlink.h"
#include "name_table.h"
#include "name_distr.h"
#include "subscr.h"
#include "bcast.h"
#include "addr.h"
#include <net/genetlink.h>

/* Number of hash chains; hash() masks with (TIPC_NAMETBL_SIZE - 1) */
#define TIPC_NAMETBL_SIZE 1024		/* must be a power of 2 */

/* Netlink attribute policy for the name table attribute set: the
 * publication attribute is a nested attribute.
 */
static const struct nla_policy
tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
	[TIPC_NLA_NAME_TABLE_UNSPEC]	= { .type = NLA_UNSPEC },
	[TIPC_NLA_NAME_TABLE_PUBL]	= { .type = NLA_NESTED }
};

/**
 * struct name_info - name sequence publication info
 * @node_list: circular list of publications made by own node
 * @cluster_list: circular list of publications made by own cluster
 * @zone_list: circular list of publications made by own zone
 * @node_list_size: number of entries in "node_list"
 * @cluster_list_size: number of entries in "cluster_list"
 * @zone_list_size: number of entries in "zone_list"
 *
 * Note: The zone list always contains at least one entry, since all
 *       publications of the associated name sequence belong to it.
 *       (The cluster and node lists may be empty.)
 */
struct name_info {
	struct list_head node_list;
	struct list_head cluster_list;
	struct list_head zone_list;
	u32 node_list_size;
	u32 cluster_list_size;
	u32 zone_list_size;
};

/**
 * struct sub_seq - container for all published instances of a name sequence
 * @lower: name sequence lower bound
 * @upper: name sequence upper bound
 * @info: pointer to name sequence publication info
 */
struct sub_seq {
	u32 lower;
	u32 upper;
	struct name_info *info;
};

/**
 * struct name_seq - container for all published instances of a name type
 * @type: 32 bit 'type' value for name sequence
 * @sseqs: pointer to dynamically-sized array of sub-sequences of this 'type';
 *         sub-sequences are sorted in ascending order
 * @alloc: number of sub-sequences currently in array
 * @first_free: array index of first unused sub-sequence entry
 * @ns_list: links to adjacent name sequences in hash chain
 * @subscriptions: list of subscriptions for this 'type'
 * @lock: spinlock controlling access to publication lists of all sub-sequences
 * @rcu: RCU callback head used for deferred freeing
 */
struct name_seq {
	u32 type;
	struct sub_seq *sseqs;
	u32 alloc;
	u32 first_free;
	struct hlist_node ns_list;
	struct list_head subscriptions;
	spinlock_t lock;
	struct rcu_head rcu;
};

/* Map a name type onto a hash chain index by keeping its low-order bits */
static int hash(int x)
{
	return x & (TIPC_NAMETBL_SIZE - 1);
}

/**
 * publ_create - create a publication structure
 *
 * Returns a zero-initialized publication filled in with the given values and
 * an initialized pport_list head, or NULL (after logging a warning) if the
 * allocation fails.
 */
static struct publication *publ_create(u32 type, u32 lower, u32 upper,
				       u32 scope, u32 node, u32 port_ref,
				       u32 key)
{
	struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
	if (publ == NULL) {
		pr_warn("Publication creation failure, no memory\n");
		return NULL;
	}

	publ->type = type;
	publ->lower = lower;
	publ->upper = upper;
	publ->scope = scope;
	publ->node = node;
	publ->ref = port_ref;
	publ->key = key;
	INIT_LIST_HEAD(&publ->pport_list);
	return publ;
}

/**
 * tipc_subseq_alloc - allocate a specified
number of sub-sequence structures + */ +static struct sub_seq *tipc_subseq_alloc(u32 cnt) +{ + return kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC); +} + +/** + * tipc_nameseq_create - create a name sequence structure for the specified 'type' + * + * Allocates a single sub-sequence structure and sets it to all 0's. + */ +static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head) +{ + struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC); + struct sub_seq *sseq = tipc_subseq_alloc(1); + + if (!nseq || !sseq) { + pr_warn("Name sequence creation failed, no memory\n"); + kfree(nseq); + kfree(sseq); + return NULL; + } + + spin_lock_init(&nseq->lock); + nseq->type = type; + nseq->sseqs = sseq; + nseq->alloc = 1; + INIT_HLIST_NODE(&nseq->ns_list); + INIT_LIST_HEAD(&nseq->subscriptions); + hlist_add_head_rcu(&nseq->ns_list, seq_head); + return nseq; +} + +/** + * nameseq_find_subseq - find sub-sequence (if any) matching a name instance + * + * Very time-critical, so binary searches through sub-sequence array. + */ +static struct sub_seq *nameseq_find_subseq(struct name_seq *nseq, + u32 instance) +{ + struct sub_seq *sseqs = nseq->sseqs; + int low = 0; + int high = nseq->first_free - 1; + int mid; + + while (low <= high) { + mid = (low + high) / 2; + if (instance < sseqs[mid].lower) + high = mid - 1; + else if (instance > sseqs[mid].upper) + low = mid + 1; + else + return &sseqs[mid]; + } + return NULL; +} + +/** + * nameseq_locate_subseq - determine position of name instance in sub-sequence + * + * Returns index in sub-sequence array of the entry that contains the specified + * instance value; if no entry contains that value, returns the position + * where a new entry for it would be inserted in the array. + * + * Note: Similar to binary search code for locating a sub-sequence. 
+ */ +static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance) +{ + struct sub_seq *sseqs = nseq->sseqs; + int low = 0; + int high = nseq->first_free - 1; + int mid; + + while (low <= high) { + mid = (low + high) / 2; + if (instance < sseqs[mid].lower) + high = mid - 1; + else if (instance > sseqs[mid].upper) + low = mid + 1; + else + return mid; + } + return low; +} + +/** + * tipc_nameseq_insert_publ + */ +static struct publication *tipc_nameseq_insert_publ(struct net *net, + struct name_seq *nseq, + u32 type, u32 lower, + u32 upper, u32 scope, + u32 node, u32 port, u32 key) +{ + struct tipc_subscription *s; + struct tipc_subscription *st; + struct publication *publ; + struct sub_seq *sseq; + struct name_info *info; + int created_subseq = 0; + + sseq = nameseq_find_subseq(nseq, lower); + if (sseq) { + + /* Lower end overlaps existing entry => need an exact match */ + if ((sseq->lower != lower) || (sseq->upper != upper)) { + return NULL; + } + + info = sseq->info; + + /* Check if an identical publication already exists */ + list_for_each_entry(publ, &info->zone_list, zone_list) { + if ((publ->ref == port) && (publ->key == key) && + (!publ->node || (publ->node == node))) + return NULL; + } + } else { + u32 inspos; + struct sub_seq *freesseq; + + /* Find where lower end should be inserted */ + inspos = nameseq_locate_subseq(nseq, lower); + + /* Fail if upper end overlaps into an existing entry */ + if ((inspos < nseq->first_free) && + (upper >= nseq->sseqs[inspos].lower)) { + return NULL; + } + + /* Ensure there is space for new sub-sequence */ + if (nseq->first_free == nseq->alloc) { + struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2); + + if (!sseqs) { + pr_warn("Cannot publish {%u,%u,%u}, no memory\n", + type, lower, upper); + return NULL; + } + memcpy(sseqs, nseq->sseqs, + nseq->alloc * sizeof(struct sub_seq)); + kfree(nseq->sseqs); + nseq->sseqs = sseqs; + nseq->alloc *= 2; + } + + info = kzalloc(sizeof(*info), GFP_ATOMIC); + if (!info) { 
+ pr_warn("Cannot publish {%u,%u,%u}, no memory\n", + type, lower, upper); + return NULL; + } + + INIT_LIST_HEAD(&info->node_list); + INIT_LIST_HEAD(&info->cluster_list); + INIT_LIST_HEAD(&info->zone_list); + + /* Insert new sub-sequence */ + sseq = &nseq->sseqs[inspos]; + freesseq = &nseq->sseqs[nseq->first_free]; + memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq)); + memset(sseq, 0, sizeof(*sseq)); + nseq->first_free++; + sseq->lower = lower; + sseq->upper = upper; + sseq->info = info; + created_subseq = 1; + } + + /* Insert a publication */ + publ = publ_create(type, lower, upper, scope, node, port, key); + if (!publ) + return NULL; + + list_add(&publ->zone_list, &info->zone_list); + info->zone_list_size++; + + if (in_own_cluster(net, node)) { + list_add(&publ->cluster_list, &info->cluster_list); + info->cluster_list_size++; + } + + if (in_own_node(net, node)) { + list_add(&publ->node_list, &info->node_list); + info->node_list_size++; + } + + /* Any subscriptions waiting for notification? */ + list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { + tipc_subscr_report_overlap(s, + publ->lower, + publ->upper, + TIPC_PUBLISHED, + publ->ref, + publ->node, + created_subseq); + } + return publ; +} + +/** + * tipc_nameseq_remove_publ + * + * NOTE: There may be cases where TIPC is asked to remove a publication + * that is not in the name table. For example, if another node issues a + * publication for a name sequence that overlaps an existing name sequence + * the publication will not be recorded, which means the publication won't + * be found when the name sequence is later withdrawn by that node. + * A failed withdraw request simply returns a failure indication and lets the + * caller issue any error or warning messages associated with such a problem. 
 */
static struct publication *tipc_nameseq_remove_publ(struct net *net,
						    struct name_seq *nseq,
						    u32 inst, u32 node,
						    u32 ref, u32 key)
{
	struct publication *publ;
	struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
	struct name_info *info;
	struct sub_seq *free;
	struct tipc_subscription *s, *st;
	int removed_subseq = 0;

	if (!sseq)
		return NULL;

	info = sseq->info;

	/* Locate publication, if it exists */
	list_for_each_entry(publ, &info->zone_list, zone_list) {
		if ((publ->key == key) && (publ->ref == ref) &&
		    (!publ->node || (publ->node == node)))
			goto found;
	}
	return NULL;

found:
	/* Remove publication from zone scope list */
	list_del(&publ->zone_list);
	info->zone_list_size--;

	/* Remove publication from cluster scope list, if present */
	if (in_own_cluster(net, node)) {
		list_del(&publ->cluster_list);
		info->cluster_list_size--;
	}

	/* Remove publication from node scope list, if present */
	if (in_own_node(net, node)) {
		list_del(&publ->node_list);
		info->node_list_size--;
	}

	/* Contract subseq list if no more publications for that subseq */
	if (list_empty(&info->zone_list)) {
		kfree(info);
		/* 'free' is the old end of the used area; first_free shrinks */
		free = &nseq->sseqs[nseq->first_free--];
		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
		removed_subseq = 1;
	}

	/* Notify any waiting subscriptions */
	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
		tipc_subscr_report_overlap(s,
					   publ->lower,
					   publ->upper,
					   TIPC_WITHDRAWN,
					   publ->ref,
					   publ->node,
					   removed_subseq);
	}

	return publ;
}

/**
 * tipc_nameseq_subscribe - attach a subscription, and issue
 * the prescribed number of events if there is any sub-
 * sequence overlapping with the requested sequence
 */
static void tipc_nameseq_subscribe(struct name_seq *nseq,
				   struct tipc_subscription *s)
{
	struct sub_seq *sseq = nseq->sseqs;

	list_add(&s->nameseq_list, &nseq->subscriptions);

	if (!sseq)
		return;

	while (sseq != &nseq->sseqs[nseq->first_free]) {
		if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) {
			struct publication *crs;
			struct name_info *info = sseq->info;
			/* Only the first publication of a sub-sequence is
			 * passed with the report flag set.
			 */
			int must_report = 1;

			list_for_each_entry(crs, &info->zone_list, zone_list) {
				tipc_subscr_report_overlap(s,
							   sseq->lower,
							   sseq->upper,
							   TIPC_PUBLISHED,
							   crs->ref,
							   crs->node,
							   must_report);
				must_report = 0;
			}
		}
		sseq++;
	}
}

/*
 * nametbl_find_seq - look up the name sequence for a given type
 *
 * Walks the hash chain selected by hash(type) with the RCU list iterator and
 * returns the matching entry, or NULL if there is none.
 */
static struct name_seq *nametbl_find_seq(struct net *net, u32 type)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct hlist_head *seq_head;
	struct name_seq *ns;

	seq_head = &tn->nametbl->seq_hlist[hash(type)];
	hlist_for_each_entry_rcu(ns, seq_head, ns_list) {
		if (ns->type == type)
			return ns;
	}

	return NULL;
};

/*
 * tipc_nametbl_insert_publ - insert a publication into the name table
 *
 * Rejects scopes outside TIPC_ZONE_SCOPE..TIPC_NODE_SCOPE and ranges with
 * lower > upper. Creates the name sequence for @type if it does not exist
 * yet, then inserts the publication under the sequence lock.
 * Returns the new publication or NULL on failure.
 */
struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
					     u32 lower, u32 upper, u32 scope,
					     u32 node, u32 port, u32 key)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct publication *publ;
	struct name_seq *seq = nametbl_find_seq(net, type);
	int index = hash(type);

	if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
	    (lower > upper)) {
		pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
			 type, lower, upper, scope);
		return NULL;
	}

	if (!seq)
		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
	if (!seq)
		return NULL;

	spin_lock_bh(&seq->lock);
	publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper,
					scope, node, port, key);
	spin_unlock_bh(&seq->lock);
	return publ;
}

/*
 * tipc_nametbl_remove_publ - remove a publication from the name table
 *
 * Returns the removed publication (still allocated) or NULL if it was not
 * found. When the name sequence is left without sub-sequences and without
 * subscriptions it is unlinked from its hash chain and queued for freeing
 * with kfree_rcu().
 */
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
					     u32 lower, u32 node, u32 ref,
					     u32 key)
{
	struct publication *publ;
	struct name_seq *seq = nametbl_find_seq(net, type);

	if (!seq)
		return NULL;

	spin_lock_bh(&seq->lock);
	publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
	if (!seq->first_free && list_empty(&seq->subscriptions)) {
		hlist_del_init_rcu(&seq->ns_list);
		kfree(seq->sseqs);
		spin_unlock_bh(&seq->lock);
		kfree_rcu(seq, rcu);
		return publ;
	}
	spin_unlock_bh(&seq->lock);
	return publ;
}

/**
 * tipc_nametbl_translate - perform name translation
 *
 * On entry, 'destnode' is the search domain used during translation.
 *
 * On exit:
 * - if name translation is deferred to another node/cluster/zone,
 *   leaves 'destnode' unchanged (will be non-zero) and returns 0
 * - if name translation is attempted and succeeds, sets 'destnode'
 *   to publishing node and returns port reference (will be non-zero)
 * - if name translation is attempted and fails, sets 'destnode' to 0
 *   and returns 0
 */
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
			   u32 *destnode)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct sub_seq *sseq;
	struct name_info *info;
	struct publication *publ;
	struct name_seq *seq;
	u32 ref = 0;
	u32 node = 0;

	if (!tipc_in_scope(*destnode, tn->own_addr))
		return 0;

	rcu_read_lock();
	seq = nametbl_find_seq(net, type);
	if (unlikely(!seq))
		goto not_found;
	spin_lock_bh(&seq->lock);
	sseq = nameseq_find_subseq(seq, instance);
	if (unlikely(!sseq))
		goto no_match;
	info = sseq->info;

	/* In every branch below the chosen publication is moved to the tail
	 * of its list, so the next lookup picks a different entry.
	 */

	/* Closest-First Algorithm */
	if (likely(!*destnode)) {
		if (!list_empty(&info->node_list)) {
			publ = list_first_entry(&info->node_list,
						struct publication,
						node_list);
			list_move_tail(&publ->node_list,
				       &info->node_list);
		} else if (!list_empty(&info->cluster_list)) {
			publ = list_first_entry(&info->cluster_list,
						struct publication,
						cluster_list);
			list_move_tail(&publ->cluster_list,
				       &info->cluster_list);
		} else {
			publ = list_first_entry(&info->zone_list,
						struct publication,
						zone_list);
			list_move_tail(&publ->zone_list,
				       &info->zone_list);
		}
	}

	/* Round-Robin Algorithm */
	else if (*destnode == tn->own_addr) {
		if (list_empty(&info->node_list))
			goto no_match;
		publ = list_first_entry(&info->node_list, struct publication,
					node_list);
		list_move_tail(&publ->node_list, &info->node_list);
	} else if (in_own_cluster_exact(net, *destnode)) {
		if (list_empty(&info->cluster_list))
			goto no_match;
		publ = list_first_entry(&info->cluster_list, struct publication,
					cluster_list);
		list_move_tail(&publ->cluster_list, &info->cluster_list);
	} else {
		publ = list_first_entry(&info->zone_list, struct publication,
					zone_list);
		list_move_tail(&publ->zone_list, &info->zone_list);
	}

	ref = publ->ref;
	node = publ->node;
no_match:
	spin_unlock_bh(&seq->lock);
not_found:
	rcu_read_unlock();
	*destnode = node;
	return ref;
}

/**
 * tipc_nametbl_mc_translate - find multicast destinations
 *
 * Creates list of all local ports that overlap the given multicast address;
 * also determines if any off-node ports overlap.
 *
 * Note: Publications with a scope narrower than 'limit' are ignored.
 * (i.e. local node-scope publications mustn't receive messages arriving
 * from another node, even if the multcast link brought it here)
 *
 * Returns non-zero if any off-node ports overlap
 */
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
			      u32 limit, struct tipc_plist *dports)
{
	struct name_seq *seq;
	struct sub_seq *sseq;
	struct sub_seq *sseq_stop;
	struct name_info *info;
	int res = 0;

	rcu_read_lock();
	seq = nametbl_find_seq(net, type);
	if (!seq)
		goto exit;

	spin_lock_bh(&seq->lock);
	/* Start at the sub-sequence containing (or following) 'lower' */
	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
	sseq_stop = seq->sseqs + seq->first_free;
	for (; sseq != sseq_stop; sseq++) {
		struct publication *publ;

		if (sseq->lower > upper)
			break;

		info = sseq->info;
		list_for_each_entry(publ, &info->node_list, node_list) {
			if (publ->scope <= limit)
				tipc_plist_push(dports, publ->ref);
		}

		/* More cluster entries than node entries => off-node ports */
		if (info->cluster_list_size != info->node_list_size)
			res = 1;
	}
	spin_unlock_bh(&seq->lock);
exit:
	rcu_read_unlock();
	return res;
}

/*
 * tipc_nametbl_publish - add name publication to
network name tables + */ +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key) +{ + struct publication *publ; + struct sk_buff *buf = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->nametbl_lock); + if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { + pr_warn("Publication failed, local publication limit reached (%u)\n", + TIPC_MAX_PUBLICATIONS); + spin_unlock_bh(&tn->nametbl_lock); + return NULL; + } + + publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, + tn->own_addr, port_ref, key); + if (likely(publ)) { + tn->nametbl->local_publ_count++; + buf = tipc_named_publish(net, publ); + /* Any pending external events? */ + tipc_named_process_backlog(net); + } + spin_unlock_bh(&tn->nametbl_lock); + + if (buf) + named_cluster_distribute(net, buf); + return publ; +} + +/** + * tipc_nametbl_withdraw - withdraw name publication from network name tables + */ +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key) +{ + struct publication *publ; + struct sk_buff *skb = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->nametbl_lock); + publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, + ref, key); + if (likely(publ)) { + tn->nametbl->local_publ_count--; + skb = tipc_named_withdraw(net, publ); + /* Any pending external events? 
*/ + tipc_named_process_backlog(net); + list_del_init(&publ->pport_list); + kfree_rcu(publ, rcu); + } else { + pr_err("Unable to remove local publication\n" + "(type=%u, lower=%u, ref=%u, key=%u)\n", + type, lower, ref, key); + } + spin_unlock_bh(&tn->nametbl_lock); + + if (skb) { + named_cluster_distribute(net, skb); + return 1; + } + return 0; +} + +/** + * tipc_nametbl_subscribe - add a subscription object to the name table + */ +void tipc_nametbl_subscribe(struct tipc_subscription *s) +{ + struct tipc_net *tn = net_generic(s->net, tipc_net_id); + u32 type = s->seq.type; + int index = hash(type); + struct name_seq *seq; + + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, type); + if (!seq) + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); + if (seq) { + spin_lock_bh(&seq->lock); + tipc_nameseq_subscribe(seq, s); + spin_unlock_bh(&seq->lock); + } else { + pr_warn("Failed to create subscription for {%u,%u,%u}\n", + s->seq.type, s->seq.lower, s->seq.upper); + } + spin_unlock_bh(&tn->nametbl_lock); +} + +/** + * tipc_nametbl_unsubscribe - remove a subscription object from name table + */ +void tipc_nametbl_unsubscribe(struct tipc_subscription *s) +{ + struct tipc_net *tn = net_generic(s->net, tipc_net_id); + struct name_seq *seq; + + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, s->seq.type); + if (seq != NULL) { + spin_lock_bh(&seq->lock); + list_del_init(&s->nameseq_list); + if (!seq->first_free && list_empty(&seq->subscriptions)) { + hlist_del_init_rcu(&seq->ns_list); + kfree(seq->sseqs); + spin_unlock_bh(&seq->lock); + kfree_rcu(seq, rcu); + } else { + spin_unlock_bh(&seq->lock); + } + } + spin_unlock_bh(&tn->nametbl_lock); +} + +int tipc_nametbl_init(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl; + int i; + + tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC); + if (!tipc_nametbl) + return -ENOMEM; + + for (i = 0; i < TIPC_NAMETBL_SIZE; 
i++) + INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]); + + INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + tn->nametbl = tipc_nametbl; + spin_lock_init(&tn->nametbl_lock); + return 0; +} + +/** + * tipc_purge_publications - remove all publications for a given type + * + * tipc_nametbl_lock must be held when calling this function + */ +static void tipc_purge_publications(struct net *net, struct name_seq *seq) +{ + struct publication *publ, *safe; + struct sub_seq *sseq; + struct name_info *info; + + spin_lock_bh(&seq->lock); + sseq = seq->sseqs; + info = sseq->info; + list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, + publ->ref, publ->key); + kfree_rcu(publ, rcu); + } + hlist_del_init_rcu(&seq->ns_list); + kfree(seq->sseqs); + spin_unlock_bh(&seq->lock); + + kfree_rcu(seq, rcu); +} + +void tipc_nametbl_stop(struct net *net) +{ + u32 i; + struct name_seq *seq; + struct hlist_head *seq_head; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl = tn->nametbl; + + /* Verify name table is empty and purge any lingering + * publications, then release the name table + */ + spin_lock_bh(&tn->nametbl_lock); + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { + if (hlist_empty(&tipc_nametbl->seq_hlist[i])) + continue; + seq_head = &tipc_nametbl->seq_hlist[i]; + hlist_for_each_entry_rcu(seq, seq_head, ns_list) { + tipc_purge_publications(net, seq); + } + } + spin_unlock_bh(&tn->nametbl_lock); + + synchronize_net(); + kfree(tipc_nametbl); + +} + +static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, + struct name_seq *seq, + struct sub_seq *sseq, u32 *last_publ) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *publ; + struct publication *p; + + if (*last_publ) { + list_for_each_entry(p, &sseq->info->zone_list, 
zone_list) + if (p->key == *last_publ) + break; + if (p->key != *last_publ) + return -EPIPE; + } else { + p = list_first_entry(&sseq->info->zone_list, struct publication, + zone_list); + } + + list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) { + *last_publ = p->key; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_NAME_TABLE_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE); + if (!attrs) + goto msg_full; + + publ = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + if (!publ) + goto attr_msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, seq->type)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sseq->lower)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sseq->upper)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) + goto publ_msg_full; + + nla_nest_end(msg->skb, publ); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + } + *last_publ = 0; + + return 0; + +publ_msg_full: + nla_nest_cancel(msg->skb, publ); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, + u32 *last_lower, u32 *last_publ) +{ + struct sub_seq *sseq; + struct sub_seq *sseq_start; + int err; + + if (*last_lower) { + sseq_start = nameseq_find_subseq(seq, *last_lower); + if (!sseq_start) + return -EPIPE; + } else { + sseq_start = seq->sseqs; + } + + for (sseq = sseq_start; sseq != &seq->sseqs[seq->first_free]; sseq++) { + err = __tipc_nl_add_nametable_publ(msg, seq, sseq, last_publ); + if (err) { + 
*last_lower = sseq->lower; + return err; + } + } + *last_lower = 0; + + return 0; +} + +static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_publ) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct hlist_head *seq_head; + struct name_seq *seq = NULL; + int err; + int i; + + if (*last_type) + i = hash(*last_type); + else + i = 0; + + for (; i < TIPC_NAMETBL_SIZE; i++) { + seq_head = &tn->nametbl->seq_hlist[i]; + + if (*last_type) { + seq = nametbl_find_seq(net, *last_type); + if (!seq) + return -EPIPE; + } else { + hlist_for_each_entry_rcu(seq, seq_head, ns_list) + break; + if (!seq) + continue; + } + + hlist_for_each_entry_from_rcu(seq, ns_list) { + spin_lock_bh(&seq->lock); + err = __tipc_nl_subseq_list(msg, seq, last_lower, + last_publ); + + if (err) { + *last_type = seq->type; + spin_unlock_bh(&seq->lock); + return err; + } + spin_unlock_bh(&seq->lock); + } + *last_type = 0; + } + return 0; +} + +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int done = cb->args[3]; + u32 last_type = cb->args[0]; + u32 last_lower = cb->args[1]; + u32 last_publ = cb->args[2]; + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ); + if (!err) { + done = 1; + } else if (err != -EMSGSIZE) { + /* We never set seq or call nl_dump_check_consistent() this + * means that setting prev_seq here will cause the consistence + * check to fail in the netlink callback handler. Resulting in + * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if + * we got an error. 
+ */ + cb->prev_seq = 1; + } + rcu_read_unlock(); + + cb->args[0] = last_type; + cb->args[1] = last_lower; + cb->args[2] = last_publ; + cb->args[3] = done; + + return skb->len; +} + +void tipc_plist_push(struct tipc_plist *pl, u32 port) +{ + struct tipc_plist *nl; + + if (likely(!pl->port)) { + pl->port = port; + return; + } + if (pl->port == port) + return; + list_for_each_entry(nl, &pl->list, list) { + if (nl->port == port) + return; + } + nl = kmalloc(sizeof(*nl), GFP_ATOMIC); + if (nl) { + nl->port = port; + list_add(&nl->list, &pl->list); + } +} + +u32 tipc_plist_pop(struct tipc_plist *pl) +{ + struct tipc_plist *nl; + u32 port = 0; + + if (likely(list_empty(&pl->list))) { + port = pl->port; + pl->port = 0; + return port; + } + nl = list_first_entry(&pl->list, typeof(*nl), list); + port = nl->port; + list_del(&nl->list); + kfree(nl); + return port; +} diff --git a/kernel/net/tipc/name_table.h b/kernel/net/tipc/name_table.h new file mode 100644 index 000000000..1524a7383 --- /dev/null +++ b/kernel/net/tipc/name_table.h @@ -0,0 +1,133 @@ +/* + * net/tipc/name_table.h: Include file for TIPC name table code + * + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NAME_TABLE_H +#define _TIPC_NAME_TABLE_H + +struct tipc_subscription; +struct tipc_plist; + +/* + * TIPC name types reserved for internal TIPC use (both current and planned) + */ +#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1) +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ + +/** + * struct publication - info about a published (name or) name sequence + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @scope: scope of publication + * @node: network address of publishing port's node + * @ref: publishing port + * @key: publication key + * @nodesub_list: subscription to "node down" event (off-node publication only) + * @local_list: adjacent entries in list of publications made by this node + * @pport_list: adjacent entries in list of publications made by this port + * @node_list: adjacent matching name seq publications with >= node scope + * 
@cluster_list: adjacent matching name seq publications with >= cluster scope + * @zone_list: adjacent matching name seq publications with >= zone scope + * @rcu: RCU callback head used for deferred freeing + * + * Note that the node list, cluster list, and zone list are circular lists. + */ +struct publication { + u32 type; + u32 lower; + u32 upper; + u32 scope; + u32 node; + u32 ref; + u32 key; + struct list_head nodesub_list; + struct list_head local_list; + struct list_head pport_list; + struct list_head node_list; + struct list_head cluster_list; + struct list_head zone_list; + struct rcu_head rcu; +}; + +/** + * struct name_table - table containing all existing port name publications + * @seq_hlist: name sequence hash lists + * @publ_list: publication lists + * @local_publ_count: number of publications issued by this node + */ +struct name_table { + struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE]; + struct list_head publ_list[TIPC_PUBL_SCOPE_NUM]; + u32 local_publ_count; +}; + +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); + +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_plist *dports); +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key); +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 ref, u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, + u32 key); +void tipc_nametbl_subscribe(struct tipc_subscription *s); +void tipc_nametbl_unsubscribe(struct tipc_subscription *s); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); + +struct tipc_plist { + struct list_head
list; + u32 port; +}; + +static inline void tipc_plist_init(struct tipc_plist *pl) +{ + INIT_LIST_HEAD(&pl->list); + pl->port = 0; +} + +void tipc_plist_push(struct tipc_plist *pl, u32 port); +u32 tipc_plist_pop(struct tipc_plist *pl); + +#endif diff --git a/kernel/net/tipc/net.c b/kernel/net/tipc/net.c new file mode 100644 index 000000000..a54f3cbe2 --- /dev/null +++ b/kernel/net/tipc/net.c @@ -0,0 +1,254 @@ +/* + * net/tipc/net.c: TIPC network routing code + * + * Copyright (c) 1995-2006, 2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "net.h" +#include "name_distr.h" +#include "subscr.h" +#include "socket.h" +#include "node.h" + +/* + * Netlink policy for the attributes nested inside TIPC_NLA_NET. Every + * attribute that tipc_nl_net_set() reads with nla_get_u32() is listed as + * NLA_U32 so that nla_parse_nested() rejects payloads that are too short. + */ +static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { + [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NET_ID] = { .type = NLA_U32 }, + [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 } +}; + +/* + * The TIPC locking policy is designed to ensure a very fine locking + * granularity, permitting complete parallel access to individual + * port and node/link instances. The code consists of four major + * locking domains, each protected with their own disjunct set of locks. + * + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. + * + * 2: The node and link level. + * All node instances are saved into two lists, tipc_node_list and + * node_htable. The two lists are protected by node_list_lock on write + * side, and they are guarded with RCU lock on read side. A node + * instance is destroyed only when the TIPC module is removed, and we + * can be sure that no user is accessing the node at that + * moment. Therefore, except for iterating the two lists within RCU + * protection, there is no need to hold RCU when accessing a node + * instance in other places.
+ * + * In addition, all members in node structure including link instances + * are protected by node spin lock. + * + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). + * + * This layer has four different locks: + * - The tipc_port spin_lock. This is protecting each port instance + * from parallel data access and removal. Since we cannot place + * this lock in the port itself, it has been placed in the + * corresponding reference table entry, which has the same life + * cycle as the module. This entry is difficult to access from + * outside the TIPC core, however, so a pointer to the lock has + * been added in the port instance, to be used for unlocking + * only. + * - A read/write lock to protect the reference table itself (ref.c). + * (Nobody is using read-only access to this, so it can just as + * well be changed to a spin_lock) + * - A spin lock to protect the registry of kernel/driver users (reg.c) + * - A global spin_lock (tipc_port_lock), whose only task is to ensure + * consistency where more than one port is involved in an operation, + * i.e., when a port is part of a linked list of ports. + * There are two such lists; 'port_list', which is used for management, + * and 'wait_list', which is used to queue ports during congestion. + * + * 4: The name table (name_table.c, name_distr.c, subscription.c) + * - There is one big read/write-lock (tipc_nametbl_lock) protecting the + * overall name table structure. Nothing must be added/removed to + * this structure without holding write access to it. + * - There is one local spin_lock per sub_sequence, which can be seen + * as a sub-domain to the tipc_nametbl_lock domain. It is used only + * for translation operations, and is needed because a translation + * steps the root of the 'publication' linked list between each lookup.
+ * This is always used within the scope of a tipc_nametbl_lock(read). + * - A local spin_lock protecting the queue of subscriber events. +*/ + +int tipc_net_start(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + char addr_string[16]; + int res; + + tn->own_addr = addr; + tipc_named_reinit(net); + tipc_sk_reinit(net); + res = tipc_bclink_init(net); + if (res) + return res; + + tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, + TIPC_ZONE_SCOPE, 0, tn->own_addr); + + pr_info("Started in network mode\n"); + pr_info("Own node address %s, network identity %u\n", + tipc_addr_string_fill(addr_string, tn->own_addr), + tn->net_id); + return 0; +} + +void tipc_net_stop(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (!tn->own_addr) + return; + + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0, + tn->own_addr); + rtnl_lock(); + tipc_bearer_stop(net); + tipc_bclink_stop(net); + tipc_node_stop(net); + rtnl_unlock(); + + pr_info("Left network mode\n"); +} + +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_NET_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int err; + int done = cb->args[0]; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + 
msg.seq = cb->nlh->nlmsg_seq; + + err = __tipc_nl_add_net(net, &msg); + if (err) + goto out; + + done = 1; +out: + cb->args[0] = done; + + return skb->len; +} + +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + int err; + + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy); + if (err) + return err; + + if (attrs[TIPC_NLA_NET_ID]) { + u32 val; + + /* Can't change net id once TIPC has joined a network */ + if (tn->own_addr) + return -EPERM; + + val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); + if (val < 1 || val > 9999) + return -EINVAL; + + tn->net_id = val; + } + + if (attrs[TIPC_NLA_NET_ADDR]) { + u32 addr; + + /* Can't change net addr once TIPC has joined a network */ + if (tn->own_addr) + return -EPERM; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!tipc_addr_node_valid(addr)) + return -EINVAL; + + rtnl_lock(); + tipc_net_start(net, addr); + rtnl_unlock(); + } + + return 0; +} diff --git a/kernel/net/tipc/net.h b/kernel/net/tipc/net.h new file mode 100644 index 000000000..77a7a1189 --- /dev/null +++ b/kernel/net/tipc/net.h @@ -0,0 +1,49 @@ +/* + * net/tipc/net.h: Include file for TIPC network routing code + * + * Copyright (c) 1995-2006, 2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_NET_H +#define _TIPC_NET_H + +#include <net/genetlink.h> + +int tipc_net_start(struct net *net, u32 addr); + +void tipc_net_stop(struct net *net); + +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); + +#endif diff --git a/kernel/net/tipc/netlink.c b/kernel/net/tipc/netlink.c new file mode 100644 index 000000000..7f6475efc --- /dev/null +++ b/kernel/net/tipc/netlink.c @@ -0,0 +1,178 @@ +/* + * net/tipc/netlink.c: TIPC configuration handling + * + * Copyright (c) 2005-2006, 2014, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "socket.h" +#include "name_table.h" +#include "bearer.h" +#include "link.h" +#include "node.h" +#include "net.h" +#include <net/genetlink.h> + +static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { + [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, + [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, + [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, + [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, + [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, + [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, + [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, + [TIPC_NLA_NET] = { .type = NLA_NESTED, }, + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } +}; + +/* Users of the legacy API (tipc-config) can't handle that we add operations, + * so we have a separate genl handling for the new API. 
+ */ +struct genl_family tipc_genl_family = { + .id = GENL_ID_GENERATE, + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .netnsok = true, +}; + +static const struct genl_ops tipc_genl_v2_ops[] = { + { + .cmd = TIPC_NL_BEARER_DISABLE, + .doit = tipc_nl_bearer_disable, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_ENABLE, + .doit = tipc_nl_bearer_enable, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_GET, + .doit = tipc_nl_bearer_get, + .dumpit = tipc_nl_bearer_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_SET, + .doit = tipc_nl_bearer_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_SOCK_GET, + .dumpit = tipc_nl_sk_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_PUBL_GET, + .dumpit = tipc_nl_publ_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_GET, + .doit = tipc_nl_link_get, + .dumpit = tipc_nl_link_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_SET, + .doit = tipc_nl_link_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_RESET_STATS, + .doit = tipc_nl_link_reset_stats, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MEDIA_GET, + .doit = tipc_nl_media_get, + .dumpit = tipc_nl_media_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MEDIA_SET, + .doit = tipc_nl_media_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NODE_GET, + .dumpit = tipc_nl_node_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NET_GET, + .dumpit = tipc_nl_net_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NET_SET, + .doit = tipc_nl_net_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NAME_TABLE_GET, + .dumpit = tipc_nl_name_table_dump, + .policy = tipc_nl_policy, + } +}; + +int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) +{ + u32 maxattr = tipc_genl_family.maxattr; + + *attr = tipc_genl_family.attrbuf; + if (!*attr) + return 
-EOPNOTSUPP; + + return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); +} + +int tipc_netlink_start(void) +{ + int res; + + res = genl_register_family_with_ops(&tipc_genl_family, + tipc_genl_v2_ops); + if (res) { + pr_err("Failed to register netlink interface\n"); + return res; + } + return 0; +} + +void tipc_netlink_stop(void) +{ + genl_unregister_family(&tipc_genl_family); +} diff --git a/kernel/net/tipc/netlink.h b/kernel/net/tipc/netlink.h new file mode 100644 index 000000000..08a1db67b --- /dev/null +++ b/kernel/net/tipc/netlink.h @@ -0,0 +1,53 @@ +/* + * net/tipc/netlink.h: Include file for TIPC netlink code + * + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NETLINK_H +#define _TIPC_NETLINK_H + +extern struct genl_family tipc_genl_family; +int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); + +struct tipc_nl_msg { + struct sk_buff *skb; + u32 portid; + u32 seq; +}; + +int tipc_netlink_start(void); +int tipc_netlink_compat_start(void); +void tipc_netlink_stop(void); +void tipc_netlink_compat_stop(void); + +#endif diff --git a/kernel/net/tipc/netlink_compat.c b/kernel/net/tipc/netlink_compat.c new file mode 100644 index 000000000..ce9121e8e --- /dev/null +++ b/kernel/net/tipc/netlink_compat.c @@ -0,0 +1,1084 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "name_table.h" +#include "socket.h" +#include "node.h" +#include "net.h" +#include <net/genetlink.h> +#include <linux/tipc_config.h> + +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. 
+ */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "<truncated>\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_type; + int rep_size; + int req_type; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*header)(struct tipc_nl_compat_msg *); + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) 
+{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + struct nlattr **attrs; + + err = tipc_nlmsg_parse(nlmsg, &attrs); + if (err) + goto err_out; + + err = (*cmd->format)(msg, attrs); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. 
+ */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *arg; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + + if (cmd->header) + (*cmd->header)(msg); + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + return -ENOMEM; + } + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) + kfree_skb(msg->rep); + + kfree_skb(arg); + + return err; +} + +static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *doit_buf; + struct sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + err = (*cmd->transcode)(trans_buf, msg); + if (err) + goto trans_out; + + attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL); + if (err) + goto parse_out; + + doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto parse_out; + } + + doit_buf->sk = msg->dst_sk; + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + err = (*cmd->doit)(doit_buf, &info); + + kfree_skb(doit_buf); +parse_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct 
tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + + nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / 
total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], + NULL); + + nla_parse_nested(stats, TIPC_NLA_STATS_MAX, 
link[TIPC_NLA_LINK_STATS], + NULL); + + name = (char *)TLV_DATA(msg->req); + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); + else + tipc_tlv_sprintf(msg->rep, " DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + 
perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + +static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_link_info link_info; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); + link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); + 
strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, + &link_info, sizeof(link_info)); +} + +static int tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *link; + struct nlattr *prop; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + if (!prop) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) { + if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) { + if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value))) + return -EMSGSIZE; + } + + nla_nest_end(skb, prop); + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *link; + + name = (char *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) +{ + int i; + u32 depth; + struct tipc_name_table_query *ntq; + static const char * const header[] = { + "Type ", + "Lower Upper ", + "Port Identity ", + "Publication Scope" + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + + if (depth > 4) + depth = 4; + for (i = 0; i < depth; i++) + tipc_tlv_sprintf(msg->rep, header[i]); + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int 
tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char port_str[27]; + struct tipc_name_table_query *ntq; + struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + u32 node, depth, type, lowbound, upbound; + static const char * const scope_str[] = {"", " zone", " cluster", + " node"}; + + nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], + NULL); + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + type = ntohl(ntq->type); + lowbound = ntohl(ntq->lowbound); + upbound = ntohl(ntq->upbound); + + if (!(depth & TIPC_NTQ_ALLTYPES) && + (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]))) + return 0; + if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]))) + return 0; + if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]))) + return 0; + + tipc_tlv_sprintf(msg->rep, "%-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])); + + if (depth == 1) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]), + nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])); + + if (depth == 2) + goto out; + + node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]); + sprintf(port_str, "<%u.%u.%u:%u>", tipc_zone(node), tipc_cluster(node), + tipc_node(node), nla_get_u32(publ[TIPC_NLA_PUBL_REF])); + tipc_tlv_sprintf(msg->rep, "%-26s ", port_str); + + if (depth == 3) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %s", + nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); +out: + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + u32 type, lower, upper; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); + + type = 
nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); + lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); + upper = nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]); + + if (lower == upper) + tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lower); + else + tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lower, upper); + + return 0; +} + +static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) +{ + int err; + void *hdr; + struct nlattr *nest; + struct sk_buff *args; + struct tipc_nl_compat_cmd_dump dump; + + args = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!args) + return -ENOMEM; + + hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_PUBL_GET); + + nest = nla_nest_start(args, TIPC_NLA_SOCK); + if (!nest) { + kfree_skb(args); + return -EMSGSIZE; + } + + if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock)) { + kfree_skb(args); + return -EMSGSIZE; + } + + nla_nest_end(args, nest); + genlmsg_end(args, hdr); + + dump.dumpit = tipc_nl_publ_dump; + dump.format = __tipc_nl_compat_publ_dump; + + err = __tipc_nl_compat_dumpit(&dump, msg, args); + + kfree_skb(args); + + return err; +} + +static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + int err; + u32 sock_ref; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], NULL); + + sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); + + if (sock[TIPC_NLA_SOCK_CON]) { + u32 node; + struct nlattr *con[TIPC_NLA_CON_MAX + 1]; + + nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON], + NULL); + + node = nla_get_u32(con[TIPC_NLA_CON_NODE]); + tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", + tipc_zone(node), + tipc_cluster(node), + tipc_node(node), + nla_get_u32(con[TIPC_NLA_CON_SOCK])); + + if (con[TIPC_NLA_CON_FLAG]) + tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n", + nla_get_u32(con[TIPC_NLA_CON_TYPE]), + nla_get_u32(con[TIPC_NLA_CON_INST])); + else + 
tipc_tlv_sprintf(msg->rep, "\n"); + } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) { + tipc_tlv_sprintf(msg->rep, " bound to"); + + err = tipc_nl_compat_publ_dump(msg, sock_ref); + if (err) + return err; + } + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + + nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, + nla_data(media[TIPC_NLA_MEDIA_NAME]), + nla_len(media[TIPC_NLA_MEDIA_NAME])); +} + +static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct tipc_node_info node_info; + struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + + nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], NULL); + + node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); + node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &node_info, + sizeof(node_info)); +} + +static int tipc_nl_compat_net_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + u32 val; + struct nlattr *net; + + val = ntohl(*(__be32 *)TLV_DATA(msg->req)); + + net = nla_nest_start(skb, TIPC_NLA_NET); + if (!net) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) { + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_NETID) { + if (nla_put_u32(skb, TIPC_NLA_NET_ID, val)) + return -EMSGSIZE; + } + nla_nest_end(skb, net); + + return 0; +} + +static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + __be32 id; + struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + + nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], NULL); + id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); +} + +static int 
tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg) +{ + msg->rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN); + if (!msg->rep) + return -ENOMEM; + + tipc_tlv_init(msg->rep, TIPC_TLV_ULTRA_STRING); + tipc_tlv_sprintf(msg->rep, "TIPC version " TIPC_MOD_VER "\n"); + + return 0; +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; + + memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); + + switch (msg->cmd) { + case TIPC_CMD_NOOP: + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + return 0; + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = tipc_nl_bearer_disable; + doit.transcode = tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_stat_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_LINKS: + msg->req_type = TIPC_TLV_NET_ADDR; + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: + msg->req_type = TIPC_TLV_LINK_CONFIG; + doit.doit = tipc_nl_link_set; + doit.transcode = tipc_nl_compat_link_set; + return 
tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_RESET_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + doit.doit = tipc_nl_link_reset_stats; + doit.transcode = tipc_nl_compat_link_reset_stats; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_NAME_TABLE: + msg->req_type = TIPC_TLV_NAME_TBL_QUERY; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.header = tipc_nl_compat_name_table_dump_header; + dump.dumpit = tipc_nl_name_table_dump; + dump.format = tipc_nl_compat_name_table_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_PORTS: + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_sk_dump; + dump.format = tipc_nl_compat_sk_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_MEDIA_NAMES: + msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME); + dump.dumpit = tipc_nl_media_dump; + dump.format = tipc_nl_compat_media_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_NODES: + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump; + dump.format = tipc_nl_compat_node_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_NODE_ADDR: + msg->req_type = TIPC_TLV_NET_ADDR; + doit.doit = tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SET_NETID: + msg->req_type = TIPC_TLV_UNSIGNED; + doit.doit = tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_GET_NETID: + msg->rep_size = sizeof(u32); + dump.dumpit = tipc_nl_net_dump; + dump.format = tipc_nl_compat_net_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_STATS: + return tipc_cmd_show_stats_compat(msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int len; + struct tipc_nl_compat_msg msg; + 
struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + struct net *net = genl_info_net(info); + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.dst_sk = info->dst_sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = tipc_nl_compat_handle(&msg); + if (err == -EOPNOTSUPP) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + +static struct genl_family tipc_genl_compat_family = { + .id = GENL_ID_GENERATE, + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, +}; + +static struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .doit = tipc_nl_compat_recv, + }, +}; + +int tipc_netlink_compat_start(void) +{ + int res; + + res = genl_register_family_with_ops(&tipc_genl_compat_family, + tipc_genl_compat_ops); + if (res) { + pr_err("Failed to register legacy compat interface\n"); + return res; + } + + return 0; +} + +void tipc_netlink_compat_stop(void) +{ + genl_unregister_family(&tipc_genl_compat_family); +} diff --git a/kernel/net/tipc/node.c b/kernel/net/tipc/node.c new file mode 100644 index 
000000000..22c059ad2 --- /dev/null +++ b/kernel/net/tipc/node.c @@ -0,0 +1,621 @@ +/* + * net/tipc/node.c: TIPC node management routines + * + * Copyright (c) 2000-2006, 2012-2014, Ericsson AB + * Copyright (c) 2005-2006, 2010-2014, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
 */

#include "core.h"
#include "link.h"
#include "node.h"
#include "name_distr.h"
#include "socket.h"

static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
static void tipc_node_delete(struct tipc_node *node);

/* One entry of a node's conn_sks list: a local port together with the
 * peer node and peer port it was registered with.
 */
struct tipc_sock_conn {
	u32 port;
	u32 peer_port;
	u32 peer_node;
	struct list_head list;
};

static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
	[TIPC_NLA_NODE_UNSPEC]		= { .type = NLA_UNSPEC },
	[TIPC_NLA_NODE_ADDR]		= { .type = NLA_U32 },
	[TIPC_NLA_NODE_UP]		= { .type = NLA_FLAG }
};

/*
 * A trivial power-of-two bitmask technique is used for speed, since this
 * operation is done for every incoming TIPC packet. The number of hash table
 * entries has been chosen so that no hash chain exceeds 8 nodes and will
 * usually be much smaller (typically only a single node).
 */
static unsigned int tipc_hashfn(u32 addr)
{
	return addr & (NODE_HTABLE_SIZE - 1);
}

/* kref release callback: deletes the node that embeds @kref */
static void tipc_node_kref_release(struct kref *kref)
{
	struct tipc_node *node = container_of(kref, struct tipc_node, kref);

	tipc_node_delete(node);
}

/* Drop one reference to @node; the node is deleted when the last one goes */
void tipc_node_put(struct tipc_node *node)
{
	kref_put(&node->kref, tipc_node_kref_release);
}

/* Take one reference to @node */
static void tipc_node_get(struct tipc_node *node)
{
	kref_get(&node->kref);
}

/*
 * tipc_node_find - locate specified node object, if it exists
 *
 * Returns the node with a reference taken on it, or NULL when @addr fails
 * the in_own_cluster_exact() test or no node with that address is hashed.
 */
struct tipc_node *tipc_node_find(struct net *net, u32 addr)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_node *node;

	if (unlikely(!in_own_cluster_exact(net, addr)))
		return NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
				 hash) {
		if (node->addr == addr) {
			tipc_node_get(node);
			rcu_read_unlock();
			return node;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/* Return the node for @addr, creating it if it does not exist yet.
 *
 * Runs under tn->node_list_lock. An existing node is returned as found by
 * tipc_node_find(). A new node is zero-allocated, added to the hash table
 * and inserted into tn->node_list in front of the first entry with a
 * higher address. Returns NULL if the allocation fails.
 */
struct tipc_node *tipc_node_create(struct net *net, u32 addr)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_node *n_ptr, *temp_node;

	spin_lock_bh(&tn->node_list_lock);
	n_ptr = tipc_node_find(net, addr);
	if (n_ptr)
		goto exit;
	n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
	if (!n_ptr) {
		pr_warn("Node creation failed, no memory\n");
		goto exit;
	}
	n_ptr->addr = addr;
	n_ptr->net = net;
	kref_init(&n_ptr->kref);
	spin_lock_init(&n_ptr->lock);
	INIT_HLIST_NODE(&n_ptr->hash);
	INIT_LIST_HEAD(&n_ptr->list);
	INIT_LIST_HEAD(&n_ptr->publ_list);
	INIT_LIST_HEAD(&n_ptr->conn_sks);
	__skb_queue_head_init(&n_ptr->bclink.deferdq);
	hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
	/* Stop at the first node with a higher address; the new node is
	 * linked in front of it by the list_add_tail_rcu() below.
	 */
	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
		if (n_ptr->addr < temp_node->addr)
			break;
	}
	list_add_tail_rcu(&n_ptr->list, &temp_node->list);
	n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
	n_ptr->signature = INVALID_NODE_SIG;
	/* Taken in addition to the reference set up by kref_init() above */
	tipc_node_get(n_ptr);
exit:
	spin_unlock_bh(&tn->node_list_lock);
	return n_ptr;
}

/* Unlink @node from the node list and the hash table and free it after an
 * RCU grace period.
 */
static void tipc_node_delete(struct tipc_node *node)
{
	list_del_rcu(&node->list);
	hlist_del_rcu(&node->hash);
	kfree_rcu(node, rcu);
}

/* Drop one reference on every node in the node list of @net */
void tipc_node_stop(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_node *node, *t_node;

	spin_lock_bh(&tn->node_list_lock);
	list_for_each_entry_safe(node, t_node, &tn->node_list, list)
		tipc_node_put(node);
	spin_unlock_bh(&tn->node_list_lock);
}

/* Record a connection between local @port and @peer_port on node @dnode.
 *
 * Nothing is recorded when @dnode is the own node. Returns 0 on success
 * and -EHOSTUNREACH when the node is unknown or the entry cannot be
 * allocated.
 */
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
{
	struct tipc_node *node;
	struct tipc_sock_conn *conn;
	int err = 0;

	if (in_own_node(net, dnode))
		return 0;

	node = tipc_node_find(net, dnode);
	if (!node) {
		pr_warn("Connecting sock to node 0x%x failed\n", dnode);
		return -EHOSTUNREACH;
	}
	conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
	if (!conn) {
		err = -EHOSTUNREACH;
		goto exit;
	}
	conn->peer_node = dnode;
	conn->port = port;
	conn->peer_port = peer_port;

	tipc_node_lock(node);
	list_add_tail(&conn->list, &node->conn_sks);
	tipc_node_unlock(node);
exit:
	/* Release the reference taken by tipc_node_find() */
	tipc_node_put(node);
	return err;
}

/* Remove and free every connection entry of node @dnode whose local port
 * equals @port. Does nothing for the own node or an unknown node.
 */
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
{
	struct tipc_node *node;
	struct tipc_sock_conn *conn, *safe;

	if (in_own_node(net, dnode))
		return;

	node = tipc_node_find(net, dnode);
	if (!node)
		return;

	tipc_node_lock(node);
	list_for_each_entry_safe(conn, safe, &node->conn_sks, list) {
		if (port != conn->port)
			continue;
		list_del(&conn->list);
		kfree(conn);
	}
	tipc_node_unlock(node);
	/* Release the reference taken by tipc_node_find() */
	tipc_node_put(node);
}

/**
 * tipc_node_link_up - handle addition of link
 *
 * Link becomes active (alone or shared) or standby, depending on its priority.
 */
void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
	struct tipc_link **active = &n_ptr->active_links[0];

	n_ptr->working_links++;
	n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
	n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;

	pr_debug("Established link <%s> on network plane %c\n",
		 l_ptr->name, l_ptr->net_plane);

	/* First working link: it fills both active slots */
	if (!active[0]) {
		active[0] = active[1] = l_ptr;
		node_established_contact(n_ptr);
		goto exit;
	}
	if (l_ptr->priority < active[0]->priority) {
		pr_debug("New link <%s> becomes standby\n", l_ptr->name);
		goto exit;
	}
	tipc_link_dup_queue_xmit(active[0], l_ptr);
	/* Same priority: the new link takes slot 0 only */
	if (l_ptr->priority == active[0]->priority) {
		active[0] = l_ptr;
		goto exit;
	}
	/* Higher priority: the new link replaces both active links */
	pr_debug("Old link <%s> becomes standby\n", active[0]->name);
	if (active[1] != active[0])
		pr_debug("Old link <%s> becomes standby\n", active[1]->name);
	active[0] = active[1] = l_ptr;
exit:
	/* Leave room for changeover header when returning 'mtu' to users: */
	n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE;
	n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE;
}

/**
 * node_select_active_links - select active link
 */
static void node_select_active_links(struct tipc_node *n_ptr)
{
	struct tipc_link **active = &n_ptr->active_links[0];
	u32 i;
+ u32 highest_prio = 0; + + active[0] = active[1] = NULL; + + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link *l_ptr = n_ptr->links[i]; + + if (!l_ptr || !tipc_link_is_up(l_ptr) || + (l_ptr->priority < highest_prio)) + continue; + + if (l_ptr->priority > highest_prio) { + highest_prio = l_ptr->priority; + active[0] = active[1] = l_ptr; + } else { + active[1] = l_ptr; + } + } +} + +/** + * tipc_node_link_down - handle loss of link + */ +void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +{ + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + struct tipc_link **active; + + n_ptr->working_links--; + n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; + + if (!tipc_link_is_active(l_ptr)) { + pr_debug("Lost standby link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); + return; + } + pr_debug("Lost link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); + + active = &n_ptr->active_links[0]; + if (active[0] == l_ptr) + active[0] = active[1]; + if (active[1] == l_ptr) + active[1] = active[0]; + if (active[0] == l_ptr) + node_select_active_links(n_ptr); + if (tipc_node_is_up(n_ptr)) + tipc_link_failover_send_queue(l_ptr); + else + node_lost_contact(n_ptr); + + /* Leave room for changeover header when returning 'mtu' to users: */ + if (active[0]) { + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; + return; + } + /* Loopback link went down? No fragmentation needed from now on. 
*/ + if (n_ptr->addr == tn->own_addr) { + n_ptr->act_mtus[0] = MAX_MSG_SIZE; + n_ptr->act_mtus[1] = MAX_MSG_SIZE; + } +} + +int tipc_node_active_links(struct tipc_node *n_ptr) +{ + return n_ptr->active_links[0] != NULL; +} + +int tipc_node_is_up(struct tipc_node *n_ptr) +{ + return tipc_node_active_links(n_ptr); +} + +void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +{ + n_ptr->links[l_ptr->bearer_id] = l_ptr; + n_ptr->link_cnt++; +} + +void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) +{ + int i; + + for (i = 0; i < MAX_BEARERS; i++) { + if (l_ptr != n_ptr->links[i]) + continue; + n_ptr->links[i] = NULL; + n_ptr->link_cnt--; + } +} + +static void node_established_contact(struct tipc_node *n_ptr) +{ + n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; + n_ptr->bclink.oos_state = 0; + n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); + tipc_bclink_add_node(n_ptr->net, n_ptr->addr); +} + +static void node_lost_contact(struct tipc_node *n_ptr) +{ + char addr_string[16]; + struct tipc_sock_conn *conn, *safe; + struct list_head *conns = &n_ptr->conn_sks; + struct sk_buff *skb; + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + uint i; + + pr_debug("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); + + /* Flush broadcast link info associated with lost node */ + if (n_ptr->bclink.recv_permitted) { + __skb_queue_purge(&n_ptr->bclink.deferdq); + + if (n_ptr->bclink.reasm_buf) { + kfree_skb(n_ptr->bclink.reasm_buf); + n_ptr->bclink.reasm_buf = NULL; + } + + tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); + tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); + + n_ptr->bclink.recv_permitted = false; + } + + /* Abort any ongoing link failover */ + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link *l_ptr = n_ptr->links[i]; + if (!l_ptr) + continue; + l_ptr->flags &= ~LINK_FAILINGOVER; + l_ptr->failover_checkpt = 0; + l_ptr->failover_pkts = 0; + kfree_skb(l_ptr->failover_skb); 
+ l_ptr->failover_skb = NULL; + tipc_link_reset_fragments(l_ptr); + } + + n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; + + /* Prevent re-contact with node until cleanup is done */ + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN; + + /* Notify publications from this node */ + n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; + + /* Notify sockets connected to node */ + list_for_each_entry_safe(conn, safe, conns, list) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tn->own_addr, + conn->peer_node, conn->port, + conn->peer_port, TIPC_ERR_NO_NODE); + if (likely(skb)) { + skb_queue_tail(n_ptr->inputq, skb); + n_ptr->action_flags |= TIPC_MSG_EVT; + } + list_del(&conn->list); + kfree(conn); + } +} + +/** + * tipc_node_get_linkname - get the name of a link + * + * @net: the applicable net namespace + * @bearer_id: id of the bearer + * @addr: peer node address + * @linkname: link name output buffer + * @len: maximum number of bytes copied into @linkname + * + * Returns 0 on success, or -EINVAL if the node is not found, @bearer_id is + * not below MAX_BEARERS, or the node has no link on that bearer. + */ +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, + char *linkname, size_t len) +{ + struct tipc_link *link; + int err = -EINVAL; + struct tipc_node *node = tipc_node_find(net, addr); + + if (!node) + return err; + + /* Node lock is not held yet, so this path must skip the unlock */ + if (bearer_id >= MAX_BEARERS) + goto exit; + + tipc_node_lock(node); + link = node->links[bearer_id]; + if (link) { + strncpy(linkname, link->name, len); + err = 0; + } + tipc_node_unlock(node); +exit: + /* Drop the reference taken by tipc_node_find() */ + tipc_node_put(node); + return err; +} + +void tipc_node_unlock(struct tipc_node *node) +{ + struct net *net = node->net; + u32 addr = 0; + u32 flags = node->action_flags; + u32 link_id = 0; + struct list_head *publ_list; + struct sk_buff_head *inputq = node->inputq; + struct sk_buff_head *namedq; + + if (likely(!flags || (flags == TIPC_MSG_EVT))) { + node->action_flags = 0; + spin_unlock_bh(&node->lock); + if (flags == TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); + return; + } + + addr = node->addr; + link_id = node->link_id; + namedq = node->namedq; + publ_list = &node->publ_list; + + node->action_flags &=
~(TIPC_MSG_EVT | + TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | + TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); + + spin_unlock_bh(&node->lock); + + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); + + if (flags & TIPC_WAKEUP_BCAST_USERS) + tipc_bclink_wakeup_users(net); + + if (flags & TIPC_NOTIFY_NODE_UP) + tipc_named_node_up(net, addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, + link_id, addr); + + if (flags & TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); + + if (flags & TIPC_NAMED_MSG_EVT) + tipc_named_rcv(net, namedq); + + if (flags & TIPC_BCAST_MSG_EVT) + tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); +} + +/* Caller should hold node lock for the passed node */ +static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_NODE_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) + goto attr_msg_full; + if (tipc_node_is_up(node)) + if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + int done = cb->args[0]; + int last_addr = cb->args[1]; + struct tipc_node 
*node; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); + } + + list_for_each_entry_rcu(node, &tn->node_list, list) { + if (last_addr) { + if (node->addr == last_addr) + last_addr = 0; + else + continue; + } + + tipc_node_lock(node); + err = __tipc_nl_add_node(&msg, node); + if (err) { + last_addr = node->addr; + tipc_node_unlock(node); + goto out; + } + + tipc_node_unlock(node); + } + done = 1; +out: + cb->args[0] = done; + cb->args[1] = last_addr; + rcu_read_unlock(); + + return skb->len; +} diff --git a/kernel/net/tipc/node.h b/kernel/net/tipc/node.h new file mode 100644 index 000000000..02d5c20dc --- /dev/null +++ b/kernel/net/tipc/node.h @@ -0,0 +1,187 @@ +/* + * net/tipc/node.h: Include file for TIPC node management routines + * + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2005, 2010-2014, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_NODE_H +#define _TIPC_NODE_H + +#include "addr.h" +#include "net.h" +#include "bearer.h" +#include "msg.h" + +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define NODE_HTABLE_SIZE 512 + +/* Flags used to take different actions according to flag type + * TIPC_MSG_EVT: deliver the node's input queue to sockets on unlock + * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down + * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_WAKEUP_BCAST_USERS: wake up broadcast link users on unlock + * TIPC_NOTIFY_LINK_UP: publish link state name type on unlock + * TIPC_NOTIFY_LINK_DOWN: withdraw link state name type on unlock + * TIPC_NAMED_MSG_EVT: process the name table input queue on unlock + * TIPC_BCAST_MSG_EVT: process broadcast link input on unlock + * TIPC_BCAST_RESET: reset all links of the node on unlock + */ +enum { + TIPC_MSG_EVT = 1, + TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), + TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_WAKEUP_BCAST_USERS = (1 << 5), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7), + TIPC_NAMED_MSG_EVT = (1 << 8), + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) +}; + +/** + * struct tipc_node_bclink - TIPC node bclink structure + * @acked: sequence # of last outbound b'cast message acknowledged by node + * @last_in: sequence # of last in-sequence b'cast message received from node + * @last_sent: sequence # of last b'cast message sent by node + * @oos_state: state tracker for handling OOS b'cast messages + * @deferred_size: presumably the number of messages held in @deferdq (TODO confirm) + * @deferdq: deferred queue saved OOS b'cast message received from node + * @reasm_buf: broadcast reassembly queue head from node + * @inputq_map: bitmap indicating which inqueues should be kicked + * @recv_permitted: true if node is allowed to receive b'cast messages + */ +struct tipc_node_bclink { + u32 acked; + u32 last_in; + u32 last_sent; + u32 oos_state; + u32 deferred_size; + struct sk_buff_head deferdq; + struct sk_buff *reasm_buf; + int inputq_map; + bool recv_permitted; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @kref: reference counter to node object + * @lock: spinlock governing
access to structure + * @net: the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @active_links: pointers to active links to node + * @act_mtus: MTU of each active link, less INT_H_SIZE for the changeover header + * @links: pointers to all links to node + * @action_flags: bit mask of different types of node actions + * @bclink: broadcast-related info + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @publ_list: list of publications + * @conn_sks: list of socket connections to this node (struct tipc_sock_conn) + * @rcu: rcu struct for tipc_node + */ +struct tipc_node { + u32 addr; + struct kref kref; + spinlock_t lock; + struct net *net; + struct hlist_node hash; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + struct tipc_link *active_links[2]; + u32 act_mtus[2]; + struct tipc_link *links[MAX_BEARERS]; + int action_flags; + struct tipc_node_bclink bclink; + struct list_head list; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + struct list_head publ_list; + struct list_head conn_sks; + struct rcu_head rcu; +}; + +struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); +struct tipc_node *tipc_node_create(struct net *net, u32 addr); +void tipc_node_stop(struct net *net); +void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); +void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); +void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); +void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link
*l_ptr); +int tipc_node_active_links(struct tipc_node *n_ptr); +int tipc_node_is_up(struct tipc_node *n_ptr); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); +void tipc_node_unlock(struct tipc_node *node); +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); + +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); + +static inline void tipc_node_lock(struct tipc_node *node) +{ + spin_lock_bh(&node->lock); +} + +static inline bool tipc_node_blocked(struct tipc_node *node) +{ + return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN | + TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); +} + +static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) +{ + struct tipc_node *node; + u32 mtu; + + node = tipc_node_find(net, addr); + + if (likely(node)) { + mtu = node->act_mtus[selector & 1]; + tipc_node_put(node); + } else { + mtu = MAX_MSG_SIZE; + } + + return mtu; +} + +#endif diff --git a/kernel/net/tipc/server.c b/kernel/net/tipc/server.c new file mode 100644 index 000000000..77ff03ed1 --- /dev/null +++ b/kernel/net/tipc/server.c @@ -0,0 +1,634 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "server.h" +#include "core.h" +#include "socket.h" +#include <net/sock.h> +#include <linux/module.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +/* Bit numbers used with set_bit()/test_bit() on tipc_conn::flags */ +#define CF_CONNECTED 1 +#define CF_SERVER 2 + +/* Fetch the tipc_conn stored in a struct sock's sk_user_data */ +#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @rwork: receive work item + * @rx_action: what to do when connection socket is active + * @usr_data: user-specified field + * @outqueue: list of outbound messages (struct outqueue_entry) waiting to be sent + * @outqueue_lock: control access to the outqueue + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_server *server; + struct work_struct rwork; + int (*rx_action) (struct tipc_conn *con); + void *usr_data; + struct list_head outqueue; + spinlock_t outqueue_lock; + struct work_struct swork; +}; + +/* An entry waiting to be sent + * list: links the entry into tipc_conn::outqueue + * iov: buffer holding a private copy of the message data + * dest: destination address, used only for SOCK_DGRAM/SOCK_RDM servers + */ +struct outqueue_entry { + struct list_head list; + struct kvec iov; + struct sockaddr_tipc dest; +}; + +static void tipc_recv_work(struct work_struct *work); +static void tipc_send_work(struct work_struct *work); +static void tipc_clean_outqueues(struct tipc_conn *con); + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct sockaddr_tipc *saddr = con->server->saddr; + struct socket *sock = con->sock; + struct sock *sk; + + if (sock) { + sk = sock->sk; + if (test_bit(CF_SERVER, &con->flags)) { + __module_get(sock->ops->owner); + __module_get(sk->sk_prot_creator->owner); + } + saddr->scope = -TIPC_NODE_SCOPE; +
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + sock_release(sock); + con->sock = NULL; + } + + tipc_clean_outqueues(con); + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (con) + conn_get(con); + spin_unlock_bh(&s->idr_lock); + return con; +} + +static void sock_data_ready(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void sock_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) +{ + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_data_ready = sock_data_ready; + sk->sk_write_space = sock_write_space; + sk->sk_user_data = con; + + con->sock = sock; + + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_unregister_callbacks(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_close_conn(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + if (con->conid) + s->tipc_conn_shutdown(con->conid, 
con->usr_data); + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + + tipc_unregister_callbacks(con); + + /* We shouldn't flush pending works as we may be in the + * thread. In fact the races with pending rx/tx work structs + * are harmless for us here as we have already deleted this + * connection from server connection list and set + * sk->sk_user_data to 0 before releasing connection object. + */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); + } +} + +static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + spin_lock_init(&con->outqueue_lock); + INIT_WORK(&con->swork, tipc_send_work); + INIT_WORK(&con->rwork, tipc_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static int tipc_receive_from_sock(struct tipc_conn *con) +{ + struct msghdr msg = {}; + struct tipc_server *s = con->server; + struct sockaddr_tipc addr; + struct kvec iov; + void *buf; + int ret; + + buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); + if (!buf) { + ret = -ENOMEM; + goto out_close; + } + + iov.iov_base = buf; + iov.iov_len = s->max_rcvbuf_size; + msg.msg_name = &addr; + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, + MSG_DONTWAIT); + if (ret <= 0) { + kmem_cache_free(s->rcvbuf_cache, buf); + goto out_close; + } + + s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, + con->usr_data, buf, ret); + + kmem_cache_free(s->rcvbuf_cache, buf); + + return 0; + +out_close: + if (ret != 
-EWOULDBLOCK) + tipc_close_conn(con); + + /* Don't return success if we really got EOF. This must not be an + * 'else' arm of the test above: that arm is only reached when + * ret == -EWOULDBLOCK, so it could never see ret == 0. + */ + if (ret == 0) + ret = -EAGAIN; + + return ret; +} + +/* Accept one pending connection on the listening socket of @con, allocate a + * connection object for it, register the socket callbacks, notify the server + * through tipc_conn_new() and kick the receive path. + * Returns the kernel_accept() result on success or a negative errno. + */ +static int tipc_accept_from_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = con->sock; + struct socket *newsock; + struct tipc_conn *newcon; + int ret; + + ret = kernel_accept(sock, &newsock, O_NONBLOCK); + if (ret < 0) + return ret; + + newcon = tipc_alloc_conn(con->server); + if (IS_ERR(newcon)) { + ret = PTR_ERR(newcon); + sock_release(newsock); + return ret; + } + + newcon->rx_action = tipc_receive_from_sock; + tipc_register_callbacks(newsock, newcon); + + /* Notify that new connection is incoming */ + newcon->usr_data = s->tipc_conn_new(newcon->conid); + + /* Wake up receive process in case of 'SYN+' message */ + newsock->sk->sk_data_ready(newsock->sk); + return ret; +} + +static struct socket *tipc_create_listen_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = NULL; + int ret; + + ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); + if (ret < 0) + return NULL; + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&s->imp, sizeof(s->imp)); + if (ret < 0) + goto create_err; + ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); + if (ret < 0) + goto create_err; + + switch (s->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + con->rx_action = tipc_accept_from_sock; + + ret = kernel_listen(sock, 0); + if (ret < 0) + goto create_err; + break; + case SOCK_DGRAM: + case SOCK_RDM: + con->rx_action = tipc_receive_from_sock; + break; + default: + pr_err("Unknown socket type %d\n", s->type); + goto create_err; + } + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module
deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(sock->ops->owner); + module_put(sock->sk->sk_prot_creator->owner); + set_bit(CF_SERVER, &con->flags); + + return sock; + +create_err: + kernel_sock_shutdown(sock, SHUT_RDWR); + sock_release(sock); + return NULL; +} + +static int tipc_open_listening_sock(struct tipc_server *s) +{ + struct socket *sock; + struct tipc_conn *con; + + con = tipc_alloc_conn(s); + if (IS_ERR(con)) + return PTR_ERR(con); + + sock = tipc_create_listen_sock(con); + if (!sock) { + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + kfree(con); + return -EINVAL; + } + + tipc_register_callbacks(sock, con); + return 0; +} + +static struct outqueue_entry *tipc_alloc_entry(void *data, int len) +{ + struct outqueue_entry *entry; + void *buf; + + entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); + if (!entry) + return NULL; + + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + kfree(entry); + return NULL; + } + + memcpy(buf, data, len); + entry->iov.iov_base = buf; + entry->iov.iov_len = len; + + return entry; +} + +static void tipc_free_entry(struct outqueue_entry *e) +{ + kfree(e->iov.iov_base); + kfree(e); +} + +static void tipc_clean_outqueues(struct tipc_conn *con) +{ + struct outqueue_entry *e, *safe; + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +int 
tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len) +{ + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (!con) + return -EINVAL; + + e = tipc_alloc_entry(data, len); + if (!e) { + conn_put(con); + return -ENOMEM; + } + + if (addr) + memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); + + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (test_bit(CF_CONNECTED, &con->flags)) { + if (!queue_work(s->send_wq, &con->swork)) + conn_put(con); + } else { + conn_put(con); + } + return 0; +} + +void tipc_conn_terminate(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (con) { + tipc_close_conn(con); + conn_put(con); + } +} + +static void tipc_send_to_sock(struct tipc_conn *con) +{ + int count = 0; + struct tipc_server *s = con->server; + struct outqueue_entry *e; + struct msghdr msg; + int ret; + + spin_lock_bh(&con->outqueue_lock); + while (1) { + e = list_entry(con->outqueue.next, struct outqueue_entry, + list); + if ((struct list_head *) e == &con->outqueue) + break; + spin_unlock_bh(&con->outqueue_lock); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +out: + return; + +send_err: + tipc_close_conn(con); +} + +static void tipc_recv_work(struct work_struct *work) +{ + 
struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (test_bit(CF_CONNECTED, &con->flags)) { + if (con->rx_action(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +static void tipc_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (test_bit(CF_CONNECTED, &con->flags)) + tipc_send_to_sock(con); + + conn_put(con); +} + +static void tipc_work_stop(struct tipc_server *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_work_start(struct tipc_server *s) +{ + s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +int tipc_server_start(struct tipc_server *s) +{ + int ret; + + spin_lock_init(&s->idr_lock); + idr_init(&s->conn_idr); + s->idr_in_use = 0; + + s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!s->rcvbuf_cache) + return -ENOMEM; + + ret = tipc_work_start(s); + if (ret < 0) { + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + ret = tipc_open_listening_sock(s); + if (ret < 0) { + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + return ret; +} + +void tipc_server_stop(struct tipc_server *s) +{ + struct tipc_conn *con; + int total = 0; + int id; + + spin_lock_bh(&s->idr_lock); + for (id = 0; total < s->idr_in_use; id++) { + con = idr_find(&s->conn_idr, id); + if (con) { + total++; + spin_unlock_bh(&s->idr_lock); + tipc_close_conn(con); + spin_lock_bh(&s->idr_lock); + } + } + spin_unlock_bh(&s->idr_lock); + + tipc_work_stop(s); + 
kmem_cache_destroy(s->rcvbuf_cache); + idr_destroy(&s->conn_idr); +} diff --git a/kernel/net/tipc/server.h b/kernel/net/tipc/server.h new file mode 100644 index 000000000..9015faedb --- /dev/null +++ b/kernel/net/tipc/server.h @@ -0,0 +1,97 @@ +/* + * net/tipc/server.h: Include file for TIPC server code + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + +#include <linux/idr.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_server - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @net: network namspace instance + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_recvmsg: callback will be called when message arrives + * @saddr: TIPC server address + * @name: server name + * @imp: message importance + * @type: socket type + */ +struct tipc_server { + struct idr conn_idr; + spinlock_t idr_lock; + int idr_in_use; + struct net *net; + struct kmem_cache *rcvbuf_cache; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + void *(*tipc_conn_new)(int conid); + void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); + struct sockaddr_tipc *saddr; + char name[TIPC_SERVER_NAME_LEN]; + int 
imp; + int type; +}; + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len); + +/** + * tipc_conn_terminate - terminate connection with server + * + * Note: Must call it in process context since it might sleep + */ +void tipc_conn_terminate(struct tipc_server *s, int conid); + +int tipc_server_start(struct tipc_server *s); + +void tipc_server_stop(struct tipc_server *s); + +#endif diff --git a/kernel/net/tipc/socket.c b/kernel/net/tipc/socket.c new file mode 100644 index 000000000..f485600c4 --- /dev/null +++ b/kernel/net/tipc/socket.c @@ -0,0 +1,2837 @@ +/* + * net/tipc/socket.c: TIPC socket API + * + * Copyright (c) 2001-2007, 2012-2015, Ericsson AB + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/rhashtable.h> +#include "core.h" +#include "name_table.h" +#include "node.h" +#include "link.h" +#include "name_distr.h" +#include "socket.h" + +#define SS_LISTENING -1 /* socket is listening */ +#define SS_READY -2 /* socket is connectionless */ + +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 + +/** + * struct tipc_sock - TIPC socket structure + * @sk: socket - interacts with 'port' and with user via the socket API + * @connected: non-zero if port is currently connected to a peer port + * @conn_type: TIPC type used when connection was established + * @conn_instance: TIPC instance used when connection was established + * @published: non-zero if port has one or more associated names + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @portid: unique port identity in TIPC socket hash table + * @phdr: preformatted message header used when sending messages + * @port_list: adjacent ports in TIPC's global list of ports + * @publications: list of publications for port + * @pub_count: total # of publications port has made during its lifetime + * @probing_state: + * @probing_intv: + * @conn_timeout: the time we can wait for an unresponded setup request + 
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @remote: 'connected' peer for dgram/rdm + * @node: hash table node + * @rcu: rcu struct for tipc_sock + */ +struct tipc_sock { + struct sock sk; + int connected; + u32 conn_type; + u32 conn_instance; + int published; + u32 max_pkt; + u32 portid; + struct tipc_msg phdr; + struct list_head sock_list; + struct list_head publications; + u32 pub_count; + u32 probing_state; + unsigned long probing_intv; + uint conn_timeout; + atomic_t dupl_rcvcnt; + bool link_cong; + uint sent_unacked; + uint rcv_unacked; + struct sockaddr_tipc remote; + struct rhash_head node; + struct rcu_head rcu; +}; + +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static void tipc_data_ready(struct sock *sk); +static void tipc_write_space(struct sock *sk); +static int tipc_release(struct socket *sock); +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); +static void tipc_sk_timeout(unsigned long data); +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, + size_t dsz); +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); + +static const struct proto_ops packet_ops; +static const struct proto_ops stream_ops; +static const struct proto_ops msg_ops; +static struct proto tipc_proto; 
+ +static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { + [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED }, + [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } +}; + +static const struct rhashtable_params tsk_rht_params; + +/* + * Revised TIPC socket locking policy: + * + * Most socket operations take the standard socket lock when they start + * and hold it until they finish (or until they need to sleep). Acquiring + * this lock grants the owner exclusive access to the fields of the socket + * data structures, with the exception of the backlog queue. A few socket + * operations can be done without taking the socket lock because they only + * read socket information that never changes during the life of the socket. + * + * Socket operations may acquire the lock for the associated TIPC port if they + * need to perform an operation on the port. If any routine needs to acquire + * both the socket lock and the port lock it must take the socket lock first + * to avoid the risk of deadlock. + * + * The dispatcher handling incoming messages cannot grab the socket lock in + * the standard fashion, since invoked it runs at the BH level and cannot block. + * Instead, it checks to see if the socket lock is currently owned by someone, + * and either handles the message itself or adds it to the socket's backlog + * queue; in the latter case the queued message is processed once the process + * owning the socket lock releases it. + * + * NOTE: Releasing the socket lock while an operation is sleeping overcomes + * the problem of a blocked socket operation preventing any other operations + * from occurring. However, applications must be careful if they have + * multiple threads trying to send (or receive) on the same socket, as these + * operations might interfere with each other. 
For example, doing a connect + * and a receive at the same time might allow the receive to consume the + * ACK message meant for the connect. While additional work could be done + * to try and overcome this, it doesn't seem to be worthwhile at the present. + * + * NOTE: Releasing the socket lock while an operation is sleeping also ensures + * that another operation that must be performed in a non-blocking manner is + * not delayed for very long because the lock has already been taken. + * + * NOTE: This code assumes that certain fields of a port/socket pair are + * constant over its lifetime; such fields can be examined without taking + * the socket lock and/or port lock, and do not need to be re-read even + * after resuming processing after waiting. These fields include: + * - socket type + * - pointer to socket sk structure (aka tipc_sock structure) + * - pointer to port structure + * - port reference + */ + +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return msg_prevnode(&tsk->phdr); +} + +static u32 tsk_peer_node(struct tipc_sock *tsk) +{ + return msg_destnode(&tsk->phdr); +} + +static u32 tsk_peer_port(struct tipc_sock *tsk) +{ + return msg_destport(&tsk->phdr); +} + +static bool tsk_unreliable(struct tipc_sock *tsk) +{ + return msg_src_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) +{ + msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); +} + +static bool tsk_unreturnable(struct tipc_sock *tsk) +{ + return msg_dest_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) +{ + msg_set_dest_droppable(&tsk->phdr, unreturnable ? 
1 : 0); +} + +static int tsk_importance(struct tipc_sock *tsk) +{ + return msg_importance(&tsk->phdr); +} + +static int tsk_set_importance(struct tipc_sock *tsk, int imp) +{ + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tsk->phdr, (u32)imp); + return 0; +} + +static struct tipc_sock *tipc_sk(const struct sock *sk) +{ + return container_of(sk, struct tipc_sock, sk); +} + +static int tsk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} + +/** + * tsk_advance_rx_queue - discard first buffer in socket receive queue + * + * Caller must hold socket lock + */ +static void tsk_advance_rx_queue(struct sock *sk) +{ + kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); +} + +/** + * tsk_rej_rx_queue - reject all buffers in socket receive queue + * + * Caller must hold socket lock + */ +static void tsk_rej_rx_queue(struct sock *sk) +{ + struct sk_buff *skb; + u32 dnode; + u32 own_node = tsk_own_node(tipc_sk(sk)); + + while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); + } +} + +/* tsk_peer_msg - verify if message was sent by connected port's peer + * + * Handles cases where the node's network address has changed from + * the default of <0.0.0> to its configured setting. 
+ */ +static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) +{ + struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); + u32 peer_port = tsk_peer_port(tsk); + u32 orig_node; + u32 peer_node; + + if (unlikely(!tsk->connected)) + return false; + + if (unlikely(msg_origport(msg) != peer_port)) + return false; + + orig_node = msg_orignode(msg); + peer_node = tsk_peer_node(tsk); + + if (likely(orig_node == peer_node)) + return true; + + if (!orig_node && (peer_node == tn->own_addr)) + return true; + + if (!peer_node && (orig_node == tn->own_addr)) + return true; + + return false; +} + +/** + * tipc_sk_create - create a TIPC socket + * @net: network namespace (must be default network) + * @sock: pre-allocated socket structure + * @protocol: protocol indicator (must be 0) + * @kern: caused by kernel or by userspace? + * + * This routine creates additional data structures used by the TIPC socket, + * initializes them, and links them together. + * + * Returns 0 on success, errno otherwise + */ +static int tipc_sk_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct tipc_net *tn; + const struct proto_ops *ops; + socket_state state; + struct sock *sk; + struct tipc_sock *tsk; + struct tipc_msg *msg; + + /* Validate arguments */ + if (unlikely(protocol != 0)) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_STREAM: + ops = &stream_ops; + state = SS_UNCONNECTED; + break; + case SOCK_SEQPACKET: + ops = &packet_ops; + state = SS_UNCONNECTED; + break; + case SOCK_DGRAM: + case SOCK_RDM: + ops = &msg_ops; + state = SS_READY; + break; + default: + return -EPROTOTYPE; + } + + /* Allocate socket's protocol area */ + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + if (sk == NULL) + return -ENOMEM; + + tsk = tipc_sk(sk); + tsk->max_pkt = MAX_PKT_DEFAULT; + INIT_LIST_HEAD(&tsk->publications); + msg = &tsk->phdr; + tn = net_generic(sock_net(sk), tipc_net_id); + tipc_msg_init(tn->own_addr, msg, 
TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); + + /* Finish initializing socket data structures */ + sock->ops = ops; + sock->state = state; + sock_init_data(sock, sk); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port numbrer exhausted\n"); + return -EINVAL; + } + msg_set_origport(msg, tsk->portid); + setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + sk->sk_backlog_rcv = tipc_backlog_rcv; + sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + sk->sk_data_ready = tipc_data_ready; + sk->sk_write_space = tipc_write_space; + tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->sent_unacked = 0; + atomic_set(&tsk->dupl_rcvcnt, 0); + + if (sock->state == SS_READY) { + tsk_set_unreturnable(tsk, true); + if (sock->type == SOCK_DGRAM) + tsk_set_unreliable(tsk, true); + } + return 0; +} + +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + +/** + * tipc_release - destroy a TIPC socket + * @sock: socket to destroy + * + * This routine cleans up any messages that are still queued on the socket. + * For DGRAM and RDM socket types, all queued messages are rejected. + * For SEQPACKET and STREAM socket types, the first message is rejected + * and any others are discarded. (If the first message on a STREAM socket + * is partially-read, it is discarded and the next one is rejected instead.) + * + * NOTE: Rejected messages are not necessarily returned to the sender! They + * are returned or discarded according to the "destination droppable" setting + * specified for the message by the sender. 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct net *net; + struct tipc_sock *tsk; + struct sk_buff *skb; + u32 dnode, probing_state; + + /* + * Exit if socket isn't fully initialized (occurs when a failed accept() + * releases a pre-allocated child socket that was never used) + */ + if (sk == NULL) + return 0; + + net = sock_net(sk); + tsk = tipc_sk(sk); + lock_sock(sk); + + /* + * Reject all unreceived messages, except on an active connection + * (which disconnects locally & sends a 'FIN+' to peer) + */ + dnode = tsk_peer_node(tsk); + while (sock->state != SS_DISCONNECTING) { + skb = __skb_dequeue(&sk->sk_receive_queue); + if (skb == NULL) + break; + if (TIPC_SKB_CB(skb)->handle != NULL) + kfree_skb(skb); + else { + if ((sock->state == SS_CONNECTING) || + (sock->state == SS_CONNECTED)) { + sock->state = SS_DISCONNECTING; + tsk->connected = 0; + tipc_node_remove_conn(net, dnode, tsk->portid); + } + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); + } + } + + tipc_sk_withdraw(tsk, 0, NULL); + probing_state = tsk->probing_state; + if (del_timer_sync(&sk->sk_timer) && + probing_state != TIPC_CONN_PROBING) + sock_put(sk); + tipc_sk_remove(tsk); + if (tsk->connected) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, TIPC_ERR_NO_PORT); + if (skb) + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); + } + + /* Discard any remaining (connection-based) messages in receive queue */ + __skb_queue_purge(&sk->sk_receive_queue); + + /* Reject any messages that accumulated in backlog queue */ + sock->state = SS_DISCONNECTING; + release_sock(sk); + + call_rcu(&tsk->rcu, tipc_sk_callback); + sock->sk = NULL; + + return 0; +} + +/** + * tipc_bind - associate or disassocate TIPC 
name(s) with a socket + * @sock: socket structure + * @uaddr: socket address describing name(s) and desired operation + * @uaddr_len: size of socket address data structure + * + * Name and name sequence binding is indicated using a positive scope value; + * a negative scope value unbinds the specified name. Specifying no name + * (i.e. a socket address length of 0) unbinds all names from the socket. + * + * Returns 0 on success, errno otherwise + * + * NOTE: This routine doesn't need to take the socket lock since it doesn't + * access any non-constant socket information. + */ +static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, + int uaddr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; + struct tipc_sock *tsk = tipc_sk(sk); + int res = -EINVAL; + + lock_sock(sk); + if (unlikely(!uaddr_len)) { + res = tipc_sk_withdraw(tsk, 0, NULL); + goto exit; + } + + if (uaddr_len < sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + goto exit; + } + if (addr->family != AF_TIPC) { + res = -EAFNOSUPPORT; + goto exit; + } + + if (addr->addrtype == TIPC_ADDR_NAME) + addr->addr.nameseq.upper = addr->addr.nameseq.lower; + else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { + res = -EAFNOSUPPORT; + goto exit; + } + + if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && + (addr->addr.nameseq.type != TIPC_TOP_SRV) && + (addr->addr.nameseq.type != TIPC_CFG_SRV)) { + res = -EACCES; + goto exit; + } + + res = (addr->scope > 0) ? 
+ tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : + tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); +exit: + release_sock(sk); + return res; +} + +/** + * tipc_getname - get port ID of socket or peer socket + * @sock: socket structure + * @uaddr: area for returned socket address + * @uaddr_len: area for returned length of socket address + * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID + * + * Returns 0 on success, errno otherwise + * + * NOTE: This routine doesn't need to take the socket lock since it only + * accesses socket information that is unchanging (or which changes in + * a completely predictable manner). + */ +static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; + struct tipc_sock *tsk = tipc_sk(sock->sk); + struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); + + memset(addr, 0, sizeof(*addr)); + if (peer) { + if ((sock->state != SS_CONNECTED) && + ((peer != 2) || (sock->state != SS_DISCONNECTING))) + return -ENOTCONN; + addr->addr.id.ref = tsk_peer_port(tsk); + addr->addr.id.node = tsk_peer_node(tsk); + } else { + addr->addr.id.ref = tsk->portid; + addr->addr.id.node = tn->own_addr; + } + + *uaddr_len = sizeof(*addr); + addr->addrtype = TIPC_ADDR_ID; + addr->family = AF_TIPC; + addr->scope = 0; + addr->addr.name.domain = 0; + + return 0; +} + +/** + * tipc_poll - read and possibly block on pollmask + * @file: file structure associated with the socket + * @sock: socket for which to calculate the poll bits + * @wait: ??? + * + * Returns pollmask value + * + * COMMENTARY: + * It appears that the usual socket locking mechanisms are not useful here + * since the pollmask info is potentially out-of-date the moment this routine + * exits. TCP and other protocols seem to rely on higher level poll routines + * to handle any preventable race conditions, so TIPC will do the same ... 
+ * + * TIPC sets the returned events as follows: + * + * socket state flags set + * ------------ --------- + * unconnected no read flags + * POLLOUT if port is not congested + * + * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue + * no write flags + * + * connected POLLIN/POLLRDNORM if data in rx queue + * POLLOUT if port is not congested + * + * disconnecting POLLIN/POLLRDNORM/POLLHUP + * no write flags + * + * listening POLLIN if SYN in rx queue + * no write flags + * + * ready POLLIN/POLLRDNORM if data in rx queue + * [connectionless] POLLOUT (since port cannot be congested) + * + * IMPORTANT: The fact that a read or write operation is indicated does NOT + * imply that the operation will succeed, merely that it should be performed + * and will not block. + */ +static unsigned int tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + u32 mask = 0; + + sock_poll_wait(file, sk_sleep(sk), wait); + + switch ((int)sock->state) { + case SS_UNCONNECTED: + if (!tsk->link_cong) + mask |= POLLOUT; + break; + case SS_READY: + case SS_CONNECTED: + if (!tsk->link_cong && !tsk_conn_cong(tsk)) + mask |= POLLOUT; + /* fall thru' */ + case SS_CONNECTING: + case SS_LISTENING: + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= (POLLIN | POLLRDNORM); + break; + case SS_DISCONNECTING: + mask = (POLLIN | POLLRDNORM | POLLHUP); + break; + } + + return mask; +} + +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @msg: message to send + * @dsz: total length of message data + * @timeo: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct msghdr *msg, size_t dsz, long timeo) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = 
tipc_sk(sk); + struct net *net = sock_net(sk); + struct tipc_msg *mhdr = &tsk->phdr; + struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct iov_iter save = msg->msg_iter; + uint mtu; + int rc; + + msg_set_type(mhdr, TIPC_MCAST_MSG); + msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); + msg_set_destport(mhdr, 0); + msg_set_destnode(mhdr, 0); + msg_set_nametype(mhdr, seq->type); + msg_set_namelower(mhdr, seq->lower); + msg_set_nameupper(mhdr, seq->upper); + msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + +new_mtu: + mtu = tipc_bclink_get_mtu(); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); + if (unlikely(rc < 0)) + return rc; + + do { + rc = tipc_bclink_xmit(net, pktchain); + if (likely(rc >= 0)) { + rc = dsz; + break; + } + if (rc == -EMSGSIZE) { + msg->msg_iter = save; + goto new_mtu; + } + if (rc != -ELINKCONG) + break; + tipc_sk(sk)->link_cong = 1; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + __skb_queue_purge(pktchain); + } while (!rc); + return rc; +} + +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @arrvq: queue with arriving messages, to be cloned after destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur + */ +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq) +{ + struct tipc_msg *msg; + struct tipc_plist dports; + u32 portid; + u32 scope = TIPC_CLUSTER_SCOPE; + struct sk_buff_head tmpq; + uint hsz; + struct sk_buff *skb, *_skb; + + __skb_queue_head_init(&tmpq); + tipc_plist_init(&dports); + + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + msg = buf_msg(skb); + hsz = skb_headroom(skb) + msg_hdr_sz(msg); + + if (in_own_node(net, msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list and message clones: */ + tipc_nametbl_mc_translate(net, + 
msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); + continue; + } + pr_warn("Failed to clone mcast rcv buffer\n"); + } + /* Append to inputq if not already done by other thread */ + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + kfree_skb(__skb_dequeue(arrvq)); + } + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + kfree_skb(skb); + } + tipc_sk_rcv(net, inputq); +} + +/** + * tipc_sk_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @skb: pointer to message buffer. Set to NULL if buffer is consumed. + */ +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) +{ + struct tipc_msg *msg = buf_msg(*skb); + int conn_cong; + u32 dnode; + u32 own_node = tsk_own_node(tsk); + /* Ignore if connection cannot be validated: */ + if (!tsk_peer_msg(tsk, msg)) + goto exit; + + tsk->probing_state = TIPC_CONN_OK; + + if (msg_type(msg) == CONN_ACK) { + conn_cong = tsk_conn_cong(tsk); + tsk->sent_unacked -= msg_msgcnt(msg); + if (conn_cong) + tsk->sk.sk_write_space(&tsk->sk); + } else if (msg_type(msg) == CONN_PROBE) { + if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + msg_set_type(msg, CONN_PROBE_REPLY); + return; + } + } + /* Do nothing if msg_type() == CONN_PROBE_REPLY */ +exit: + kfree_skb(*skb); + *skb = NULL; +} + +static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (sock->state == SS_DISCONNECTING) + return -EPIPE; + if (!*timeo_p) + return -EAGAIN; + if (signal_pending(current)) + return 
sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + +/** + * tipc_sendmsg - send message in connectionless manner + * @sock: socket structure + * @m: message to send + * @dsz: amount of user data to be sent + * + * Message must have an destination specified explicitly. + * Used for SOCK_RDM and SOCK_DGRAM messages, + * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. + * (Note: 'SYN+' is prohibited on SOCK_STREAM.) + * + * Returns the number of bytes sent on success, or errno otherwise + */ +static int tipc_sendmsg(struct socket *sock, + struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_sendmsg(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + struct tipc_msg *mhdr = &tsk->phdr; + u32 dnode, dport; + struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff *skb; + struct tipc_name_seq *seq; + struct iov_iter save; + u32 mtu; + long timeo; + int rc; + + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; + if (unlikely(!dest)) { + if (tsk->connected && sock->state == SS_READY) + dest = &tsk->remote; + else + return -EDESTADDRREQ; + } else if (unlikely(m->msg_namelen < sizeof(*dest)) || + dest->family != AF_TIPC) { + return -EINVAL; + } + if (unlikely(sock->state != SS_READY)) { + if (sock->state == SS_LISTENING) + return -EPIPE; + if (sock->state != SS_UNCONNECTED) + return -EISCONN; + if (tsk->published) + return -EOPNOTSUPP; + if (dest->addrtype == TIPC_ADDR_NAME) { + tsk->conn_type = dest->addr.name.name.type; + tsk->conn_instance = 
dest->addr.name.name.instance; + } + } + seq = &dest->addr.nameseq; + timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + + if (dest->addrtype == TIPC_ADDR_MCAST) { + return tipc_sendmcast(sock, seq, m, dsz, timeo); + } else if (dest->addrtype == TIPC_ADDR_NAME) { + u32 type = dest->addr.name.name.type; + u32 inst = dest->addr.name.name.instance; + u32 domain = dest->addr.name.domain; + + dnode = domain; + msg_set_type(mhdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(mhdr, NAMED_H_SIZE); + msg_set_nametype(mhdr, type); + msg_set_nameinst(mhdr, inst); + msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); + dport = tipc_nametbl_translate(net, type, inst, &dnode); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dport); + if (unlikely(!dport && !dnode)) + return -EHOSTUNREACH; + } else if (dest->addrtype == TIPC_ADDR_ID) { + dnode = dest->addr.id.node; + msg_set_type(mhdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(mhdr, 0); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dest->addr.id.ref); + msg_set_hdr_sz(mhdr, BASIC_H_SIZE); + } + + save = m->msg_iter; +new_mtu: + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); + if (rc < 0) + return rc; + + do { + skb = skb_peek(pktchain); + TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; + rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) + sock->state = SS_CONNECTING; + rc = dsz; + break; + } + if (rc == -EMSGSIZE) { + m->msg_iter = save; + goto new_mtu; + } + if (rc != -ELINKCONG) + break; + tsk->link_cong = 1; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + __skb_queue_purge(pktchain); + } while (!rc); + + return rc; +} + +static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (sock->state == 
SS_DISCONNECTING) + return -EPIPE; + else if (sock->state != SS_CONNECTED) + return -ENOTCONN; + if (!*timeo_p) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, + (!tsk->link_cong && + !tsk_conn_cong(tsk)) || + !tsk->connected); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + +/** + * tipc_send_stream - send stream-oriented data + * @sock: socket structure + * @m: data to send + * @dsz: total length of data to be transmitted + * + * Used for SOCK_STREAM data. + * + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent + */ +static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_send_stream(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *mhdr = &tsk->phdr; + struct sk_buff_head *pktchain = &sk->sk_write_queue; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + u32 portid = tsk->portid; + int rc = -EINVAL; + long timeo; + u32 dnode; + uint mtu, send, sent = 0; + struct iov_iter save; + + /* Handle implied connection establishment */ + if (unlikely(dest)) { + rc = __tipc_sendmsg(sock, m, dsz); + if (dsz && (dsz == rc)) + tsk->sent_unacked = 1; + return rc; + } + if (dsz > (uint)INT_MAX) + return -EMSGSIZE; + + if (unlikely(sock->state != SS_CONNECTED)) { + if (sock->state == SS_DISCONNECTING) + return -EPIPE; + else + return -ENOTCONN; + } + + timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + dnode = tsk_peer_node(tsk); + +next: + save = m->msg_iter; + mtu = tsk->max_pkt; + send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); + rc = 
tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); + if (unlikely(rc < 0)) + return rc; + do { + if (likely(!tsk_conn_cong(tsk))) { + rc = tipc_link_xmit(net, pktchain, dnode, portid); + if (likely(!rc)) { + tsk->sent_unacked++; + sent += send; + if (sent == dsz) + break; + goto next; + } + if (rc == -EMSGSIZE) { + tsk->max_pkt = tipc_node_get_mtu(net, dnode, + portid); + m->msg_iter = save; + goto next; + } + if (rc != -ELINKCONG) + break; + tsk->link_cong = 1; + } + rc = tipc_wait_for_sndpkt(sock, &timeo); + if (rc) + __skb_queue_purge(pktchain); + } while (!rc); + + return sent ? sent : rc; +} + +/** + * tipc_send_packet - send a connection-oriented message + * @sock: socket structure + * @m: message to send + * @dsz: length of data to be transmitted + * + * Used for SOCK_SEQPACKET messages. + * + * Returns the number of bytes sent on success, or errno otherwise + */ +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) +{ + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; + + return tipc_send_stream(sock, m, dsz); +} + +/* tipc_sk_finish_conn - complete the setup of a connection + */ +static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, + u32 peer_node) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_msg *msg = &tsk->phdr; + + msg_set_destnode(msg, peer_node); + msg_set_destport(msg, peer_port); + msg_set_type(msg, TIPC_CONN_MSG); + msg_set_lookup_scope(msg, 0); + msg_set_hdr_sz(msg, SHORT_H_SIZE); + + tsk->probing_intv = CONN_PROBING_INTERVAL; + tsk->probing_state = TIPC_CONN_OK; + tsk->connected = 1; + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); +} + +/** + * set_orig_addr - capture sender's address for received message + * @m: descriptor for message info + * @msg: received message header + * + * Note: Address is not captured if not 
requested by receiver. + */ +static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); + + if (addr) { + addr->family = AF_TIPC; + addr->addrtype = TIPC_ADDR_ID; + memset(&addr->addr, 0, sizeof(addr->addr)); + addr->addr.id.ref = msg_origport(msg); + addr->addr.id.node = msg_orignode(msg); + addr->addr.name.domain = 0; /* could leave uninitialized */ + addr->scope = 0; /* could leave uninitialized */ + m->msg_namelen = sizeof(struct sockaddr_tipc); + } +} + +/** + * tipc_sk_anc_data_recv - optionally capture ancillary data for received message + * @m: descriptor for message info + * @msg: received message header + * @tsk: TIPC port associated with message + * + * Note: Ancillary data is not captured if not requested by receiver. + * + * Returns 0 if successful, otherwise errno + */ +static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, + struct tipc_sock *tsk) +{ + u32 anc_data[3]; + u32 err; + u32 dest_type; + int has_name; + int res; + + if (likely(m->msg_controllen == 0)) + return 0; + + /* Optionally capture errored message object(s) */ + err = msg ? msg_errcode(msg) : 0; + if (unlikely(err)) { + anc_data[0] = err; + anc_data[1] = msg_data_sz(msg); + res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data); + if (res) + return res; + if (anc_data[1]) { + res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], + msg_data(msg)); + if (res) + return res; + } + } + + /* Optionally capture message destination object */ + dest_type = msg ? 
msg_type(msg) : TIPC_DIRECT_MSG; + switch (dest_type) { + case TIPC_NAMED_MSG: + has_name = 1; + anc_data[0] = msg_nametype(msg); + anc_data[1] = msg_namelower(msg); + anc_data[2] = msg_namelower(msg); + break; + case TIPC_MCAST_MSG: + has_name = 1; + anc_data[0] = msg_nametype(msg); + anc_data[1] = msg_namelower(msg); + anc_data[2] = msg_nameupper(msg); + break; + case TIPC_CONN_MSG: + has_name = (tsk->conn_type != 0); + anc_data[0] = tsk->conn_type; + anc_data[1] = tsk->conn_instance; + anc_data[2] = tsk->conn_instance; + break; + default: + has_name = 0; + } + if (has_name) { + res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data); + if (res) + return res; + } + + return 0; +} + +static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) +{ + struct net *net = sock_net(&tsk->sk); + struct sk_buff *skb = NULL; + struct tipc_msg *msg; + u32 peer_port = tsk_peer_port(tsk); + u32 dnode = tsk_peer_node(tsk); + + if (!tsk->connected) + return; + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); + if (!skb) + return; + msg = buf_msg(skb); + msg_set_msgcnt(msg, ack); + tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); +} + +static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + long timeo = *timeop; + int err; + + for (;;) { + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + if (sock->state == SS_DISCONNECTING) { + err = -ENOTCONN; + break; + } + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EAGAIN; + if (!timeo) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + } + finish_wait(sk_sleep(sk), &wait); + *timeop = timeo; + return err; +} + +/** + * tipc_recvmsg - receive packet-oriented message + * @m: descriptor 
for message info + * @buf_len: total size of user buffer area + * @flags: receive flags + * + * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. + * If the complete message doesn't fit in user area, truncate it. + * + * Returns size of returned message data, errno otherwise + */ +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, + int flags) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct sk_buff *buf; + struct tipc_msg *msg; + long timeo; + unsigned int sz; + u32 err; + int res; + + /* Catch invalid receive requests */ + if (unlikely(!buf_len)) + return -EINVAL; + + lock_sock(sk); + + if (unlikely(sock->state == SS_UNCONNECTED)) { + res = -ENOTCONN; + goto exit; + } + + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); +restart: + + /* Look for a message in receive queue; wait if necessary */ + res = tipc_wait_for_rcvmsg(sock, &timeo); + if (res) + goto exit; + + /* Look at first message in receive queue */ + buf = skb_peek(&sk->sk_receive_queue); + msg = buf_msg(buf); + sz = msg_data_sz(msg); + err = msg_errcode(msg); + + /* Discard an empty non-errored message & try again */ + if ((!sz) && (!err)) { + tsk_advance_rx_queue(sk); + goto restart; + } + + /* Capture sender's address (optional) */ + set_orig_addr(m, msg); + + /* Capture ancillary data (optional) */ + res = tipc_sk_anc_data_recv(m, msg, tsk); + if (res) + goto exit; + + /* Capture message data (if valid) & compute return value (always) */ + if (!err) { + if (unlikely(buf_len < sz)) { + sz = buf_len; + m->msg_flags |= MSG_TRUNC; + } + res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz); + if (res) + goto exit; + res = sz; + } else { + if ((sock->state == SS_READY) || + ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)) + res = 0; + else + res = -ECONNRESET; + } + + /* Consume received message (optional) */ + if (likely(!(flags & MSG_PEEK))) { + if ((sock->state != SS_READY) && + (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + 
tipc_sk_send_ack(tsk, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } + tsk_advance_rx_queue(sk); + } +exit: + release_sock(sk); + return res; +} + +/** + * tipc_recv_stream - receive stream-oriented data + * @m: descriptor for message info + * @buf_len: total size of user buffer area + * @flags: receive flags + * + * Used for SOCK_STREAM messages only. If not enough data is available + * will optionally wait for more; never truncates data. + * + * Returns size of returned message data, errno otherwise + */ +static int tipc_recv_stream(struct socket *sock, struct msghdr *m, + size_t buf_len, int flags) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct sk_buff *buf; + struct tipc_msg *msg; + long timeo; + unsigned int sz; + int sz_to_copy, target, needed; + int sz_copied = 0; + u32 err; + int res = 0; + + /* Catch invalid receive attempts */ + if (unlikely(!buf_len)) + return -EINVAL; + + lock_sock(sk); + + if (unlikely(sock->state == SS_UNCONNECTED)) { + res = -ENOTCONN; + goto exit; + } + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + +restart: + /* Look for a message in receive queue; wait if necessary */ + res = tipc_wait_for_rcvmsg(sock, &timeo); + if (res) + goto exit; + + /* Look at first message in receive queue */ + buf = skb_peek(&sk->sk_receive_queue); + msg = buf_msg(buf); + sz = msg_data_sz(msg); + err = msg_errcode(msg); + + /* Discard an empty non-errored message & try again */ + if ((!sz) && (!err)) { + tsk_advance_rx_queue(sk); + goto restart; + } + + /* Optionally capture sender's address & ancillary data of first msg */ + if (sz_copied == 0) { + set_orig_addr(m, msg); + res = tipc_sk_anc_data_recv(m, msg, tsk); + if (res) + goto exit; + } + + /* Capture message data (if valid) & compute return value (always) */ + if (!err) { + u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + + sz -= offset; + needed = (buf_len - sz_copied); + sz_to_copy = 
(sz <= needed) ? sz : needed; + + res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset, + m, sz_to_copy); + if (res) + goto exit; + + sz_copied += sz_to_copy; + + if (sz_to_copy < sz) { + if (!(flags & MSG_PEEK)) + TIPC_SKB_CB(buf)->handle = + (void *)(unsigned long)(offset + sz_to_copy); + goto exit; + } + } else { + if (sz_copied != 0) + goto exit; /* can't add error msg to valid data */ + + if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control) + res = 0; + else + res = -ECONNRESET; + } + + /* Consume received message (optional) */ + if (likely(!(flags & MSG_PEEK))) { + if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_sk_send_ack(tsk, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } + tsk_advance_rx_queue(sk); + } + + /* Loop around if more data is required */ + if ((sz_copied < buf_len) && /* didn't get all requested data */ + (!skb_queue_empty(&sk->sk_receive_queue) || + (sz_copied < target)) && /* and more is ready or required */ + (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ + (!err)) /* and haven't reached a FIN */ + goto restart; + +exit: + release_sock(sk); + return sz_copied ? 
sz_copied : res; +} + +/** + * tipc_write_space - wake up thread if port congestion is released + * @sk: socket + */ +static void tipc_write_space(struct sock *sk) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | + POLLWRNORM | POLLWRBAND); + rcu_read_unlock(); +} + +/** + * tipc_data_ready - wake up threads to indicate messages have been received + * @sk: socket + * @len: the length of messages + */ +static void tipc_data_ready(struct sock *sk) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + POLLRDNORM | POLLRDBAND); + rcu_read_unlock(); +} + +/** + * filter_connect - Handle all incoming messages for a connection-based socket + * @tsk: TIPC socket + * @skb: pointer to message buffer. Set to NULL if buffer is consumed + * + * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise + */ +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct socket *sock = sk->sk_socket; + struct tipc_msg *msg = buf_msg(*skb); + int retval = -TIPC_ERR_NO_PORT; + + if (msg_mcast(msg)) + return retval; + + switch ((int)sock->state) { + case SS_CONNECTED: + /* Accept only connection-based messages sent by peer */ + if (tsk_peer_msg(tsk, msg)) { + if (unlikely(msg_errcode(msg))) { + sock->state = SS_DISCONNECTING; + tsk->connected = 0; + /* let timer expire on it's own */ + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); + } + retval = TIPC_OK; + } + break; + case SS_CONNECTING: + /* Accept only ACK or NACK message */ + + if (unlikely(!msg_connected(msg))) + break; + + if (unlikely(msg_errcode(msg))) { + sock->state = SS_DISCONNECTING; + sk->sk_err = ECONNREFUSED; + retval = TIPC_OK; + break; + } + + if (unlikely(msg_importance(msg) > 
TIPC_CRITICAL_IMPORTANCE)) { + sock->state = SS_DISCONNECTING; + sk->sk_err = EINVAL; + retval = TIPC_OK; + break; + } + + tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg)); + msg_set_importance(&tsk->phdr, msg_importance(msg)); + sock->state = SS_CONNECTED; + + /* If an incoming message is an 'ACK-', it should be + * discarded here because it doesn't contain useful + * data. In addition, we should try to wake up + * connect() routine if sleeping. + */ + if (msg_data_sz(msg) == 0) { + kfree_skb(*skb); + *skb = NULL; + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); + } + retval = TIPC_OK; + break; + case SS_LISTENING: + case SS_UNCONNECTED: + /* Accept only SYN message */ + if (!msg_connected(msg) && !(msg_errcode(msg))) + retval = TIPC_OK; + break; + case SS_DISCONNECTING: + break; + default: + pr_err("Unknown socket state %u\n", sock->state); + } + return retval; +} + +/** + * rcvbuf_limit - get proper overload limit of socket receive queue + * @sk: socket + * @buf: message + * + * For all connection oriented messages, irrespective of importance, + * the default overload value (i.e. 67MB) is set as limit. + * + * For all connectionless messages, by default new queue limits are + * as belows: + * + * TIPC_LOW_IMPORTANCE (4 MB) + * TIPC_MEDIUM_IMPORTANCE (8 MB) + * TIPC_HIGH_IMPORTANCE (16 MB) + * TIPC_CRITICAL_IMPORTANCE (32 MB) + * + * Returns overload limit according to corresponding message importance + */ +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + + if (msg_connected(msg)) + return sysctl_tipc_rmem[2]; + + return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); +} + +/** + * filter_rcv - validate incoming message + * @sk: socket + * @skb: pointer to message. Set to NULL if buffer is consumed. + * + * Enqueues message on receive queue if acceptable; optionally handles + * disconnect indication for a connected socket. 
+ * + * Called with socket lock already taken + * + * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected + */ +static int filter_rcv(struct sock *sk, struct sk_buff **skb) +{ + struct socket *sock = sk->sk_socket; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *msg = buf_msg(*skb); + unsigned int limit = rcvbuf_limit(sk, *skb); + int rc = TIPC_OK; + + if (unlikely(msg_user(msg) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return TIPC_OK; + } + + if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { + kfree_skb(*skb); + tsk->link_cong = 0; + sk->sk_write_space(sk); + *skb = NULL; + return TIPC_OK; + } + + /* Reject message if it is wrong sort of message for socket */ + if (msg_type(msg) > TIPC_DIRECT_MSG) + return -TIPC_ERR_NO_PORT; + + if (sock->state == SS_READY) { + if (msg_connected(msg)) + return -TIPC_ERR_NO_PORT; + } else { + rc = filter_connect(tsk, skb); + if (rc != TIPC_OK || !*skb) + return rc; + } + + /* Reject message if there isn't room to queue it */ + if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) + return -TIPC_ERR_OVERLOAD; + + /* Enqueue message */ + TIPC_SKB_CB(*skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, *skb); + skb_set_owner_r(*skb, sk); + + sk->sk_data_ready(sk); + *skb = NULL; + return TIPC_OK; +} + +/** + * tipc_backlog_rcv - handle incoming message from backlog queue + * @sk: socket + * @skb: message + * + * Caller must hold socket lock + * + * Returns 0 + */ +static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int err; + atomic_t *dcnt; + u32 dnode; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + uint truesize = skb->truesize; + + err = filter_rcv(sk, &skb); + if (likely(!skb)) { + dcnt = &tsk->dupl_rcvcnt; + if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, dcnt); + return 0; + } + if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + return 0; +} + 
+/** + * tipc_sk_enqueue - extract all buffers with destination 'dport' from + * inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @_skb: returned buffer to be forwarded or rejected, if applicable + * + * Caller must hold socket lock + * + * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD + * or -TIPC_ERR_NO_PORT + */ +static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff **_skb) +{ + unsigned int lim; + atomic_t *dcnt; + int err; + struct sk_buff *skb; + unsigned long time_limit = jiffies + 2; + + while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return TIPC_OK; + if (!sock_owned_by_user(sk)) { + err = filter_rcv(sk, &skb); + if (likely(!skb)) + continue; + *_skb = skb; + return err; + } + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) + continue; + *_skb = skb; + return -TIPC_ERR_OVERLOAD; + } + return TIPC_OK; +} + +/** + * tipc_sk_rcv - handle a chain of incoming buffers + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue + * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH + * Only node local calls check the return value, sending single-buffer queues + */ +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) +{ + u32 dnode, dport = 0; + int err; + struct sk_buff *skb; + struct tipc_sock *tsk; + struct tipc_net *tn; + struct sock *sk; + + while (skb_queue_len(inputq)) { + err = -TIPC_ERR_NO_PORT; + skb = NULL; + dport = 
tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { + sk = &tsk->sk; + if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { + err = tipc_sk_enqueue(inputq, sk, dport, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + dport = 0; + } + sock_put(sk); + } else { + skb = tipc_skb_dequeue(inputq, dport); + } + if (likely(!skb)) + continue; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + continue; +xmit: + tipc_link_xmit_skb(net, skb, dnode, dport); + } + return err ? -EHOSTUNREACH : 0; +} + +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (!*timeo_p) + return -ETIMEDOUT; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + +/** + * tipc_connect - establish a connection to another TIPC port + * @sock: socket structure + * @dest: socket address for destination port + * @destlen: size of socket address data structure + * @flags: file-related flags associated with socket + * + * Returns 0 on success, errno otherwise + */ +static int tipc_connect(struct socket *sock, struct sockaddr *dest, + int destlen, int flags) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; + struct msghdr m = {NULL,}; + long timeout = (flags & O_NONBLOCK) ? 
0 : tsk->conn_timeout; + socket_state previous; + int res = 0; + + lock_sock(sk); + + /* DGRAM/RDM connect(), just save the destaddr */ + if (sock->state == SS_READY) { + if (dst->family == AF_UNSPEC) { + memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); + tsk->connected = 0; + } else if (destlen != sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + } else { + memcpy(&tsk->remote, dest, destlen); + tsk->connected = 1; + } + goto exit; + } + + /* + * Reject connection attempt using multicast address + * + * Note: send_msg() validates the rest of the address fields, + * so there's no need to do it here + */ + if (dst->addrtype == TIPC_ADDR_MCAST) { + res = -EINVAL; + goto exit; + } + + previous = sock->state; + switch (sock->state) { + case SS_UNCONNECTED: + /* Send a 'SYN-' to destination */ + m.msg_name = dest; + m.msg_namelen = destlen; + + /* If connect is in non-blocking case, set MSG_DONTWAIT to + * indicate send_msg() is never blocked. + */ + if (!timeout) + m.msg_flags = MSG_DONTWAIT; + + res = __tipc_sendmsg(sock, &m, 0); + if ((res < 0) && (res != -EWOULDBLOCK)) + goto exit; + + /* Just entered SS_CONNECTING state; the only + * difference is that return value in non-blocking + * case is EINPROGRESS, rather than EALREADY. 
+ */ + res = -EINPROGRESS; + case SS_CONNECTING: + if (previous == SS_CONNECTING) + res = -EALREADY; + if (!timeout) + goto exit; + timeout = msecs_to_jiffies(timeout); + /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ + res = tipc_wait_for_connect(sock, &timeout); + break; + case SS_CONNECTED: + res = -EISCONN; + break; + default: + res = -EINVAL; + break; + } +exit: + release_sock(sk); + return res; +} + +/** + * tipc_listen - allow socket to listen for incoming connections + * @sock: socket structure + * @len: (unused) + * + * Returns 0 on success, errno otherwise + */ +static int tipc_listen(struct socket *sock, int len) +{ + struct sock *sk = sock->sk; + int res; + + lock_sock(sk); + + if (sock->state != SS_UNCONNECTED) + res = -EINVAL; + else { + sock->state = SS_LISTENING; + res = 0; + } + + release_sock(sk); + return res; +} + +static int tipc_wait_for_accept(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int err; + + /* True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. 
+ */ + for (;;) { + prepare_to_wait_exclusive(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EINVAL; + if (sock->state != SS_LISTENING) + break; + err = -EAGAIN; + if (!timeo) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + } + finish_wait(sk_sleep(sk), &wait); + return err; +} + +/** + * tipc_accept - wait for connection request + * @sock: listening socket + * @newsock: new socket that is to be connected + * @flags: file-related flags associated with socket + * + * Returns 0 on success, errno otherwise + */ +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) +{ + struct sock *new_sk, *sk = sock->sk; + struct sk_buff *buf; + struct tipc_sock *new_tsock; + struct tipc_msg *msg; + long timeo; + int res; + + lock_sock(sk); + + if (sock->state != SS_LISTENING) { + res = -EINVAL; + goto exit; + } + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + res = tipc_wait_for_accept(sock, timeo); + if (res) + goto exit; + + buf = skb_peek(&sk->sk_receive_queue); + + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + if (res) + goto exit; + + new_sk = new_sock->sk; + new_tsock = tipc_sk(new_sk); + msg = buf_msg(buf); + + /* we lock on new_sk; but lockdep sees the lock on sk */ + lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING); + + /* + * Reject any stray messages received by new socket + * before the socket lock was taken (very, very unlikely) + */ + tsk_rej_rx_queue(new_sk); + + /* Connect new socket to it's peer */ + tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); + new_sock->state = SS_CONNECTED; + + tsk_set_importance(new_tsock, msg_importance(msg)); + if (msg_named(msg)) { + new_tsock->conn_type = msg_nametype(msg); + new_tsock->conn_instance = msg_nameinst(msg); + } + + /* + * 
Respond to 'SYN-' by discarding it & returning 'ACK'-. + * Respond to 'SYN+' by queuing it on new socket. + */ + if (!msg_data_sz(msg)) { + struct msghdr m = {NULL,}; + + tsk_advance_rx_queue(sk); + __tipc_send_stream(new_sock, &m, 0); + } else { + __skb_dequeue(&sk->sk_receive_queue); + __skb_queue_head(&new_sk->sk_receive_queue, buf); + skb_set_owner_r(buf, new_sk); + } + release_sock(new_sk); +exit: + release_sock(sk); + return res; +} + +/** + * tipc_shutdown - shutdown socket connection + * @sock: socket structure + * @how: direction to close (must be SHUT_RDWR) + * + * Terminates connection (if necessary), then purges socket's receive queue. + * + * Returns 0 on success, errno otherwise + */ +static int tipc_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct sk_buff *skb; + u32 dnode; + int res; + + if (how != SHUT_RDWR) + return -EINVAL; + + lock_sock(sk); + + switch (sock->state) { + case SS_CONNECTING: + case SS_CONNECTED: + +restart: + /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ + skb = __skb_dequeue(&sk->sk_receive_queue); + if (skb) { + if (TIPC_SKB_CB(skb)->handle != NULL) { + kfree_skb(skb); + goto restart; + } + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + TIPC_CONN_SHUTDOWN)) + tipc_link_xmit_skb(net, skb, dnode, + tsk->portid); + } else { + dnode = tsk_peer_node(tsk); + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, + 0, dnode, tsk_own_node(tsk), + tsk_peer_port(tsk), + tsk->portid, TIPC_CONN_SHUTDOWN); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + } + tsk->connected = 0; + sock->state = SS_DISCONNECTING; + tipc_node_remove_conn(net, dnode, tsk->portid); + /* fall through */ + + case SS_DISCONNECTING: + + /* Discard any unreceived messages */ + __skb_queue_purge(&sk->sk_receive_queue); + + /* Wake up anyone sleeping in poll */ + sk->sk_state_change(sk); + res = 0; + break; + + 
default: + res = -ENOTCONN; + } + + release_sock(sk); + return res; +} + +static void tipc_sk_timeout(unsigned long data) +{ + struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = &tsk->sk; + struct sk_buff *skb = NULL; + u32 peer_port, peer_node; + u32 own_node = tsk_own_node(tsk); + + bh_lock_sock(sk); + if (!tsk->connected) { + bh_unlock_sock(sk); + goto exit; + } + peer_port = tsk_peer_port(tsk); + peer_node = tsk_peer_node(tsk); + + if (tsk->probing_state == TIPC_CONN_PROBING) { + if (!sock_owned_by_user(sk)) { + sk->sk_socket->state = SS_DISCONNECTING; + tsk->connected = 0; + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + } else { + /* Try again later */ + sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); + } + + } else { + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); + tsk->probing_state = TIPC_CONN_PROBING; + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + } + bh_unlock_sock(sk); + if (skb) + tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); +exit: + sock_put(sk); +} + +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct net *net = sock_net(&tsk->sk); + struct publication *publ; + u32 key; + + if (tsk->connected) + return -EINVAL; + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) + return -EADDRINUSE; + + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, + scope, tsk->portid, key); + if (unlikely(!publ)) + return -EINVAL; + + list_add(&publ->pport_list, &tsk->publications); + tsk->pub_count++; + tsk->published = 1; + return 0; +} + +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct net *net = sock_net(&tsk->sk); + struct publication *publ; + struct publication *safe; + int rc = -EINVAL; + + list_for_each_entry_safe(publ, safe, 
&tsk->publications, pport_list) { + if (seq) { + if (publ->scope != scope) + continue; + if (publ->type != seq->type) + continue; + if (publ->lower != seq->lower) + continue; + if (publ->upper != seq->upper) + break; + tipc_nametbl_withdraw(net, publ->type, publ->lower, + publ->ref, publ->key); + rc = 0; + break; + } + tipc_nametbl_withdraw(net, publ->type, publ->lower, + publ->ref, publ->key); + rc = 0; + } + if (list_empty(&tsk->publications)) + tsk->published = 0; + return rc; +} + +/* tipc_sk_reinit: set non-zero address in all existing sockets + * when we go from standalone to network mode. + */ +void tipc_sk_reinit(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + const struct bucket_table *tbl; + struct rhash_head *pos; + struct tipc_sock *tsk; + struct tipc_msg *msg; + int i; + + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + msg = &tsk->phdr; + msg_set_prevnode(msg, tn->own_addr); + msg_set_orignode(msg, tn->own_addr); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } + } + rcu_read_unlock(); +} + +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_sock *tsk; + + rcu_read_lock(); + tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); + + return tsk; +} + +static int tipc_sk_insert(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if 
(!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, + tsk_rht_params)) + return 0; + sock_put(&tsk->sk); + } + + return -1; +} + +static void tipc_sk_remove(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); + + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } +} + +static const struct rhashtable_params tsk_rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, +}; + +int tipc_sk_rht_init(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return rhashtable_init(&tn->sk_rht, &tsk_rht_params); +} + +void tipc_sk_rht_destroy(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + /* Wait for socket readers to complete */ + synchronize_net(); + + rhashtable_destroy(&tn->sk_rht); +} + +/** + * tipc_setsockopt - set socket option + * @sock: socket structure + * @lvl: option level + * @opt: option identifier + * @ov: pointer to new option value + * @ol: length of option value + * + * For stream sockets only, accepts and ignores all IPPROTO_TCP options + * (to ease compatibility). 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_setsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, unsigned int ol) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + u32 value; + int res; + + if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) + return 0; + if (lvl != SOL_TIPC) + return -ENOPROTOOPT; + if (ol < sizeof(value)) + return -EINVAL; + res = get_user(value, (u32 __user *)ov); + if (res) + return res; + + lock_sock(sk); + + switch (opt) { + case TIPC_IMPORTANCE: + res = tsk_set_importance(tsk, value); + break; + case TIPC_SRC_DROPPABLE: + if (sock->type != SOCK_STREAM) + tsk_set_unreliable(tsk, value); + else + res = -ENOPROTOOPT; + break; + case TIPC_DEST_DROPPABLE: + tsk_set_unreturnable(tsk, value); + break; + case TIPC_CONN_TIMEOUT: + tipc_sk(sk)->conn_timeout = value; + /* no need to set "res", since already 0 at this point */ + break; + default: + res = -EINVAL; + } + + release_sock(sk); + + return res; +} + +/** + * tipc_getsockopt - get socket option + * @sock: socket structure + * @lvl: option level + * @opt: option identifier + * @ov: receptacle for option value + * @ol: receptacle for length of option value + * + * For stream sockets only, returns 0 length result for all IPPROTO_TCP options + * (to ease compatibility). 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_getsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, int __user *ol) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + int len; + u32 value; + int res; + + if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) + return put_user(0, ol); + if (lvl != SOL_TIPC) + return -ENOPROTOOPT; + res = get_user(len, ol); + if (res) + return res; + + lock_sock(sk); + + switch (opt) { + case TIPC_IMPORTANCE: + value = tsk_importance(tsk); + break; + case TIPC_SRC_DROPPABLE: + value = tsk_unreliable(tsk); + break; + case TIPC_DEST_DROPPABLE: + value = tsk_unreturnable(tsk); + break; + case TIPC_CONN_TIMEOUT: + value = tsk->conn_timeout; + /* no need to set "res", since already 0 at this point */ + break; + case TIPC_NODE_RECVQ_DEPTH: + value = 0; /* was tipc_queue_size, now obsolete */ + break; + case TIPC_SOCK_RECVQ_DEPTH: + value = skb_queue_len(&sk->sk_receive_queue); + break; + default: + res = -EINVAL; + } + + release_sock(sk); + + if (res) + return res; /* "get" failed */ + + if (len < sizeof(value)) + return -EINVAL; + + if (copy_to_user(ov, &value, sizeof(value))) + return -EFAULT; + + return put_user(sizeof(value), ol); +} + +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(sock_net(sk), + lnr.bearer_id & 0xffff, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} + +/* Protocol switches for the various types of TIPC sockets */ + +static const struct proto_ops msg_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .release = tipc_release, + .bind = 
tipc_bind, + .connect = tipc_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = sock_no_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_sendmsg, + .recvmsg = tipc_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage +}; + +static const struct proto_ops packet_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = sock_no_socketpair, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_send_packet, + .recvmsg = tipc_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage +}; + +static const struct proto_ops stream_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = sock_no_socketpair, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_send_stream, + .recvmsg = tipc_recv_stream, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage +}; + +static const struct net_proto_family tipc_family_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .create = tipc_sk_create +}; + +static struct proto tipc_proto = { + .name = "TIPC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem +}; + +/** + * tipc_socket_init - initialize TIPC socket interface + * + * Returns 0 on success, errno otherwise + */ +int tipc_socket_init(void) +{ + int res; + + res = proto_register(&tipc_proto, 1); + if (res) { + 
pr_err("Failed to register TIPC protocol type\n"); + goto out; + } + + res = sock_register(&tipc_family_ops); + if (res) { + pr_err("Failed to register TIPC socket type\n"); + proto_unregister(&tipc_proto); + goto out; + } + out: + return res; +} + +/** + * tipc_socket_stop - stop TIPC socket interface + */ +void tipc_socket_stop(void) +{ + sock_unregister(tipc_family_ops.family); + proto_unregister(&tipc_proto); +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) +{ + u32 peer_node; + u32 peer_port; + struct nlattr *nest; + + peer_node = tsk_peer_node(tsk); + peer_port = tsk_peer_port(tsk); + + nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + + if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) + goto msg_full; + + if (tsk->conn_type != 0) { + if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) + goto msg_full; + } + nla_nest_end(skb, nest); + + return 0; + +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. 
*/ +static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + if (!attrs) + goto genlmsg_cancel; + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) + goto attr_msg_cancel; + + if (tsk->connected) { + err = __tipc_nl_add_sk_con(skb, tsk); + if (err) + goto attr_msg_cancel; + } else if (!list_empty(&tsk->publications)) { + if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) + goto attr_msg_cancel; + } + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + struct tipc_sock *tsk; + const struct bucket_table *tbl; + struct rhash_head *pos; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 tbl_id = cb->args[0]; + u32 prev_portid = cb->args[1]; + + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (; tbl_id < tbl->size; tbl_id++) { + rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + if (prev_portid && prev_portid != tsk->portid) { + spin_unlock_bh(&tsk->sk.sk_lock.slock); + continue; + } + + err = __tipc_nl_add_sk(skb, cb, tsk); + if (err) { + prev_portid = tsk->portid; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + goto out; + } + prev_portid = 0; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } + } +out: + rcu_read_unlock(); + cb->args[0] = 
tbl_id; + cb->args[1] = prev_portid; + + return skb->len; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct publication *publ) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start(skb, TIPC_NLA_PUBL); + if (!attrs) + goto genlmsg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + goto attr_msg_cancel; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_list_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk, u32 *last_publ) +{ + int err; + struct publication *p; + + if (*last_publ) { + list_for_each_entry(p, &tsk->publications, pport_list) { + if (p->key == *last_publ) + break; + } + if (p->key != *last_publ) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. 
+ */ + cb->prev_seq = 1; + *last_publ = 0; + return -EPIPE; + } + } else { + p = list_first_entry(&tsk->publications, struct publication, + pport_list); + } + + list_for_each_entry_from(p, &tsk->publications, pport_list) { + err = __tipc_nl_add_sk_publ(skb, cb, p); + if (err) { + *last_publ = p->key; + return err; + } + } + *last_publ = 0; + + return 0; +} + +/** + * tipc_nl_publ_dump - netlink dump callback listing one socket's publications + * @skb: message buffer to fill + * @cb: dump state; args[0] is the socket port id, args[1] the key of the + * publication to resume from and args[2] a flag set once all are listed + * + * On the first call the port id is read from the nested TIPC_NLA_SOCK + * attribute of the request. + * + * Returns skb->len, 0 once the dump is complete, errno otherwise + */ +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + u32 tsk_portid = cb->args[0]; + u32 last_publ = cb->args[1]; + u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); + struct tipc_sock *tsk; + + if (!tsk_portid) { + struct nlattr **attrs; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + /* A request without TIPC_NLA_SOCK must not reach + * nla_parse_nested(), which dereferences the attribute. + */ + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy); + if (err) + return err; + + if (!sock[TIPC_NLA_SOCK_REF]) + return -EINVAL; + + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + } + + if (done) + return 0; + + tsk = tipc_sk_lookup(net, tsk_portid); + if (!tsk) + return -EINVAL; + + lock_sock(&tsk->sk); + err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); + if (!err) + done = 1; + release_sock(&tsk->sk); + sock_put(&tsk->sk); + + cb->args[0] = tsk_portid; + cb->args[1] = last_publ; + cb->args[2] = done; + + return skb->len; +} diff --git a/kernel/net/tipc/socket.h b/kernel/net/tipc/socket.h new file mode 100644 index 000000000..bf6551389 --- /dev/null +++ b/kernel/net/tipc/socket.h @@ -0,0 +1,56 @@ +/* net/tipc/socket.h: Include file for TIPC socket code + * + * Copyright (c) 2014-2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_SOCK_H +#define _TIPC_SOCK_H + +#include <net/sock.h> +#include <net/genetlink.h> + +#define TIPC_CONNACK_INTV 256 +#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) +#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); + +#endif diff --git a/kernel/net/tipc/subscr.c b/kernel/net/tipc/subscr.c new file mode 100644 index 000000000..1c147c869 --- /dev/null +++ b/kernel/net/tipc/subscr.c @@ -0,0 +1,377 @@ +/* + * net/tipc/subscr.c: TIPC network topology service + * + * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "name_table.h" +#include "subscr.h" + +/** + * struct tipc_subscriber - TIPC network topology subscriber + * @conid: connection identifier to server connecting to subscriber + * @lock: control access to subscriber + * @subscription_list: list of subscription objects for this subscriber + */ +struct tipc_subscriber { + int conid; + spinlock_t lock; + struct list_head subscription_list; +}; + +/** + * htohl - convert value to endianness used by destination + * @in: value to convert + * @swap: non-zero if endianness must be reversed + * + * Returns converted value + */ +static u32 htohl(u32 in, int swap) +{ + return swap ? 
swab32(in) : in; +} + +static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node) +{ + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber = sub->subscriber; + struct kvec msg_sect; + + msg_sect.iov_base = (void *)&sub->evt; + msg_sect.iov_len = sizeof(struct tipc_event); + sub->evt.event = htohl(event, sub->swap); + sub->evt.found_lower = htohl(found_lower, sub->swap); + sub->evt.found_upper = htohl(found_upper, sub->swap); + sub->evt.port.ref = htohl(port_ref, sub->swap); + sub->evt.port.node = htohl(node, sub->swap); + tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); +} + +/** + * tipc_subscr_overlap - test for subscription overlap with the given values + * + * Returns 1 if there is overlap, otherwise 0. + */ +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper) +{ + if (found_lower < sub->seq.lower) + found_lower = sub->seq.lower; + if (found_upper > sub->seq.upper) + found_upper = sub->seq.upper; + if (found_lower > found_upper) + return 0; + return 1; +} + +/** + * tipc_subscr_report_overlap - issue event if there is subscription overlap + * + * Protected by nameseq.lock in name_table.c + */ +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) +{ + if (!tipc_subscr_overlap(sub, found_lower, found_upper)) + return; + if (!must && !(sub->filter & TIPC_SUB_PORTS)) + return; + + subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); +} + +static void subscr_timeout(unsigned long data) +{ + struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + /* The spin lock per subscriber is used to protect its members */ + 
spin_lock_bh(&subscriber->lock); + + /* Validate timeout (in case subscription is being cancelled) */ + if (sub->timeout == TIPC_WAIT_FOREVER) { + spin_unlock_bh(&subscriber->lock); + return; + } + + /* Unlink subscription from name table */ + tipc_nametbl_unsubscribe(sub); + + /* Unlink subscription from subscriber */ + list_del(&sub->subscription_list); + + spin_unlock_bh(&subscriber->lock); + + /* Notify subscriber of timeout */ + subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); + + /* Now destroy subscription */ + kfree(sub); + atomic_dec(&tn->subscription_count); +} + +/** + * subscr_del - delete a subscription within a subscription list + * + * Called with subscriber lock held. + */ +static void subscr_del(struct tipc_subscription *sub) +{ + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscription_list); + kfree(sub); + atomic_dec(&tn->subscription_count); +} + +static void subscr_release(struct tipc_subscriber *subscriber) +{ + struct tipc_subscription *sub; + struct tipc_subscription *sub_temp; + + spin_lock_bh(&subscriber->lock); + + /* Destroy any existing subscriptions for subscriber */ + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, + subscription_list) { + if (sub->timeout != TIPC_WAIT_FOREVER) { + spin_unlock_bh(&subscriber->lock); + del_timer_sync(&sub->timer); + spin_lock_bh(&subscriber->lock); + } + subscr_del(sub); + } + spin_unlock_bh(&subscriber->lock); + + /* Now destroy subscriber */ + kfree(subscriber); +} + +/** + * subscr_cancel - handle subscription cancellation request + * + * Called with subscriber lock held. Routine must temporarily release lock + * to enable the subscription timeout routine to finish without deadlocking; + * the lock is then reclaimed to allow caller to release it upon return. + * + * Note that fields of 's' use subscriber's endianness! 
+ */ +static void subscr_cancel(struct tipc_subscr *s, + struct tipc_subscriber *subscriber) +{ + struct tipc_subscription *sub; + struct tipc_subscription *sub_temp; + int found = 0; + + /* Find first matching subscription, exit if not found */ + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, + subscription_list) { + if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { + found = 1; + break; + } + } + if (!found) + return; + + /* Cancel subscription timer (if used), then delete subscription */ + if (sub->timeout != TIPC_WAIT_FOREVER) { + sub->timeout = TIPC_WAIT_FOREVER; + spin_unlock_bh(&subscriber->lock); + del_timer_sync(&sub->timer); + spin_lock_bh(&subscriber->lock); + } + subscr_del(sub); +} + +/** + * subscr_subscribe - create subscription for subscriber + * + * Called with subscriber lock held. + */ +static int subscr_subscribe(struct net *net, struct tipc_subscr *s, + struct tipc_subscriber *subscriber, + struct tipc_subscription **sub_p) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_subscription *sub; + int swap; + + /* Determine subscriber's endianness */ + swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); + + /* Detect & process a subscription cancellation request */ + if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { + s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); + subscr_cancel(s, subscriber); + return 0; + } + + /* Refuse subscription if global limit exceeded */ + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + pr_warn("Subscription rejected, limit reached (%u)\n", + TIPC_MAX_SUBSCRIPTIONS); + return -EINVAL; + } + + /* Allocate subscription object */ + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); + if (!sub) { + pr_warn("Subscription rejected, no memory\n"); + return -ENOMEM; + } + + /* Initialize subscription object */ + sub->net = net; + sub->seq.type = htohl(s->seq.type, swap); + sub->seq.lower = htohl(s->seq.lower, swap); + sub->seq.upper = htohl(s->seq.upper, 
swap); + sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); + sub->filter = htohl(s->filter, swap); + if ((!(sub->filter & TIPC_SUB_PORTS) == + !(sub->filter & TIPC_SUB_SERVICE)) || + (sub->seq.lower > sub->seq.upper)) { + pr_warn("Subscription rejected, illegal request\n"); + kfree(sub); + return -EINVAL; + } + list_add(&sub->subscription_list, &subscriber->subscription_list); + sub->subscriber = subscriber; + sub->swap = swap; + memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); + atomic_inc(&tn->subscription_count); + if (sub->timeout != TIPC_WAIT_FOREVER) { + setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); + mod_timer(&sub->timer, jiffies + sub->timeout); + } + *sub_p = sub; + return 0; +} + +/* Handle one termination request for the subscriber */ +static void subscr_conn_shutdown_event(int conid, void *usr_data) +{ + subscr_release((struct tipc_subscriber *)usr_data); +} + +/* Handle one subscription request (create or cancel) for the subscriber. + * + * The connection is terminated only if subscr_subscribe() reports an error. + * A cancel request returns 0 without creating a subscription and must leave + * the connection, and the subscriber's other subscriptions, intact. + */ +static void subscr_conn_msg_event(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) +{ + struct tipc_subscriber *subscriber = usr_data; + struct tipc_subscription *sub = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); + int rc; + + spin_lock_bh(&subscriber->lock); + rc = subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); + if (rc) + tipc_conn_terminate(tn->topsrv, subscriber->conid); + else if (sub) + tipc_nametbl_subscribe(sub); + spin_unlock_bh(&subscriber->lock); +} + +/* Handle one request to establish a new subscriber */ +static void *subscr_named_msg_event(int conid) +{ + struct tipc_subscriber *subscriber; + + /* Create subscriber object */ + subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); + if (subscriber == NULL) { + pr_warn("Subscriber rejected, no memory\n"); + return NULL; + } + INIT_LIST_HEAD(&subscriber->subscription_list); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); + + return (void 
*)subscriber; +} + +int tipc_subscr_start(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + const char name[] = "topology_server"; + struct tipc_server *topsrv; + struct sockaddr_tipc *saddr; + + saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); + if (!saddr) + return -ENOMEM; + saddr->family = AF_TIPC; + saddr->addrtype = TIPC_ADDR_NAMESEQ; + saddr->addr.nameseq.type = TIPC_TOP_SRV; + saddr->addr.nameseq.lower = TIPC_TOP_SRV; + saddr->addr.nameseq.upper = TIPC_TOP_SRV; + saddr->scope = TIPC_NODE_SCOPE; + + topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); + if (!topsrv) { + kfree(saddr); + return -ENOMEM; + } + topsrv->net = net; + topsrv->saddr = saddr; + topsrv->imp = TIPC_CRITICAL_IMPORTANCE; + topsrv->type = SOCK_SEQPACKET; + topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); + topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; + topsrv->tipc_conn_new = subscr_named_msg_event; + topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + + strncpy(topsrv->name, name, strlen(name) + 1); + tn->topsrv = topsrv; + atomic_set(&tn->subscription_count, 0); + + return tipc_server_start(topsrv); +} + +void tipc_subscr_stop(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_server *topsrv = tn->topsrv; + + tipc_server_stop(topsrv); + kfree(topsrv->saddr); + kfree(topsrv); +} diff --git a/kernel/net/tipc/subscr.h b/kernel/net/tipc/subscr.h new file mode 100644 index 000000000..33488bd9f --- /dev/null +++ b/kernel/net/tipc/subscr.h @@ -0,0 +1,83 @@ +/* + * net/tipc/subscr.h: Include file for TIPC network topology service + * + * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2005-2007, 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_SUBSCR_H +#define _TIPC_SUBSCR_H + +#include "server.h" + +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 + +struct tipc_subscription; +struct tipc_subscriber; + +/** + * struct tipc_subscription - TIPC network topology subscription object + * @subscriber: pointer to its subscriber + * @seq: name sequence associated with subscription + * @net: pointer to network namespace + * @timeout: duration of subscription (in jiffies), or TIPC_WAIT_FOREVER + * @filter: event filtering to be done for subscription + * @timer: timer governing subscription duration (optional) + * @nameseq_list: adjacent subscriptions in name sequence's subscription list + * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @swap: indicates if subscriber uses opposite endianness in its messages + * @evt: template for events generated by subscription + */ +struct tipc_subscription { + struct tipc_subscriber *subscriber; + struct tipc_name_seq seq; + struct net *net; + unsigned long timeout; + u32 filter; + struct timer_list timer; + struct list_head nameseq_list; + struct list_head subscription_list; + int swap; + struct tipc_event evt; +}; + +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper); +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must); +int tipc_subscr_start(struct net *net); +void tipc_subscr_stop(struct net *net); + +#endif diff --git a/kernel/net/tipc/sysctl.c b/kernel/net/tipc/sysctl.c new file mode 100644 index 000000000..1a779b1e8 --- /dev/null +++ b/kernel/net/tipc/sysctl.c @@ -0,0 +1,71 @@ +/* + * net/tipc/sysctl.c: sysctl interface to TIPC subsystem + * + * Copyright (c) 2013, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" + +#include <linux/sysctl.h> +/* Handle returned by register_net_sysctl(); kept so tipc_unregister_sysctl() can remove the table */ +static struct ctl_table_header *tipc_ctl_hdr; +/* + * Entries registered under "net/tipc" by tipc_register_sysctl(). Both use + * mode 0644 and proc_dointvec, with no extra1/extra2 bounds set. + * NOTE(review): confirm that unbounded (zero/negative) values are acceptable. + */ +static struct ctl_table tipc_table[] = { + { + .procname = "tipc_rmem", + .data = &sysctl_tipc_rmem, + .maxlen = sizeof(sysctl_tipc_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "named_timeout", + .data = &sysctl_tipc_named_timeout, + .maxlen = sizeof(sysctl_tipc_named_timeout), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; +/** + * tipc_register_sysctl - register tipc_table under "net/tipc" in init_net + * + * Returns 0 on success, or -ENOMEM if register_net_sysctl() returns NULL. + */ +int tipc_register_sysctl(void) +{ + tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); + if (tipc_ctl_hdr == NULL) + return -ENOMEM; + return 0; +} +/* tipc_unregister_sysctl - unregister the table whose handle is saved in tipc_ctl_hdr */ +void tipc_unregister_sysctl(void) +{ + unregister_net_sysctl_table(tipc_ctl_hdr); +} diff --git a/kernel/net/tipc/udp_media.c b/kernel/net/tipc/udp_media.c new file mode 100644 index 000000000..66deebc66 --- /dev/null +++ b/kernel/net/tipc/udp_media.c @@ -0,0 +1,448 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/socket.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/inet.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/list.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/udp_tunnel.h> +#include <net/addrconf.h> +#include <linux/tipc_netlink.h> +#include "core.h" +#include "bearer.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + */ +struct udp_media_addr { + __be16 proto; + __be16 udp_port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * 
@bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; +}; + +/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + if (ntohs(ua->proto) == ETH_P_IP) { + if (ipv4_is_multicast(ua->ipv4.s_addr)) + addr->broadcast = 1; + } else if (ntohs(ua->proto) == ETH_P_IPV6) { + if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) + addr->broadcast = 1; + } else { + pr_err("Invalid UDP media address\n"); + } +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + else + pr_err("Invalid UDP media address\n"); + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + 
msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + int ttl, err = 0; + struct udp_bearer *ub; + struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct sk_buff *clone; + struct rtable *rt; + + clone = skb_clone(skb, GFP_ATOMIC); + skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto tx_error; + } + if (dst->proto == htons(ETH_P_IP)) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = clone->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + ttl = ip4_dst_hoplimit(&rt->dst); + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, + src->udp_port, dst->udp_port, + false, true); + if (err < 0) { + ip_rt_put(rt); + goto tx_error; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct dst_entry *ndst; + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + err = ipv6_stub->ipv6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + if (err) + goto tx_error; + ttl = ip6_dst_hoplimit(ndst); + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + ndst->dev, &src->ipv6, + &dst->ipv6, 0, ttl, src->udp_port, + dst->udp_port, false); +#endif + } + return err; + +tx_error: + kfree_skb(clone); + return err; +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + + ub = 
rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + kfree_skb(skb); + return 0; + } + + skb_pull(skb, sizeof(struct udphdr)); + rcu_read_lock(); + b = rcu_dereference_rtnl(ub->bearer); + + if (b) { + tipc_rcv(sock_net(sk), skb, b); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + if (!ipv4_is_multicast(remote->ipv4.s_addr)) + return 0; + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = ip_mc_join_group(sk, &mreqn); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (!ipv6_addr_is_multicast(&remote->ipv6)) + return 0; + err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, + &remote->ipv6); +#endif + } + return err; +} + +/** + * parse_options - build local/remote addresses from configuration + * @attrs: netlink config data + * @ub: UDP bearer instance + * @local: local bearer IP address/port + * @remote: peer or multicast IP/port + */ +static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, + struct udp_media_addr *local, + struct udp_media_addr *remote) +{ + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct sockaddr_storage *sa_local, *sa_remote; + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { + sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); + sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); + } else { +err: + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { + struct sockaddr_in *ip4; + + ip4 = (struct sockaddr_in *)sa_local; + local->proto = htons(ETH_P_IP); + 
local->udp_port = ip4->sin_port; + local->ipv4.s_addr = ip4->sin_addr.s_addr; + + ip4 = (struct sockaddr_in *)sa_remote; + remote->proto = htons(ETH_P_IP); + remote->udp_port = ip4->sin_port; + remote->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { + struct sockaddr_in6 *ip6; + + ip6 = (struct sockaddr_in6 *)sa_local; + local->proto = htons(ETH_P_IPV6); + local->udp_port = ip6->sin6_port; + local->ipv6 = ip6->sin6_addr; + ub->ifindex = ip6->sin6_scope_id; + + ip6 = (struct sockaddr_in6 *)sa_remote; + remote->proto = htons(ETH_P_IPV6); + remote->udp_port = ip6->sin6_port; + remote->ipv6 = ip6->sin6_addr; + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr *remote; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + remote = (struct udp_media_addr *)&b->bcast_addr.value; + memset(remote, 0, sizeof(struct udp_media_addr)); + err = parse_options(attrs, ub, &local, remote); + if (err) + goto err; + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = 1; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (local.proto == htons(ETH_P_IP)) { + struct net_device *dev; + + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = 
AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + b->mtu = dev->mtu - sizeof(struct iphdr) + - sizeof(struct udphdr); +#if IS_ENABLED(CONFIG_IPV6) + } else if (local.proto == htons(ETH_P_IPV6)) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + udp_conf.local_ip6 = in6addr_any; + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.udp_port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + if (enable_mcast(ub, remote)) + goto err; + return 0; +err: + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + synchronize_net(); + kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + if (ub->ubsock) + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(b->media_ptr, NULL); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = 
TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; |