diff options
Diffstat (limited to 'kernel/net/rds/tcp.c')
-rw-r--r-- | kernel/net/rds/tcp.c | 181 |
1 files changed, 151 insertions, 30 deletions
diff --git a/kernel/net/rds/tcp.c b/kernel/net/rds/tcp.c index edac9ef2b..9d6ddbacd 100644 --- a/kernel/net/rds/tcp.c +++ b/kernel/net/rds/tcp.c @@ -35,6 +35,9 @@ #include <linux/in.h> #include <linux/module.h> #include <net/tcp.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/tcp.h> #include "rds.h" #include "tcp.h" @@ -64,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock) set_fs(oldfs); } +/* All module specific customizations to the RDS-TCP socket should be done in + * rds_tcp_tune() and applied after socket creation. In general these + * customizations should be tunable via module_param() + */ void rds_tcp_tune(struct socket *sock) { - struct sock *sk = sock->sk; - rds_tcp_nonagle(sock); - - /* - * We're trying to saturate gigabit with the default, - * see svc_sock_setbufsize(). - */ - lock_sock(sk); - sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE; - sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE; - sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; - release_sock(sk); } u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) @@ -189,9 +184,9 @@ out: spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); } -static int rds_tcp_laddr_check(__be32 addr) +static int rds_tcp_laddr_check(struct net *net, __be32 addr) { - if (inet_addr_type(&init_net, addr) == RTN_LOCAL) + if (inet_addr_type(net, addr) == RTN_LOCAL) return 0; return -EADDRNOTAVAIL; } @@ -250,16 +245,7 @@ static void rds_tcp_destroy_conns(void) } } -static void rds_tcp_exit(void) -{ - rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); - rds_tcp_listen_stop(); - rds_tcp_destroy_conns(); - rds_trans_unregister(&rds_tcp_transport); - rds_tcp_recv_exit(); - kmem_cache_destroy(rds_tcp_conn_slab); -} -module_exit(rds_tcp_exit); +static void rds_tcp_exit(void); struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, @@ -281,6 +267,136 @@ struct rds_transport rds_tcp_transport = { .t_prefer_loopback = 1, }; +static int rds_tcp_netid; + +/* per-network namespace private data for this module */ +struct rds_tcp_net { + struct socket *rds_tcp_listen_sock; + struct work_struct rds_tcp_accept_w; +}; + +static void rds_tcp_accept_worker(struct work_struct *work) +{ + struct rds_tcp_net *rtn = container_of(work, + struct rds_tcp_net, + rds_tcp_accept_w); + + while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0) + cond_resched(); +} + +void rds_tcp_accept_work(struct sock *sk) +{ + struct net *net = sock_net(sk); + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + queue_work(rds_wq, &rtn->rds_tcp_accept_w); +} + +static __net_init int rds_tcp_init_net(struct net *net) +{ + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); + if (!rtn->rds_tcp_listen_sock) { + pr_warn("could not set up listen sock\n"); + return -EAFNOSUPPORT; + } + INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); + return 0; +} + +static void __net_exit rds_tcp_exit_net(struct net *net) +{ + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + /* If rds_tcp_exit_net() is called as a result of netns deletion, + * the rds_tcp_kill_sock() device notifier would already have cleaned + * up the listen socket, thus there is no work to do in this function. + * + * If rds_tcp_exit_net() is called as a result of module unload, + * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then + * we do need to clean up the listen socket here. + */ + if (rtn->rds_tcp_listen_sock) { + rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + rtn->rds_tcp_listen_sock = NULL; + flush_work(&rtn->rds_tcp_accept_w); + } +} + +static struct pernet_operations rds_tcp_net_ops = { + .init = rds_tcp_init_net, + .exit = rds_tcp_exit_net, + .id = &rds_tcp_netid, + .size = sizeof(struct rds_tcp_net), +}; + +static void rds_tcp_kill_sock(struct net *net) +{ + struct rds_tcp_connection *tc, *_tc; + struct sock *sk; + LIST_HEAD(tmp_list); + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + rtn->rds_tcp_listen_sock = NULL; + flush_work(&rtn->rds_tcp_accept_w); + spin_lock_irq(&rds_tcp_conn_lock); + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + struct net *c_net = read_pnet(&tc->conn->c_net); + + if (net != c_net || !tc->t_sock) + continue; + list_move_tail(&tc->t_tcp_node, &tmp_list); + } + spin_unlock_irq(&rds_tcp_conn_lock); + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { + sk = tc->t_sock->sk; + sk->sk_prot->disconnect(sk, 0); + tcp_done(sk); + if (tc->conn->c_passive) + rds_conn_destroy(tc->conn->c_passive); + rds_conn_destroy(tc->conn); + } +} + +static int rds_tcp_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + /* rds-tcp registers as a pernet subys, so the ->exit will only + * get invoked after network acitivity has quiesced. We need to + * clean up all sockets to quiesce network activity, and use + * the unregistration of the per-net loopback device as a trigger + * to start that cleanup. + */ + if (event == NETDEV_UNREGISTER_FINAL && + dev->ifindex == LOOPBACK_IFINDEX) + rds_tcp_kill_sock(dev_net(dev)); + + return NOTIFY_DONE; +} + +static struct notifier_block rds_tcp_dev_notifier = { + .notifier_call = rds_tcp_dev_event, + .priority = -10, /* must be called after other network notifiers */ +}; + +static void rds_tcp_exit(void) +{ + rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + unregister_pernet_subsys(&rds_tcp_net_ops); + if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) + pr_warn("could not unregister rds_tcp_dev_notifier\n"); + rds_tcp_destroy_conns(); + rds_trans_unregister(&rds_tcp_transport); + rds_tcp_recv_exit(); + kmem_cache_destroy(rds_tcp_conn_slab); +} +module_exit(rds_tcp_exit); + static int rds_tcp_init(void) { int ret; @@ -293,6 +409,16 @@ static int rds_tcp_init(void) goto out; } + ret = register_netdevice_notifier(&rds_tcp_dev_notifier); + if (ret) { + pr_warn("could not register rds_tcp_dev_notifier\n"); + goto out; + } + + ret = register_pernet_subsys(&rds_tcp_net_ops); + if (ret) + goto out_slab; + ret = rds_tcp_recv_init(); if (ret) goto out_slab; @@ -301,19 +427,14 @@ static int rds_tcp_init(void) if (ret) goto out_recv; - ret = rds_tcp_listen_init(); - if (ret) - goto out_register; - rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; -out_register: - rds_trans_unregister(&rds_tcp_transport); out_recv: rds_tcp_recv_exit(); out_slab: + unregister_pernet_subsys(&rds_tcp_net_ops); kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; |