summaryrefslogtreecommitdiffstats
path: root/kernel/net/netfilter/ipvs
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/net/netfilter/ipvs
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/net/netfilter/ipvs')
-rw-r--r--kernel/net/netfilter/ipvs/Kconfig11
-rw-r--r--kernel/net/netfilter/ipvs/Makefile1
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_app.c36
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_conn.c91
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_core.c566
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_ctl.c502
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_est.c20
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_ftp.c27
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_lblc.c3
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_lblcr.c3
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_nfct.c5
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_ovf.c86
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_pe_sip.c2
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_proto.c33
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_proto_ah_esp.c32
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_proto_sctp.c58
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_proto_tcp.c61
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_proto_udp.c49
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_sched.c14
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_sh.c45
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_sync.c374
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_xmit.c145
22 files changed, 1264 insertions, 900 deletions
diff --git a/kernel/net/netfilter/ipvs/Kconfig b/kernel/net/netfilter/ipvs/Kconfig
index 3b6929dec..b32fb0dbe 100644
--- a/kernel/net/netfilter/ipvs/Kconfig
+++ b/kernel/net/netfilter/ipvs/Kconfig
@@ -162,6 +162,17 @@ config IP_VS_FO
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_OVF
+ tristate "weighted overflow scheduling"
+ ---help---
+ The weighted overflow scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available and overflows to the next when active
+ connections exceed the node's weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
diff --git a/kernel/net/netfilter/ipvs/Makefile b/kernel/net/netfilter/ipvs/Makefile
index 38b2723b2..67f3f4389 100644
--- a/kernel/net/netfilter/ipvs/Makefile
+++ b/kernel/net/netfilter/ipvs/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
+obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
diff --git a/kernel/net/netfilter/ipvs/ip_vs_app.c b/kernel/net/netfilter/ipvs/ip_vs_app.c
index dfd7b65b3..0328f7250 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_app.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_app.c
@@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
struct ip_vs_protocol *pp;
@@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
}
}
- ret = pp->register_app(net, inc);
+ ret = pp->register_app(ipvs, inc);
if (ret)
goto out;
@@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
return;
if (pp->unregister_app)
- pp->unregister_app(net, inc);
+ pp->unregister_app(ipvs, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
* Register an application incarnation in protocol applications
*/
int
-register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
int result;
mutex_lock(&__ip_vs_app_mutex);
- result = ip_vs_app_inc_new(net, app, proto, port);
+ result = ip_vs_app_inc_new(ipvs, app, proto, port);
mutex_unlock(&__ip_vs_app_mutex);
@@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
/* Register application for netns */
-struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
+struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a;
int err = 0;
- if (!ipvs)
- return ERR_PTR(-ENOENT);
-
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry(a, &ipvs->app_list, a_list) {
@@ -230,21 +226,17 @@ out_unlock:
* We are sure there are no app incarnations attached to services
* Caller should use synchronize_rcu() or rcu_barrier()
*/
-void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
+void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a, *anxt, *inc, *nxt;
- if (!ipvs)
- return;
-
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
if (app && strcmp(app->name, a->name))
continue;
list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
- ip_vs_app_inc_release(net, inc);
+ ip_vs_app_inc_release(ipvs, inc);
}
list_del(&a->a_list);
@@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = {
};
#endif
-int __net_init ip_vs_app_net_init(struct net *net)
+int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
INIT_LIST_HEAD(&ipvs->app_list);
proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
return 0;
}
-void __net_exit ip_vs_app_net_cleanup(struct net *net)
+void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
- unregister_ip_vs_app(net, NULL /* all */);
+ struct net *net = ipvs->net;
+
+ unregister_ip_vs_app(ipvs, NULL /* all */);
remove_proc_entry("ip_vs_app", net->proc_net);
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_conn.c b/kernel/net/netfilter/ipvs/ip_vs_conn.c
index b0f7b626b..85ca189bd 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_conn.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_conn.c
@@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto,
+static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr,
__be16 port)
{
@@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro
if (af == AF_INET6)
return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd) ^
- ((size_t)net>>8)) & ip_vs_conn_tab_mask;
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
#endif
return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd) ^
- ((size_t)net>>8)) & ip_vs_conn_tab_mask;
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport;
}
- return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
+ return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+ ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol,
&cp->caddr, cp->cport, NULL, 0, &p);
if (cp->pe) {
@@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
continue;
/* HIT */
@@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
}
static int
-ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
+ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
+ int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
- int inverse, struct ip_vs_conn_param *p)
+ struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
- struct net *net = skb_net(skb);
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL)
return 1;
- if (likely(!inverse))
- ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr,
pptr[0], &iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr,
pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
struct ip_vs_conn *
-ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
return NULL;
return ip_vs_conn_in_get(&p);
@@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (unlikely(p->pe_data && p->pe->ct_match)) {
- if (!ip_vs_conn_net_eq(cp, p->net))
+ if (cp->ipvs != p->ipvs)
continue;
if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
if (__ip_vs_conn_get(cp))
@@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
p->vport == cp->vport && p->cport == cp->cport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (__ip_vs_conn_get(cp))
goto out;
}
@@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
continue;
/* HIT */
@@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
}
struct ip_vs_conn *
-ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
return NULL;
return ip_vs_conn_out_get(&p);
@@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
* so we can make the assumption that the svc_af is the same as the
* dest_af
*/
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
+ dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
@@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
#endif
ip_vs_bind_xmit(cp);
- pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
+ pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol);
if (pd && atomic_read(&pd->appcnt))
ip_vs_bind_app(cp, pd->pp);
}
@@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs,
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
+ struct netns_ipvs *ipvs = ct->ipvs;
/*
* Checking the dest server status.
@@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
- struct net *net = ip_vs_conn_net(cp);
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = cp->ipvs;
/*
* do I control anybody?
@@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data)
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
- ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
+ ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
ip_vs_conn_put(cp);
}
@@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs = net_ipvs(p->net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ struct netns_ipvs *ipvs = p->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
p->protocol);
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
@@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
INIT_HLIST_NODE(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
- ip_vs_conn_net_set(cp, p->net);
+ cp->ipvs = ipvs;
cp->af = p->af;
cp->daf = dest_af;
cp->protocol = p->protocol;
@@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
size_t len = 0;
char dbuf[IP_VS_ADDRSTRLEN];
- if (!ip_vs_conn_net_eq(cp, net))
+ if (!net_eq(cp->ipvs->net, net))
return 0;
if (cp->pe_data) {
pe_data[0] = ' ';
@@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
- if (!ip_vs_conn_net_eq(cp, net))
+ if (!net_eq(cp->ipvs->net, net))
return 0;
#ifdef CONFIG_IP_VS_IPV6
@@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
}
/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(struct net *net)
+void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
@@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net)
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
- if (!ip_vs_conn_net_eq(cp, net))
+ if (cp->ipvs != ipvs)
continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
@@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net)
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
-static void ip_vs_conn_flush(struct net *net)
+static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
- struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
rcu_read_lock();
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
- if (!ip_vs_conn_net_eq(cp, net))
+ if (cp->ipvs != ipvs)
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
@@ -1345,23 +1345,22 @@ flush_again:
/*
* per netns init and exit
*/
-int __net_init ip_vs_conn_net_init(struct net *net)
+int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
atomic_set(&ipvs->conn_count, 0);
- proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops);
- proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops);
+ proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops);
+ proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_fops);
return 0;
}
-void __net_exit ip_vs_conn_net_cleanup(struct net *net)
+void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
{
/* flush all the connection entries first */
- ip_vs_conn_flush(net);
- remove_proc_entry("ip_vs_conn", net->proc_net);
- remove_proc_entry("ip_vs_conn_sync", net->proc_net);
+ ip_vs_conn_flush(ipvs);
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
}
int __init ip_vs_conn_init(void)
diff --git a/kernel/net/netfilter/ipvs/ip_vs_core.c b/kernel/net/netfilter/ipvs/ip_vs_core.c
index 5d2b806a8..f57b4dcdb 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_core.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_core.c
@@ -112,7 +112,7 @@ static inline void
ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct netns_ipvs *ipvs = cp->ipvs;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
@@ -146,7 +146,7 @@ static inline void
ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct netns_ipvs *ipvs = cp->ipvs;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
@@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
@@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr,
vport, p);
p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param)
@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
union nf_inet_addr snet; /* source network of the client,
after masking */
+ const union nf_inet_addr *src_addr, *dst_addr;
+
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ src_addr = &iph->saddr;
+ dst_addr = &iph->daddr;
+ } else {
+ src_addr = &iph->daddr;
+ dst_addr = &iph->saddr;
+ }
+
/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+ ipv6_addr_prefix(&snet.in6, &src_addr->in6,
(__force __u32) svc->netmask);
else
#endif
- snet.ip = iph->saddr.ip & svc->netmask;
+ snet.ip = src_addr->ip & svc->netmask;
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
- IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
+ IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
{
int protocol = iph->protocol;
- const union nf_inet_addr *vaddr = &iph->daddr;
+ const union nf_inet_addr *vaddr = dst_addr;
__be16 vport = 0;
if (dst_port == svc->port) {
@@ -319,7 +329,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* return *ignored=0 i.e. ICMP and NF_DROP
*/
sched = rcu_dereference(svc->scheduler);
- dest = sched->schedule(svc, skb, iph);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
@@ -360,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
- src_port, &iph->daddr, dst_port, &param);
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr,
+ src_port, dst_addr, dst_port, &param);
cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
skb->mark);
@@ -412,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_conn *cp = NULL;
struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
- __be16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr, cport, vport;
+ const void *caddr, *vaddr;
unsigned int flags;
*ignored = 1;
@@ -423,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
if (pptr == NULL)
return NULL;
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ cport = pptr[0];
+ caddr = &iph->saddr;
+ vport = pptr[1];
+ vaddr = &iph->daddr;
+ } else {
+ cport = pptr[1];
+ caddr = &iph->daddr;
+ vport = pptr[0];
+ vaddr = &iph->saddr;
+ }
+
/*
* FTPDATA needs this check when using local real server.
* Never schedule Active FTPDATA connections from real server.
* For LVS-NAT they must be already created. For other methods
* with persistence the connection is created on SYN+ACK.
*/
- if (pptr[0] == FTPDATA) {
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
+ if (cport == FTPDATA) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
"Not scheduling FTPDATA");
return NULL;
}
@@ -438,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Do not schedule replies from local real server.
*/
- if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
- "Not scheduling reply for existing connection");
- __ip_vs_conn_put(cp);
- return NULL;
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+ cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+
+ if (cp) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
+ "Not scheduling reply for existing"
+ " connection");
+ __ip_vs_conn_put(cp);
+ return NULL;
+ }
}
/*
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
- return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
+ return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
iph);
*ignored = 0;
@@ -458,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Non-persistent service
*/
- if (!svc->fwmark && pptr[1] != svc->port) {
+ if (!svc->fwmark && vport != svc->port) {
if (!svc->port)
pr_err("Schedule: port zero only supported "
"in persistent services, "
@@ -467,7 +502,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
sched = rcu_dereference(svc->scheduler);
- dest = sched->schedule(svc, skb, iph);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
@@ -483,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
- &iph->saddr, pptr[0], &iph->daddr,
- pptr[1], &p);
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &p);
cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
- dest->port ? dest->port : pptr[1],
+ dest->port ? dest->port : vport,
flags, dest, skb->mark);
if (!cp) {
*ignored = -1;
@@ -507,6 +547,15 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return cp;
}
+static inline int ip_vs_addr_is_unicast(struct net *net, int af,
+ union nf_inet_addr *addr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
+#endif
+ return (inet_addr_type(net, addr->ip) == RTN_UNICAST);
+}
/*
* Pass or drop the packet.
@@ -516,33 +565,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
{
- __be16 _ports[2], *pptr;
-#ifdef CONFIG_SYSCTL
- struct net *net;
- struct netns_ipvs *ipvs;
- int unicast;
-#endif
+ __be16 _ports[2], *pptr, dport;
+ struct netns_ipvs *ipvs = svc->ipvs;
+ struct net *net = ipvs->net;
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
- if (pptr == NULL) {
+ if (!pptr)
return NF_DROP;
- }
-
-#ifdef CONFIG_SYSCTL
- net = skb_net(skb);
-
-#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6)
- unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
- else
-#endif
- unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
+ dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- ipvs = net_ipvs(net);
- if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
+ if (sysctl_cache_bypass(ipvs) && svc->fwmark &&
+ !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
+ ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
int ret;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -554,7 +591,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
@@ -578,7 +615,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_put(cp);
return ret;
}
-#endif
/*
* When the virtual ftp service is presented, packets destined
@@ -586,9 +622,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
+ if (svc->port == FTPPORT && dport != FTPPORT)
return NF_ACCEPT;
+ if (unlikely(ip_vs_iph_icmp(iph)))
+ return NF_DROP;
+
/*
* Notify the client that the destination is unreachable, and
* release the socket buffer.
@@ -598,11 +637,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
*/
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
- if (!skb->dev) {
- struct net *net_ = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net_->loopback_dev;
- }
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
} else
#endif
@@ -613,15 +649,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
#ifdef CONFIG_SYSCTL
-static int sysctl_snat_reroute(struct sk_buff *skb)
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
return ipvs->sysctl_snat_reroute;
}
-static int sysctl_nat_icmp_send(struct net *net)
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
return ipvs->sysctl_nat_icmp_send;
}
@@ -632,8 +666,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
#else
-static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
-static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
@@ -652,12 +686,13 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
return IP_DEFRAG_VS_OUT;
}
-static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs,
+ struct sk_buff *skb, u_int32_t user)
{
int err;
local_bh_disable();
- err = ip_defrag(skb, user);
+ err = ip_defrag(ipvs->net, skb, user);
local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
@@ -665,10 +700,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
-static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
- unsigned int hooknum)
+static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
+ struct sk_buff *skb, unsigned int hooknum)
{
- if (!sysctl_snat_reroute(skb))
+ if (!sysctl_snat_reroute(ipvs))
return 0;
/* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */
if (NF_INET_LOCAL_IN == hooknum)
@@ -678,12 +713,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
struct dst_entry *dst = skb_dst(skb);
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
- ip6_route_me_harder(skb) != 0)
+ ip6_route_me_harder(ipvs->net, skb) != 0)
return 1;
} else
#endif
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
return 1;
return 0;
@@ -836,7 +871,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
- if (ip_vs_route_me_harder(af, skb, hooknum))
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto out;
/* do the statistics and put it back */
@@ -860,8 +895,8 @@ out:
* Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
- unsigned int hooknum)
+static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum)
{
struct iphdr *iph;
struct icmphdr _icmph, *ic;
@@ -876,7 +911,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
/* reassemble IP fragments */
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -922,10 +957,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking outgoing ICMP for");
- ip_vs_fill_ip4hdr(cih, &ciph);
- ciph.len += offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
+
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
+ cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
if (!cp)
return NF_ACCEPT;
@@ -935,16 +970,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
}
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
+static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *ipvsh)
{
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
union nf_inet_addr snet;
- unsigned int writable;
+ unsigned int offset;
*related = 1;
ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
@@ -972,31 +1007,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)),
&ipvsh->saddr, &ipvsh->daddr);
- /* Now find the contained IP header */
- ciph.len = ipvsh->len + sizeof(_icmph);
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
- if (ip6h == NULL)
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
+ true, &ciph))
return NF_ACCEPT; /* The packet looks wrong, ignore */
- ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
- ciph.daddr.in6 = ip6h->daddr;
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
- if (ciph.protocol < 0)
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
pp = ip_vs_proto_get(ciph.protocol);
if (!pp)
return NF_ACCEPT;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
+ cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph);
if (!cp)
return NF_ACCEPT;
snet.in6 = ciph.saddr.in6;
- writable = ciph.len;
+ offset = ciph.len;
return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
- pp, writable, sizeof(struct ipv6hdr),
+ pp, offset, sizeof(struct ipv6hdr),
hooknum);
}
#endif
@@ -1081,7 +1108,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
{
struct ip_vs_protocol *pp = pd->pp;
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
+ IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
if (!skb_make_writable(skb, iph->len))
goto drop;
@@ -1115,10 +1142,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (ip_vs_route_me_harder(af, skb, hooknum))
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto drop;
- IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
+ IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
@@ -1143,13 +1170,13 @@ drop:
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct sock *sk;
EnterFunction(11);
@@ -1157,29 +1184,27 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (skb->ipvs_property)
return NF_ACCEPT;
+ sk = skb_to_full_sk(skb);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- net = skb_net(skb);
- if (!net_ipvs(net)->enable)
+ if (!ipvs->enable)
return NF_ACCEPT;
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_out_icmp_v6(skb, &related,
+ int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related,
hooknum, &iph);
if (related)
@@ -1189,13 +1214,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related;
- int verdict = ip_vs_out_icmp(skb, &related, hooknum);
+ int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);
if (related)
return verdict;
}
- pd = ip_vs_proto_data_get(net, iph.protocol);
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
if (unlikely(!pd))
return NF_ACCEPT;
pp = pd->pp;
@@ -1205,21 +1230,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (af == AF_INET)
#endif
if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
- if (ip_vs_gather_frags(skb,
+ if (ip_vs_gather_frags(ipvs, skb,
ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
- ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
}
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, &iph, 0);
+ cp = pp->conn_out_get(ipvs, af, skb, &iph);
if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum);
- if (sysctl_nat_icmp_send(net) &&
+ if (sysctl_nat_icmp_send(ipvs) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1229,7 +1254,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
pptr[0])) {
/*
* Notify the real server: there is no
@@ -1246,7 +1271,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!skb->dev)
- skb->dev = net->loopback_dev;
+ skb->dev = ipvs->net->loopback_dev;
icmpv6_send(skb,
ICMPV6_DEST_UNREACH,
ICMPV6_PORT_UNREACH,
@@ -1260,7 +1285,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
}
}
}
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
+ IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;
}
@@ -1271,10 +1296,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
}
/*
@@ -1282,10 +1307,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1296,10 +1321,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET6);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
/*
@@ -1307,14 +1332,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET6);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
#endif
+static unsigned int
+ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_protocol *pp = pd->pp;
+
+ if (!iph->fragoffs) {
+ /* No (second) fragments need to enter here, as nf_defrag_ipv6
+ * replayed fragment zero will already have created the cp
+ */
+
+ /* Schedule and create new connection entry into cpp */
+ if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
+ return 0;
+ }
+
+ if (unlikely(!*cpp)) {
+ /* sorry, all this trouble for a no-hit :) */
+ IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
+ "ip_vs_in: packet continues traversal as normal");
+ if (iph->fragoffs) {
+ /* Fragment that couldn't be mapped to a conn entry
+ * is missing module nf_defrag_ipv6
+ */
+ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+ IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
+ "unhandled fragment");
+ }
+ *verdict = NF_ACCEPT;
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
@@ -1322,9 +1384,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
static int
-ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
+ unsigned int hooknum)
{
- struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -1333,13 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
- bool ipip;
+ bool ipip, new_cp = false;
*related = 1;
/* reassemble IP fragments */
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -1373,8 +1435,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- net = skb_net(skb);
-
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
@@ -1390,7 +1450,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ipip = true;
}
- pd = ip_vs_proto_data_get(net, cih->protocol);
+ pd = ip_vs_proto_data_get(ipvs, cih->protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
@@ -1404,15 +1464,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
"Checking incoming ICMP for");
offset2 = offset;
- ip_vs_fill_ip4hdr(cih, &ciph);
- ciph.len += offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
offset = ciph.len;
+
/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
*/
- cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
- if (!cp)
- return NF_ACCEPT;
+ cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
+ return v;
+ new_cp = true;
+ }
verdict = NF_DROP;
@@ -1443,7 +1512,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
- ipv4_update_pmtu(skb, dev_net(skb->dev),
+ ipv4_update_pmtu(skb, ipvs->net,
mtu, 0, 0, 0, 0);
/* Client uses PMTUD? */
if (!(frag_off & htons(IP_DF)))
@@ -1489,23 +1558,26 @@ ignore_ipip:
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
out:
- __ip_vs_conn_put(cp);
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
return verdict;
}
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum, struct ip_vs_iphdr *iph)
+static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
{
- struct net *net = NULL;
- struct ipv6hdr _ip6h, *ip6h;
struct icmp6hdr _icmph, *ic;
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
- unsigned int offs_ciph, writable, verdict;
+ unsigned int offset, verdict;
+ bool new_cp = false;
*related = 1;
@@ -1534,21 +1606,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)),
&iph->saddr, &iph->daddr);
- /* Now find the contained IP header */
- ciph.len = iph->len + sizeof(_icmph);
- offs_ciph = ciph.len; /* Save ip header offset */
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
- if (ip6h == NULL)
- return NF_ACCEPT; /* The packet looks wrong, ignore */
- ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
- ciph.daddr.in6 = ip6h->daddr;
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
- if (ciph.protocol < 0)
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
-
- net = skb_net(skb);
- pd = ip_vs_proto_data_get(net, ciph.protocol);
+ offset = iph->len + sizeof(_icmph);
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
+ return NF_ACCEPT;
+
+ pd = ip_vs_proto_data_get(ipvs, ciph.protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
@@ -1557,36 +1619,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
if (ciph.fragoffs)
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking incoming ICMPv6 for");
/* The embedded headers contain source and dest in reverse order
* if not from localhost
*/
- cp = pp->conn_in_get(AF_INET6, skb, &ciph,
- (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
+ cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph))
+ return v;
+
+ new_cp = true;
+ }
- if (!cp)
- return NF_ACCEPT;
/* VS/TUN, VS/DR and LOCALNODE just let it go */
if ((hooknum == NF_INET_LOCAL_OUT) &&
(IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
- __ip_vs_conn_put(cp);
- return NF_ACCEPT;
+ verdict = NF_ACCEPT;
+ goto out;
}
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
/* Need to mangle contained IPv6 header in ICMPv6 packet */
- writable = ciph.len;
+ offset = ciph.len;
if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
IPPROTO_SCTP == ciph.protocol)
- writable += 2 * sizeof(__u16); /* Also mangle ports */
+ offset += 2 * sizeof(__u16); /* Also mangle ports */
- verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
- __ip_vs_conn_put(cp);
+out:
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
return verdict;
}
@@ -1598,16 +1673,15 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, pkts;
- struct netns_ipvs *ipvs;
int conn_reuse_mode;
+ struct sock *sk;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1621,7 +1695,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely((skb->pkt_type != PACKET_HOST &&
hooknum != NF_INET_LOCAL_OUT) ||
!skb_dst(skb))) {
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
" ignored in hook %u\n",
skb->pkt_type, iph.protocol,
@@ -1629,20 +1703,17 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ sk = skb_to_full_sk(skb);
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
@@ -1650,8 +1721,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
- &iph);
+ int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related,
+ hooknum, &iph);
if (related)
return verdict;
@@ -1660,21 +1731,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related;
- int verdict = ip_vs_in_icmp(skb, &related, hooknum);
+ int verdict = ip_vs_in_icmp(ipvs, skb, &related,
+ hooknum);
if (related)
return verdict;
}
/* Protocol supported? */
- pd = ip_vs_proto_data_get(net, iph.protocol);
- if (unlikely(!pd))
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
+ if (unlikely(!pd)) {
+ /* The only way we'll see this packet again is if it's
+ * encapsulated, so mark it with ipvs_property=1 so we
+ * skip it if we're ignoring tunneled packets
+ */
+ if (sysctl_ignore_tunneled(ipvs))
+ skb->ipvs_property = 1;
+
return NF_ACCEPT;
+ }
pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, &iph, 0);
+ cp = pp->conn_in_get(ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs &&
@@ -1688,32 +1768,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
cp = NULL;
}
- if (unlikely(!cp) && !iph.fragoffs) {
- /* No (second) fragments need to enter here, as nf_defrag_ipv6
- * replayed fragment zero will already have created the cp
- */
+ if (unlikely(!cp)) {
int v;
- /* Schedule and create new connection entry into &cp */
- if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
+ if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
return v;
}
- if (unlikely(!cp)) {
- /* sorry, all this trouble for a no-hit :) */
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
- "ip_vs_in: packet continues traversal as normal");
- if (iph.fragoffs) {
- /* Fragment that couldn't be mapped to a conn entry
- * is missing module nf_defrag_ipv6
- */
- IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
- IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
- }
- return NF_ACCEPT;
- }
+ IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
@@ -1753,7 +1816,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
pkts = atomic_add_return(1, &cp->in_pkts);
if (ipvs->sync_state & IP_VS_STATE_MASTER)
- ip_vs_sync_conn(net, cp, pkts);
+ ip_vs_sync_conn(ipvs, cp, pkts);
ip_vs_conn_put(cp);
return ret;
@@ -1764,10 +1827,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
}
/*
@@ -1775,10 +1838,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1788,10 +1851,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET6);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
/*
@@ -1799,10 +1862,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET6);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
#endif
@@ -1818,46 +1881,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int r;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(skb, &r, ops->hooknum);
+ return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
}
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
-ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int r;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
struct ip_vs_iphdr iphdr;
- ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
if (iphdr.protocol != IPPROTO_ICMPV6)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
+ return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
}
#endif
@@ -1866,7 +1923,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 2,
@@ -1876,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* applied to IPVS. */
{
.hook = ip_vs_remote_request4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 1,
@@ -1884,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_local_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 1,
@@ -1892,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 2,
@@ -1901,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 99,
@@ -1909,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -1918,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 2,
@@ -1928,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* applied to IPVS. */
{
.hook = ip_vs_remote_request6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 1,
@@ -1936,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_local_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
@@ -1944,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 2,
@@ -1953,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp_v6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 99,
@@ -1961,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -1987,22 +2032,22 @@ static int __net_init __ip_vs_init(struct net *net)
atomic_inc(&ipvs_netns_cnt);
net->ipvs = ipvs;
- if (ip_vs_estimator_net_init(net) < 0)
+ if (ip_vs_estimator_net_init(ipvs) < 0)
goto estimator_fail;
- if (ip_vs_control_net_init(net) < 0)
+ if (ip_vs_control_net_init(ipvs) < 0)
goto control_fail;
- if (ip_vs_protocol_net_init(net) < 0)
+ if (ip_vs_protocol_net_init(ipvs) < 0)
goto protocol_fail;
- if (ip_vs_app_net_init(net) < 0)
+ if (ip_vs_app_net_init(ipvs) < 0)
goto app_fail;
- if (ip_vs_conn_net_init(net) < 0)
+ if (ip_vs_conn_net_init(ipvs) < 0)
goto conn_fail;
- if (ip_vs_sync_net_init(net) < 0)
+ if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
@@ -2013,15 +2058,15 @@ static int __net_init __ip_vs_init(struct net *net)
*/
sync_fail:
- ip_vs_conn_net_cleanup(net);
+ ip_vs_conn_net_cleanup(ipvs);
conn_fail:
- ip_vs_app_net_cleanup(net);
+ ip_vs_app_net_cleanup(ipvs);
app_fail:
- ip_vs_protocol_net_cleanup(net);
+ ip_vs_protocol_net_cleanup(ipvs);
protocol_fail:
- ip_vs_control_net_cleanup(net);
+ ip_vs_control_net_cleanup(ipvs);
control_fail:
- ip_vs_estimator_net_cleanup(net);
+ ip_vs_estimator_net_cleanup(ipvs);
estimator_fail:
net->ipvs = NULL;
return -ENOMEM;
@@ -2029,22 +2074,25 @@ estimator_fail:
static void __net_exit __ip_vs_cleanup(struct net *net)
{
- ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */
- ip_vs_conn_net_cleanup(net);
- ip_vs_app_net_cleanup(net);
- ip_vs_protocol_net_cleanup(net);
- ip_vs_control_net_cleanup(net);
- ip_vs_estimator_net_cleanup(net);
- IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
net->ipvs = NULL;
}
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
EnterFunction(2);
- net_ipvs(net)->enable = 0; /* Disable packet reception */
+ ipvs->enable = 0; /* Disable packet reception */
smp_wmb();
- ip_vs_sync_net_cleanup(net);
+ ip_vs_sync_net_cleanup(ipvs);
LeaveFunction(2);
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_ctl.c b/kernel/net/netfilter/ipvs/ip_vs_ctl.c
index 285eae3a1..e7c1b052c 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_ctl.c
@@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work)
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
- ip_vs_random_dropentry(ipvs->net);
+ ip_vs_random_dropentry(ipvs);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
#endif
@@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
* Returns hash value for virtual service
*/
static inline unsigned int
-ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
+ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr, __be16 port)
{
register unsigned int porth = ntohs(port);
@@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
addr->ip6[2]^addr->ip6[3];
#endif
ahash = ntohl(addr_fold);
- ahash ^= ((size_t) net >> 8);
+ ahash ^= ((size_t) ipvs >> 8);
return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
IP_VS_SVC_TAB_MASK;
@@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
/*
* Returns hash value of fwmark for virtual service lookup
*/
-static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
+static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
{
- return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
+ return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
/*
@@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/*
* Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
*/
- hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
&svc->addr, svc->port);
hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
* Hash it by fwmark in svc_fwm_table
*/
- hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
+ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
@@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
* Get service by {netns, proto,addr,port} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
unsigned int hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
- hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
+ hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
&& (svc->protocol == protocol)
- && net_eq(svc->net, net)) {
+ && (svc->ipvs == ipvs)) {
/* HIT */
return svc;
}
@@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
* Get service by {fwmark} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
{
unsigned int hash;
struct ip_vs_service *svc;
/* Check for fwmark addressed entries */
- hash = ip_vs_svc_fwm_hashkey(net, fwmark);
+ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af
- && net_eq(svc->net, net)) {
+ && (svc->ipvs == ipvs)) {
/* HIT */
return svc;
}
@@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
/* Find service, called under RCU lock */
struct ip_vs_service *
-ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
- struct netns_ipvs *ipvs = net_ipvs(net);
/*
* Check the table hashed by fwmark first
*/
if (fwmark) {
- svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
if (svc)
goto out;
}
@@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
@@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
@@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
}
out:
@@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
}
/* Check if real service by <proto,addr,port> is present */
-bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash;
struct ip_vs_dest *dest;
@@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
@@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
/*
* Find the destination in trash
@@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
* are expired, and the refcnt of each destination in the trash must
* be 0, so we simply release them here.
*/
-static void ip_vs_trash_cleanup(struct net *net)
+static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
{
struct ip_vs_dest *dest, *nxt;
- struct netns_ipvs *ipvs = net_ipvs(net);
del_timer_sync(&ipvs->dest_trash_timer);
/* No need to use dest_trash_lock */
@@ -788,7 +785,7 @@ static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_service *old_svc;
struct ip_vs_scheduler *sched;
int conn_flags;
@@ -842,15 +839,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
- sched = rcu_dereference_protected(svc->scheduler, 1);
if (add) {
- ip_vs_start_estimator(svc->net, &dest->stats);
+ ip_vs_start_estimator(svc->ipvs, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
- if (sched->add_dest)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->add_dest)
sched->add_dest(svc, dest);
} else {
- if (sched->upd_dest)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->upd_dest)
sched->upd_dest(svc, dest);
}
}
@@ -873,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(svc->net, udest->addr.ip);
+ atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
@@ -1035,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
+static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
bool cleanup)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- ip_vs_stop_estimator(net, &dest->stats);
+ ip_vs_stop_estimator(ipvs, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
@@ -1078,13 +1074,13 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
svc->num_dests--;
if (dest->af != svc->af)
- net_ipvs(svc->net)->mixed_address_family_dests--;
+ svc->ipvs->mixed_address_family_dests--;
if (svcupd) {
struct ip_vs_scheduler *sched;
sched = rcu_dereference_protected(svc->scheduler, 1);
- if (sched->del_dest)
+ if (sched && sched->del_dest)
sched->del_dest(svc, dest);
}
}
@@ -1119,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete the destination
*/
- __ip_vs_del_dest(svc->net, dest, false);
+ __ip_vs_del_dest(svc->ipvs, dest, false);
LeaveFunction(2);
@@ -1128,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
static void ip_vs_dest_trash_expire(unsigned long data)
{
- struct net *net = (struct net *) data;
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
@@ -1162,24 +1157,26 @@ static void ip_vs_dest_trash_expire(unsigned long data)
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0, i;
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
- struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
/* Lookup the scheduler by 'u->sched_name' */
- sched = ip_vs_scheduler_get(u->sched_name);
- if (sched == NULL) {
- pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
- ret = -ENOENT;
- goto out_err;
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ ret = -ENOENT;
+ goto out_err;
+ }
}
if (u->pe_name && *u->pe_name) {
@@ -1233,17 +1230,19 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
svc->flags = u->flags;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
- svc->net = net;
+ svc->ipvs = ipvs;
INIT_LIST_HEAD(&svc->destinations);
spin_lock_init(&svc->sched_lock);
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
- ret = ip_vs_bind_scheduler(svc, sched);
- if (ret)
- goto out_err;
- sched = NULL;
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret)
+ goto out_err;
+ sched = NULL;
+ }
/* Bind the ct retriever */
RCU_INIT_POINTER(svc->pe, pe);
@@ -1255,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
- ip_vs_start_estimator(net, &svc->stats);
+ ip_vs_start_estimator(ipvs, &svc->stats);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
@@ -1291,17 +1290,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
- struct ip_vs_scheduler *sched, *old_sched;
+ struct ip_vs_scheduler *sched = NULL, *old_sched;
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
/*
* Lookup the scheduler, by 'u->sched_name'
*/
- sched = ip_vs_scheduler_get(u->sched_name);
- if (sched == NULL) {
- pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
- return -ENOENT;
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ return -ENOENT;
+ }
}
old_sched = sched;
@@ -1329,14 +1331,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
+ if (old_sched) {
+ ip_vs_unbind_scheduler(svc, old_sched);
+ RCU_INIT_POINTER(svc->scheduler, NULL);
+ /* Wait all svc->sched_data users */
+ synchronize_rcu();
+ }
/* Bind the new scheduler */
- ret = ip_vs_bind_scheduler(svc, sched);
- if (ret) {
- old_sched = sched;
- goto out;
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret) {
+ ip_vs_scheduler_put(sched);
+ goto out;
+ }
}
- /* Unbind the old scheduler on success */
- ip_vs_unbind_scheduler(svc, old_sched);
}
/*
@@ -1366,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
pr_info("%s: enter\n", __func__);
@@ -1374,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
if (svc->af == AF_INET)
ipvs->num_services--;
- ip_vs_stop_estimator(svc->net, &svc->stats);
+ ip_vs_stop_estimator(svc->ipvs, &svc->stats);
/* Unbind scheduler */
old_sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -1390,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(svc->net, dest, cleanup);
+ __ip_vs_del_dest(svc->ipvs, dest, cleanup);
}
/*
@@ -1441,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(struct net *net, bool cleanup)
+static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
{
int idx;
struct ip_vs_service *svc;
@@ -1453,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
s_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1464,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
f_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1476,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup)
* Delete service by {netns} in the service table.
* Called by __ip_vs_cleanup()
*/
-void ip_vs_service_net_cleanup(struct net *net)
+void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
{
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(net, true);
+ ip_vs_flush(ipvs, true);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
@@ -1525,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net)) {
+ if (svc->ipvs == ipvs) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
ip_vs_forget_dev(dest, dev);
@@ -1534,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
}
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (net_eq(svc->net, net)) {
+ if (svc->ipvs == ipvs) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
ip_vs_forget_dev(dest, dev);
@@ -1568,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
return 0;
}
-static int ip_vs_zero_all(struct net *net)
+static int ip_vs_zero_all(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_zero_service(svc);
}
}
- ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
+ ip_vs_zero_stats(&ipvs->tot_stats);
return 0;
}
@@ -1600,7 +1608,7 @@ static int
proc_do_defense_mode(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1611,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level(net_ipvs(net));
+ update_defense_level(ipvs);
}
}
return rc;
@@ -1829,6 +1837,18 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "schedule_icmp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ignore_tunneled",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -1874,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags)
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_iter *iter = seq->private;
int idx;
struct ip_vs_service *svc;
@@ -1881,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net) && pos-- == 0) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
return svc;
@@ -1893,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
f_list) {
- if (net_eq(svc->net, net) && pos-- == 0) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
return svc;
@@ -1982,6 +2003,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
@@ -1990,18 +2012,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol),
&svc->addr.in6,
ntohs(svc->port),
- sched->name);
+ sched_name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
- sched->name,
+ sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else {
seq_printf(seq, "FWM %08X %s %s",
- svc->fwmark, sched->name,
+ svc->fwmark, sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
}
@@ -2180,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
struct ip_vs_proto_data *pd;
@@ -2193,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
= u->tcp_timeout * HZ;
}
if (u->tcp_fin_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
@@ -2207,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
pd->timeout_table[IP_VS_UDP_S_NORMAL]
= u->udp_timeout * HZ;
}
@@ -2319,24 +2341,34 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- mutex_lock(&ipvs->sync_mutex);
- if (cmd == IP_VS_SO_SET_STARTDAEMON)
- ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
- dm->syncid);
- else
- ret = stop_sync_thread(net, dm->state);
- mutex_unlock(&ipvs->sync_mutex);
+ if (cmd == IP_VS_SO_SET_STARTDAEMON) {
+ struct ipvs_sync_daemon_cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
+ sizeof(cfg.mcast_ifn));
+ cfg.syncid = dm->syncid;
+ rtnl_lock();
+ mutex_lock(&ipvs->sync_mutex);
+ ret = start_sync_thread(ipvs, &cfg, dm->state);
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+ } else {
+ mutex_lock(&ipvs->sync_mutex);
+ ret = stop_sync_thread(ipvs, dm->state);
+ mutex_unlock(&ipvs->sync_mutex);
+ }
goto out_dec;
}
mutex_lock(&__ip_vs_mutex);
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush(net, false);
+ ret = ip_vs_flush(ipvs, false);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
}
@@ -2351,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
- ret = ip_vs_zero_all(net);
+ ret = ip_vs_zero_all(ipvs);
goto out_unlock;
}
}
@@ -2369,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Lookup the exact service by <protocol, addr, port> or fwmark */
rcu_read_lock();
if (usvc.fwmark == 0)
- svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
+ svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
rcu_read_unlock();
if (cmd != IP_VS_SO_SET_ADD
@@ -2386,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(net, &usvc, &svc);
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, &usvc);
@@ -2427,13 +2459,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
struct ip_vs_kstats kstats;
+ char *sched_name;
sched = rcu_dereference_protected(src->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
dst->protocol = src->protocol;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
+ strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2443,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
}
static inline int
-__ip_vs_get_service_entries(struct net *net,
+__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
const struct ip_vs_get_services *get,
struct ip_vs_get_services __user *uptr)
{
@@ -2455,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET || !net_eq(svc->net, net))
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
continue;
if (count >= get->num_services)
@@ -2474,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET || !net_eq(svc->net, net))
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
continue;
if (count >= get->num_services)
@@ -2494,7 +2528,7 @@ out:
}
static inline int
-__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
@@ -2503,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
rcu_read_lock();
if (get->fwmark)
- svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
else
- svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
+ svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
get->port);
rcu_read_unlock();
@@ -2550,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
}
static inline void
-__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
struct ip_vs_proto_data *pd;
@@ -2559,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
memset(u, 0, sizeof (*u));
#ifdef CONFIG_IP_VS_PROTO_TCP
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
- pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
u->udp_timeout =
pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
@@ -2627,15 +2661,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+ strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
sizeof(d[0].mcast_ifn));
- d[0].syncid = ipvs->master_syncid;
+ d[0].syncid = ipvs->mcfg.syncid;
}
if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+ strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
sizeof(d[1].mcast_ifn));
- d[1].syncid = ipvs->backup_syncid;
+ d[1].syncid = ipvs->bcfg.syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT;
@@ -2683,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_service_entries(net, get, user);
+ ret = __ip_vs_get_service_entries(ipvs, get, user);
}
break;
@@ -2697,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
addr.ip = entry->addr;
rcu_read_lock();
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
else
- svc = __ip_vs_service_find(net, AF_INET,
+ svc = __ip_vs_service_find(ipvs, AF_INET,
entry->protocol, &addr,
entry->port);
rcu_read_unlock();
@@ -2725,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_dest_entries(net, get, user);
+ ret = __ip_vs_get_dest_entries(ipvs, get, user);
}
break;
@@ -2733,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -2790,6 +2824,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
.len = IP_VS_IFNAME_MAXLEN },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
+ [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
+ [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 },
+ [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
@@ -2892,6 +2931,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
struct ip_vs_kstats kstats;
+ char *sched_name;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2910,8 +2950,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
}
sched = rcu_dereference_protected(svc->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
pe = rcu_dereference_protected(svc->pe, 1);
- if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
(pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
@@ -2961,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
- if (++idx <= start || !net_eq(svc->net, net))
+ if (++idx <= start || (svc->ipvs != ipvs))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -2977,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
- if (++idx <= start || !net_eq(svc->net, net))
+ if (++idx <= start || (svc->ipvs != ipvs))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -2993,7 +3035,7 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_parse_service(struct net *net,
+static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
struct ip_vs_service **ret_svc)
@@ -3038,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net,
rcu_read_lock();
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
+ svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
rcu_read_unlock();
*ret_svc = svc;
@@ -3078,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net,
return 0;
}
-static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
struct nlattr *nla)
{
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -3160,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&__ip_vs_mutex);
@@ -3170,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
goto out_err;
- svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3246,7 +3289,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
}
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
- const char *mcast_ifn, __u32 syncid)
+ struct ipvs_sync_daemon_cfg *c)
{
struct nlattr *nl_daemon;
@@ -3255,9 +3298,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
return -EMSGSIZE;
if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
- nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
- nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
+ nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
+ nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
+ nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
+ nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
+ nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
goto nla_put_failure;
+#ifdef CONFIG_IP_VS_IPV6
+ if (c->mcast_af == AF_INET6) {
+ if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
+ &c->mcast_group.in6))
+ goto nla_put_failure;
+ } else
+#endif
+ if (c->mcast_af == AF_INET &&
+ nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
+ c->mcast_group.ip))
+ goto nla_put_failure;
nla_nest_end(skb, nl_daemon);
return 0;
@@ -3268,7 +3325,7 @@ nla_put_failure:
}
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
- const char *mcast_ifn, __u32 syncid,
+ struct ipvs_sync_daemon_cfg *c,
struct netlink_callback *cb)
{
void *hdr;
@@ -3278,7 +3335,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
if (!hdr)
return -EMSGSIZE;
- if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+ if (ip_vs_genl_fill_daemon(skb, state, c))
goto nla_put_failure;
genlmsg_end(skb, hdr);
@@ -3292,14 +3349,13 @@ nla_put_failure:
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&ipvs->sync_mutex);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
- ipvs->master_mcast_ifn,
- ipvs->master_syncid, cb) < 0)
+ &ipvs->mcfg, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
@@ -3307,8 +3363,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
- ipvs->backup_mcast_ifn,
- ipvs->backup_syncid, cb) < 0)
+ &ipvs->bcfg, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
@@ -3320,39 +3375,90 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
+ struct ipvs_sync_daemon_cfg c;
+ struct nlattr *a;
+ int ret;
+
+ memset(&c, 0, sizeof(c));
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
+ strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+ sizeof(c.mcast_ifn));
+ c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
+
+ a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
+ if (a)
+ c.sync_maxlen = nla_get_u16(a);
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
+ if (a) {
+ c.mcast_af = AF_INET;
+ c.mcast_group.ip = nla_get_in_addr(a);
+ if (!ipv4_is_multicast(c.mcast_group.ip))
+ return -EINVAL;
+ } else {
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
+ if (a) {
+#ifdef CONFIG_IP_VS_IPV6
+ int addr_type;
+
+ c.mcast_af = AF_INET6;
+ c.mcast_group.in6 = nla_get_in6_addr(a);
+ addr_type = ipv6_addr_type(&c.mcast_group.in6);
+ if (!(addr_type & IPV6_ADDR_MULTICAST))
+ return -EINVAL;
+#else
+ return -EAFNOSUPPORT;
+#endif
+ }
+ }
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
+ if (a)
+ c.mcast_port = nla_get_u16(a);
+
+ a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
+ if (a)
+ c.mcast_ttl = nla_get_u8(a);
/* The synchronization protocol is incompatible with mixed family
* services
*/
- if (net_ipvs(net)->mixed_address_family_dests > 0)
+ if (ipvs->mixed_address_family_dests > 0)
return -EINVAL;
- return start_sync_thread(net,
- nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
- nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
- nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+ rtnl_lock();
+ mutex_lock(&ipvs->sync_mutex);
+ ret = start_sync_thread(ipvs, &c,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+ return ret;
}
-static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
+ int ret;
+
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- return stop_sync_thread(net,
- nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ mutex_lock(&ipvs->sync_mutex);
+ ret = stop_sync_thread(ipvs,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ mutex_unlock(&ipvs->sync_mutex);
+ return ret;
}
-static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3364,38 +3470,33 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(net, &t);
+ return ip_vs_set_timeout(ipvs, &t);
}
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
- int ret = 0, cmd;
- struct net *net;
- struct netns_ipvs *ipvs;
+ int ret = -EINVAL, cmd;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
- ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
- mutex_lock(&ipvs->sync_mutex);
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON],
- ip_vs_daemon_policy)) {
- ret = -EINVAL;
+ ip_vs_daemon_policy))
goto out;
- }
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(net, daemon_attrs);
+ ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(net, daemon_attrs);
-out:
- mutex_unlock(&ipvs->sync_mutex);
+ ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
}
+
+out:
return ret;
}
@@ -3406,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
- struct net *net;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush(net, false);
+ ret = ip_vs_flush(ipvs, false);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(net, info->attrs);
+ ret = ip_vs_genl_set_config(ipvs, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
- ret = ip_vs_zero_all(net);
+ ret = ip_vs_zero_all(ipvs);
goto out;
}
@@ -3431,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
- ret = ip_vs_genl_parse_service(net, &usvc,
+ ret = ip_vs_genl_parse_service(ipvs, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc, &svc);
if (ret)
@@ -3470,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
/* The synchronization protocol is incompatible
* with mixed family services
*/
- if (net_ipvs(net)->sync_state) {
+ if (ipvs->sync_state) {
ret = -EINVAL;
goto out;
}
@@ -3490,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
- ret = ip_vs_add_service(net, &usvc, &svc);
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
else
ret = -EEXIST;
break;
@@ -3528,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
- struct net *net;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3559,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc;
- svc = ip_vs_genl_find_service(net,
+ svc = ip_vs_genl_find_service(ipvs,
info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
@@ -3580,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
t.tcp_timeout) ||
@@ -3735,10 +3836,10 @@ static void ip_vs_genl_unregister(void)
* per netns intit/exit func.
*/
#ifdef CONFIG_SYSCTL
-static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
+ struct net *net = ipvs->net;
int idx;
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ctl_table *tbl;
atomic_set(&ipvs->dropentry, 0);
@@ -3757,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
+ for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
+ if (tbl[idx].proc_handler == proc_do_defense_mode)
+ tbl[idx].extra2 = ipvs;
+ }
idx = 0;
ipvs->sysctl_amemthresh = 1024;
tbl[idx++].data = &ipvs->sysctl_amemthresh;
@@ -3798,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_backup_only;
ipvs->sysctl_conn_reuse_mode = 1;
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
-
+ tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
+ tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
@@ -3806,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
kfree(tbl);
return -ENOMEM;
}
- ip_vs_start_estimator(net, &ipvs->tot_stats);
+ ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
@@ -3815,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
return 0;
}
-static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
+ ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
if (!net_eq(net, &init_net))
kfree(ipvs->sysctl_tbl);
@@ -3830,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
#else
-static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
#endif
@@ -3839,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
};
-int __net_init ip_vs_control_net_init(struct net *net)
+int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
{
+ struct net *net = ipvs->net;
int i, idx;
- struct netns_ipvs *ipvs = net_ipvs(net);
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
@@ -3851,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- (unsigned long) net);
+ (unsigned long) ipvs);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
@@ -3873,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
proc_create("ip_vs_stats_percpu", 0, net->proc_net,
&ip_vs_stats_percpu_fops);
- if (ip_vs_control_net_init_sysctl(net))
+ if (ip_vs_control_net_init_sysctl(ipvs))
goto err;
return 0;
@@ -3883,12 +3989,12 @@ err:
return -ENOMEM;
}
-void __net_exit ip_vs_control_net_cleanup(struct net *net)
+void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
- ip_vs_trash_cleanup(net);
- ip_vs_control_net_cleanup_sysctl(net);
+ ip_vs_trash_cleanup(ipvs);
+ ip_vs_control_net_cleanup_sysctl(ipvs);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
remove_proc_entry("ip_vs", net->proc_net);
diff --git a/kernel/net/netfilter/ipvs/ip_vs_est.c b/kernel/net/netfilter/ipvs/ip_vs_est.c
index ef0eb0a8d..457c6c193 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_est.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_est.c
@@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg)
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u64 rate;
- struct net *net = (struct net *)arg;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
- ipvs = net_ipvs(net);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
@@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg)
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
-void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
INIT_LIST_HEAD(&est->list);
@@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
spin_unlock_bh(&ipvs->est_lock);
}
-void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
spin_lock_bh(&ipvs->est_lock);
@@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
dst->outbps = (e->outbps + 0xF) >> 5;
}
-int __net_init ip_vs_estimator_net_init(struct net *net)
+int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
- setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+ setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
}
-void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
+void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
{
- del_timer_sync(&net_ipvs(net)->est_timer);
+ del_timer_sync(&ipvs->est_timer);
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_ftp.c b/kernel/net/netfilter/ipvs/ip_vs_ftp.c
index 5d3daae98..d30c327bb 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_ftp.c
@@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- struct net *net;
*diff = 0;
@@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ ip_vs_conn_fill_param(cp->ipvs, AF_INET,
iph->protocol, &from, port,
&cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+ ip_vs_conn_fill_param(cp->ipvs,
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
/* As above, this is ipv4 only */
@@ -289,9 +288,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* would be adjusted twice.
*/
- net = skb_net(skb);
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(net, n_cp);
+ ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
- struct net *net;
/* no diff required for incoming packets */
*diff = 0;
@@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ ip_vs_conn_fill_param(cp->ipvs, AF_INET,
iph->protocol, &to, port, &cp->vaddr,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
@@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Move tunnel to listen state
*/
- net = skb_net(skb);
- ip_vs_tcp_conn_listen(net, n_cp);
+ ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp);
return 1;
@@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!ipvs)
return -ENOENT;
- app = register_ip_vs_app(net, &ip_vs_ftp);
+ app = register_ip_vs_app(ipvs, &ip_vs_ftp);
if (IS_ERR(app))
return PTR_ERR(app);
for (i = 0; i < ports_count; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
if (ret)
goto err_unreg;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
return 0;
err_unreg:
- unregister_ip_vs_app(net, &ip_vs_ftp);
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
return ret;
}
/*
@@ -471,7 +467,12 @@ err_unreg:
*/
static void __ip_vs_ftp_exit(struct net *net)
{
- unregister_ip_vs_app(net, &ip_vs_ftp);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return;
+
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
}
static struct pernet_operations ip_vs_ftp_ops = {
diff --git a/kernel/net/netfilter/ipvs/ip_vs_lblc.c b/kernel/net/netfilter/ipvs/ip_vs_lblc.c
index 127f14046..cccf4d637 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_lblc.c
@@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
- return ipvs->sysctl_lblc_expiration;
+ return svc->ipvs->sysctl_lblc_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
diff --git a/kernel/net/netfilter/ipvs/ip_vs_lblcr.c b/kernel/net/netfilter/ipvs/ip_vs_lblcr.c
index 2229d2d8b..796d70e47 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
- return ipvs->sysctl_lblcr_expiration;
+ return svc->ipvs->sysctl_lblcr_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
diff --git a/kernel/net/netfilter/ipvs/ip_vs_nfct.c b/kernel/net/netfilter/ipvs/ip_vs_nfct.c
index 5882bbfd1..30434fb13 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_nfct.c
@@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
- &tuple);
+ h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */
diff --git a/kernel/net/netfilter/ipvs/ip_vs_ovf.c b/kernel/net/netfilter/ipvs/ip_vs_ovf.c
new file mode 100644
index 000000000..f7d62c3b7
--- /dev/null
+++ b/kernel/net/netfilter/ipvs/ip_vs_ovf.c
@@ -0,0 +1,86 @@
+/*
+ * IPVS: Overflow-Connection Scheduling module
+ *
+ * Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Scheduler implements "overflow" loadbalancing according to number of active
+ * connections , will keep all conections to the node with the highest weight
+ * and overflow to the next node if the number of connections exceeds the node's
+ * weight.
+ * Note that this scheduler might not be suitable for UDP because it only uses
+ * active connections
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* OVF Connection scheduling */
+static struct ip_vs_dest *
+ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *h = NULL;
+ int hw = 0, w;
+
+ IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n");
+ /* select the node with highest weight, go to next in line if active
+ * connections exceed weight
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ w = atomic_read(&dest->weight);
+ if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
+ atomic_read(&dest->activeconns) > w ||
+ w == 0)
+ continue;
+ if (!h || w > hw) {
+ h = dest;
+ hw = w;
+ }
+ }
+
+ if (h) {
+ IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n",
+ IP_VS_DBG_ADDR(h->af, &h->addr),
+ ntohs(h->port),
+ atomic_read(&h->activeconns),
+ atomic_read(&h->weight));
+ return h;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_ovf_scheduler = {
+ .name = "ovf",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list),
+ .schedule = ip_vs_ovf_schedule,
+};
+
+static int __init ip_vs_ovf_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+}
+
+static void __exit ip_vs_ovf_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_ovf_init);
+module_exit(ip_vs_ovf_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c b/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c
index bed5f7042..1b8d594e4 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr;
int retc;
- ip_vs_fill_iph_skb(p->af, skb, &iph);
+ ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */
if (iph.protocol != IPPROTO_UDP)
diff --git a/kernel/net/netfilter/ipvs/ip_vs_proto.c b/kernel/net/netfilter/ipvs/ip_vs_proto.c
index 939f7fbe9..8ae480715 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_proto.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_proto.c
@@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
* register an ipvs protocols netns related data
*/
static int
-register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
struct ip_vs_proto_data *pd =
kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
@@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
atomic_set(&pd->appcnt, 0); /* Init app counter */
if (pp->init_netns != NULL) {
- int ret = pp->init_netns(net, pd);
+ int ret = pp->init_netns(ipvs, pd);
if (ret) {
/* unlink an free proto data */
ipvs->proto_data_table[hash] = pd->next;
@@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
* unregister an ipvs protocols netns data
*/
static int
-unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data **pd_p;
unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
@@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
if (*pd_p == pd) {
*pd_p = pd->next;
if (pd->pp->exit_netns != NULL)
- pd->pp->exit_netns(net, pd);
+ pd->pp->exit_netns(ipvs, pd);
kfree(pd);
return 0;
}
@@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get);
/*
* get ip_vs_protocol object data by netns and proto
*/
-static struct ip_vs_proto_data *
-__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
struct ip_vs_proto_data *pd;
unsigned int hash = IP_VS_PROTO_HASH(proto);
@@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
return NULL;
}
-
-struct ip_vs_proto_data *
-ip_vs_proto_data_get(struct net *net, unsigned short proto)
-{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- return __ipvs_proto_data_get(ipvs, proto);
-}
EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
@@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
/*
* per network name-space init
*/
-int __net_init ip_vs_protocol_net_init(struct net *net)
+int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs)
{
int i, ret;
static struct ip_vs_protocol *protos[] = {
@@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net)
};
for (i = 0; i < ARRAY_SIZE(protos); i++) {
- ret = register_ip_vs_proto_netns(net, protos[i]);
+ ret = register_ip_vs_proto_netns(ipvs, protos[i]);
if (ret < 0)
goto cleanup;
}
return 0;
cleanup:
- ip_vs_protocol_net_cleanup(net);
+ ip_vs_protocol_net_cleanup(ipvs);
return ret;
}
-void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
+void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd;
int i;
/* unregister all the ipvs proto data for this netns */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pd = ipvs->proto_data_table[i]) != NULL)
- unregister_ip_vs_proto_netns(net, pd);
+ unregister_ip_vs_proto_netns(ipvs, pd);
}
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/kernel/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5de3dd312..5320d3997 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,30 +41,28 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(struct net *net, int af,
- const struct ip_vs_iphdr *iph, int inverse,
+ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af,
+ const struct ip_vs_iphdr *iph,
struct ip_vs_conn_param *p)
{
- if (likely(!inverse))
- ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- int inverse)
+ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
- struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
*/
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
- inverse ? "ICMP+" : "",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
static struct ip_vs_conn *
-ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
- struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
- inverse ? "ICMP+" : "",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
diff --git a/kernel/net/netfilter/ipvs/ip_vs_proto_sctp.c b/kernel/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 5b84c0b56..010ddeec1 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,35 +9,44 @@
#include <net/ip_vs.h>
static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
- struct netns_ipvs *ipvs;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
-
- sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (sh == NULL) {
- *verdict = NF_DROP;
- return 0;
+ __be16 _ports[2], *ports = NULL;
+
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (sh) {
+ sch = skb_header_pointer(
+ skb, iph->len + sizeof(sctp_sctphdr_t),
+ sizeof(_schunkh), &_schunkh);
+ if (sch && (sch->type == SCTP_CID_INIT ||
+ sysctl_sloppy_sctp(ipvs)))
+ ports = &sh->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
}
- sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
- sizeof(_schunkh), &_schunkh);
- if (sch == NULL) {
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
- ipvs = net_ipvs(net);
rcu_read_lock();
- if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
- (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, sh->dest))) {
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+ if (svc) {
int ignored;
if (ip_vs_todrop(ipvs)) {
@@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
+static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
@@ -498,9 +506,9 @@ out:
return ret;
}
-static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
+static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -549,10 +557,8 @@ out:
* timeouts is netns related now.
* ---------------------------------------------
*/
-static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
@@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_proto_tcp.c b/kernel/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 8e92beb0c..d7024b2ed 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -32,27 +32,47 @@
#include <net/ip_vs.h>
static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- struct netns_ipvs *ipvs;
+ __be16 _ports[2], *ports = NULL;
- th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
- if (th == NULL) {
+ /* In the event of icmp, we're only guaranteed to have the first 8
+ * bytes of the transport header, so we only check the rest of the
+ * TCP packet for non-ICMP packets
+ */
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (th) {
+ if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
+ return 1;
+ ports = &th->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
- ipvs = net_ipvs(net);
+
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock();
- if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
- (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, th->dest))) {
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
+ if (svc) {
int ignored;
if (ip_vs_todrop(ipvs)) {
@@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port)
}
-static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
+static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
@@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
static void
-tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
+tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -609,7 +628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP);
spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
@@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
* timeouts is netns related now.
* ---------------------------------------------
*/
-static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
@@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_proto_udp.c b/kernel/net/netfilter/ipvs/ip_vs_proto_udp.c
index b62a3c0ff..e494e9a88 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -29,28 +29,42 @@
#include <net/ip6_checksum.h>
static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
+ __be16 _ports[2], *ports = NULL;
- /* IPv6 fragments, only first fragment will hit this */
- uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
- if (uh == NULL) {
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ /* IPv6 fragments, only first fragment will hit this */
+ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (uh)
+ ports = &uh->source;
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
+
rcu_read_lock();
- svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, uh->dest);
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
if (svc) {
int ignored;
- if (ip_vs_todrop(net_ipvs(net))) {
+ if (ip_vs_todrop(ipvs)) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port)
}
-static int udp_register_app(struct net *net, struct ip_vs_app *inc)
+static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
hash = udp_app_hashkey(port);
@@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
static void
-udp_unregister_app(struct net *net, struct ip_vs_app *inc)
+udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
}
-static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts));
@@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_sched.c b/kernel/net/netfilter/ipvs/ip_vs_sched.c
index 199760c71..a2ff7d746 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_sched.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_sched.c
@@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
if (sched->done_service)
sched->done_service(svc);
- /* svc->scheduler can not be set to NULL */
+ /* svc->scheduler can be set to NULL only by caller */
}
@@ -137,7 +137,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{
- if (scheduler && scheduler->module)
+ if (scheduler)
module_put(scheduler->module);
}
@@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
- struct ip_vs_scheduler *sched;
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
- sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
- sched->name, svc->fwmark, svc->fwmark, msg);
+ sched_name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
- sched->name, ip_vs_proto_name(svc->protocol),
+ sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
- sched->name, ip_vs_proto_name(svc->protocol),
+ sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
diff --git a/kernel/net/netfilter/ipvs/ip_vs_sh.c b/kernel/net/netfilter/ipvs/ip_vs_sh.c
index 98a13433b..1e373a5e4 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_sh.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_sh.c
@@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
static inline __be16
ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
- __be16 port;
- struct tcphdr _tcph, *th;
- struct udphdr _udph, *uh;
- sctp_sctphdr_t _sctph, *sh;
+ __be16 _ports[2], *ports;
+ /* At this point we know that we have a valid packet of some kind.
+ * Because ICMP packets are only guaranteed to have the first 8
+ * bytes, let's just grab the ports. Fortunately they're in the
+ * same position for all three of the protocols we care about.
+ */
switch (iph->protocol) {
case IPPROTO_TCP:
- th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
- if (unlikely(th == NULL))
- return 0;
- port = th->source;
- break;
case IPPROTO_UDP:
- uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
- if (unlikely(uh == NULL))
- return 0;
- port = uh->source;
- break;
case IPPROTO_SCTP:
- sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (unlikely(sh == NULL))
+ ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+ &_ports);
+ if (unlikely(!ports))
return 0;
- port = sh->source;
- break;
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ return ports[0];
+ else
+ return ports[1];
default:
- port = 0;
+ return 0;
}
-
- return port;
}
@@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_dest *dest;
struct ip_vs_sh_state *s;
__be16 port = 0;
+ const union nf_inet_addr *hash_addr;
+
+ hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
@@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
s = (struct ip_vs_sh_state *) svc->sched_data;
if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
- dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+ dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
else
- dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+ dest = ip_vs_sh_get(svc, s, hash_addr, port);
if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
@@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
}
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
- IP_VS_DBG_ADDR(svc->af, &iph->saddr),
+ IP_VS_DBG_ADDR(svc->af, hash_addr),
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
diff --git a/kernel/net/netfilter/ipvs/ip_vs_sync.c b/kernel/net/netfilter/ipvs/ip_vs_sync.c
index 19b9cce6c..803001a45 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_sync.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_sync.c
@@ -193,7 +193,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
- struct net *net;
+ struct netns_ipvs *ipvs;
struct socket *sock;
char *buf;
int id;
@@ -262,6 +262,11 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
+union ipvs_sockaddr {
+ struct sockaddr_in in;
+ struct sockaddr_in6 in6;
+};
+
struct ip_vs_sync_buff {
struct list_head list;
unsigned long firstuse;
@@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
* Create a new sync buffer for Version 1 proto.
*/
static inline struct ip_vs_sync_buff *
-ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len)
{
struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg),
+ ipvs->mcfg.sync_maxlen);
+ sb->mesg = kmalloc(len, GFP_ATOMIC);
if (!sb->mesg) {
kfree(sb);
return NULL;
}
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
sb->mesg->version = SYNC_PROTO_VER;
- sb->mesg->syncid = ipvs->master_syncid;
+ sb->mesg->syncid = ipvs->mcfg.syncid;
sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
sb->mesg->nr_conns = 0;
sb->mesg->spare = 0;
sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
- sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+ sb->end = (unsigned char *)sb->mesg + len;
sb->firstuse = jiffies;
return sb;
@@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
* Create a new sync buffer for Version 0 proto.
*/
static inline struct ip_vs_sync_buff *
-ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len)
{
struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg_v0 *mesg;
@@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0),
+ ipvs->mcfg.sync_maxlen);
+ sb->mesg = kmalloc(len, GFP_ATOMIC);
if (!sb->mesg) {
kfree(sb);
return NULL;
}
mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
mesg->nr_conns = 0;
- mesg->syncid = ipvs->master_syncid;
+ mesg->syncid = ipvs->mcfg.syncid;
mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
- sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+ sb->end = (unsigned char *)mesg + len;
sb->firstuse = jiffies;
return sb;
}
@@ -524,16 +533,15 @@ set:
* Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
*/
-static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
+static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
int pkts)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_buff *buff;
struct ipvs_master_sync_state *ms;
int id;
- int len;
+ unsigned int len;
if (unlikely(cp->af != AF_INET))
return;
@@ -553,17 +561,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
id = select_master_thread_id(ipvs, cp);
ms = &ipvs->ms[id];
buff = ms->sync_buff;
+ len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+ SIMPLE_CONN_SIZE;
if (buff) {
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
/* Send buffer if it is for v1 */
- if (!m->nr_conns) {
+ if (buff->head + len > buff->end || !m->nr_conns) {
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
buff = NULL;
}
}
if (!buff) {
- buff = ip_vs_sync_buff_create_v0(ipvs);
+ buff = ip_vs_sync_buff_create_v0(ipvs, len);
if (!buff) {
spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
@@ -572,8 +582,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
ms->sync_buff = buff;
}
- len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
- SIMPLE_CONN_SIZE;
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
s = (struct ip_vs_sync_conn_v0 *) buff->head;
@@ -597,12 +605,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
m->nr_conns++;
m->size = htons(ntohs(m->size) + len);
buff->head += len;
-
- /* check if there is a space for next one */
- if (buff->head + FULL_CONN_SIZE > buff->end) {
- sb_queue_tail(ipvs, ms);
- ms->sync_buff = NULL;
- }
spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
@@ -612,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
- ip_vs_sync_conn(net, cp->control, pkts);
+ ip_vs_sync_conn(ipvs, cp, pkts);
}
}
@@ -621,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
* Called by ip_vs_in.
* Sending Version 1 messages
*/
-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
+void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
struct ip_vs_sync_buff *buff;
@@ -634,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
/* Handle old version of the protocol */
if (sysctl_sync_ver(ipvs) == 0) {
- ip_vs_sync_conn_v0(net, cp, pkts);
+ ip_vs_sync_conn_v0(ipvs, cp, pkts);
return;
}
/* Do not sync ONE PACKET */
@@ -694,7 +695,7 @@ sloop:
}
if (!buff) {
- buff = ip_vs_sync_buff_create(ipvs);
+ buff = ip_vs_sync_buff_create(ipvs, len);
if (!buff) {
spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
@@ -781,21 +782,21 @@ control:
* fill_param used by version 1
*/
static inline int
-ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
struct ip_vs_conn_param *p,
__u8 *pe_data, unsigned int pe_data_len,
__u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+ ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
(const union nf_inet_addr *)&sc->v6.caddr,
sc->v6.cport,
(const union nf_inet_addr *)&sc->v6.vaddr,
sc->v6.vport, p);
else
#endif
- ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+ ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
(const union nf_inet_addr *)&sc->v4.caddr,
sc->v4.cport,
(const union nf_inet_addr *)&sc->v4.vaddr,
@@ -834,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
* Param: ...
* timeout is in sec.
*/
-static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
unsigned int flags, unsigned int state,
unsigned int protocol, unsigned int type,
const union nf_inet_addr *daddr, __be16 dport,
@@ -843,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs = net_ipvs(net);
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
@@ -901,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* with synchronization, so we can make the assumption that
* the svc_af is the same as the dest_af
*/
- dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
param->vaddr, param->vport, protocol,
fwmark, flags);
@@ -938,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
} else {
struct ip_vs_proto_data *pd;
- pd = ip_vs_proto_data_get(net, protocol);
+ pd = ip_vs_proto_data_get(ipvs, protocol);
if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
cp->timeout = pd->timeout_table[state];
else
@@ -950,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
/*
* Process received multicast message for Version 0
*/
-static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
const size_t buflen)
{
struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
@@ -1006,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
}
}
- ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+ ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
(const union nf_inet_addr *)&s->caddr,
s->cport,
(const union nf_inet_addr *)&s->vaddr,
s->vport, &param);
/* Send timeout as Zero */
- ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET,
(union nf_inet_addr *)&s->daddr, s->dport,
0, 0, opt);
}
@@ -1064,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
/*
* Process a Version 1 sync. connection
*/
-static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
{
struct ip_vs_sync_conn_options opt;
union ip_vs_sync_conn *s;
@@ -1168,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
state = 0;
}
}
- if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+ if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
pe_data_len, pe_name, pe_name_len)) {
retc = 50;
goto out;
}
/* If only IPv4, just silent skip IPv6 */
if (af == AF_INET)
- ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af,
(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
);
#ifdef CONFIG_IP_VS_IPV6
else
- ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af,
(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
@@ -1201,10 +1201,9 @@ out:
* ip_vs_conn entries.
* Handles Version 0 & 1
*/
-static void ip_vs_process_message(struct net *net, __u8 *buffer,
+static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
const size_t buflen)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
int i, nr_conns;
@@ -1219,7 +1218,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* SyncID sanity check */
- if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+ if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) {
IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
return;
}
@@ -1254,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* Process a single sync_conn */
- retc = ip_vs_proc_sync_conn(net, p, msg_end);
+ retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
if (retc < 0) {
IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
retc);
@@ -1265,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
}
} else {
/* Old type of message */
- ip_vs_process_message_v0(net, buffer, buflen);
+ ip_vs_process_message_v0(ipvs, buffer, buflen);
return;
}
}
@@ -1303,6 +1302,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk);
inet->mc_loop = loop ? 1 : 0;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_LOOP */
+ np->mc_loop = loop ? 1 : 0;
+ }
+#endif
release_sock(sk);
}
@@ -1316,6 +1323,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
/* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */
lock_sock(sk);
inet->mc_ttl = ttl;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MULTICAST_HOPS */
+ np->mcast_hops = ttl;
+ }
+#endif
+ release_sock(sk);
+}
+
+/* Control fragmentation of messages */
+static void set_mcast_pmtudisc(struct sock *sk, int val)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */
+ lock_sock(sk);
+ inet->pmtudisc = val;
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ /* IPV6_MTU_DISCOVER */
+ np->pmtudisc = val;
+ }
+#endif
release_sock(sk);
}
@@ -1338,44 +1372,15 @@ static int set_mcast_if(struct sock *sk, char *ifname)
lock_sock(sk);
inet->mc_index = dev->ifindex;
/* inet->mc_addr = 0; */
- release_sock(sk);
-
- return 0;
-}
-
+#ifdef CONFIG_IP_VS_IPV6
+ if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
-/*
- * Set the maximum length of sync message according to the
- * specified interface's MTU.
- */
-static int set_sync_mesg_maxlen(struct net *net, int sync_state)
-{
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct net_device *dev;
- int num;
-
- if (sync_state == IP_VS_STATE_MASTER) {
- dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
- if (!dev)
- return -ENODEV;
-
- num = (dev->mtu - sizeof(struct iphdr) -
- sizeof(struct udphdr) -
- SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
- ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
- SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
- IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", ipvs->send_mesg_maxlen);
- } else if (sync_state == IP_VS_STATE_BACKUP) {
- dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
- if (!dev)
- return -ENODEV;
-
- ipvs->recv_mesg_maxlen = dev->mtu -
- sizeof(struct iphdr) - sizeof(struct udphdr);
- IP_VS_DBG(7, "setting the maximum length of sync receiving "
- "message %d.\n", ipvs->recv_mesg_maxlen);
+ /* IPV6_MULTICAST_IF */
+ np->mcast_oif = dev->ifindex;
}
+#endif
+ release_sock(sk);
return 0;
}
@@ -1405,15 +1410,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex;
- rtnl_lock();
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
- rtnl_unlock();
return ret;
}
+#ifdef CONFIG_IP_VS_IPV6
+static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
+ char *ifname)
+{
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+ int ret;
+
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
+ return -ENODEV;
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ return -EINVAL;
+
+ lock_sock(sk);
+ ret = ipv6_sock_mc_join(sk, dev->ifindex, addr);
+ release_sock(sk);
+
+ return ret;
+}
+#endif
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
@@ -1442,53 +1466,69 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}
+static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
+ struct ipvs_sync_daemon_cfg *c, int id)
+{
+ if (AF_INET6 == c->mcast_af) {
+ sa->in6 = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(c->mcast_port + id),
+ };
+ sa->in6.sin6_addr = c->mcast_group.in6;
+ *salen = sizeof(sa->in6);
+ } else {
+ sa->in = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_port = htons(c->mcast_port + id),
+ };
+ sa->in.sin_addr = c->mcast_group.in;
+ *salen = sizeof(sa->in);
+ }
+}
+
/*
* Set up sending multicast socket over UDP
*/
-static struct socket *make_send_sock(struct net *net, int id)
+static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
- struct sockaddr_in mcast_addr = {
- .sin_family = AF_INET,
- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
- };
+ union ipvs_sockaddr mcast_addr;
struct socket *sock;
- int result;
+ int result, salen;
- /* First create a socket move it to right name space later */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ /* First create a socket */
+ result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
+ IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- /*
- * Kernel sockets that are a part of a namespace, should not
- * hold a reference to a namespace in order to allow to stop it.
- * After sk_change_net should be released using sk_release_kernel.
- */
- sk_change_net(sock->sk, net);
- result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
+ result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
}
set_mcast_loop(sock->sk, 0);
- set_mcast_ttl(sock->sk, 1);
+ set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl);
+ /* Allow fragmentation if MTU changes */
+ set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT);
result = sysctl_sync_sock_size(ipvs);
if (result > 0)
set_sock_size(sock->sk, 1, result);
- result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
+ if (AF_INET == ipvs->mcfg.mcast_af)
+ result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
+ else
+ result = 0;
if (result < 0) {
pr_err("Error binding address of the mcast interface\n");
goto error;
}
+ get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- sizeof(struct sockaddr), 0);
+ salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
goto error;
@@ -1497,7 +1537,7 @@ static struct socket *make_send_sock(struct net *net, int id)
return sock;
error:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
return ERR_PTR(result);
}
@@ -1505,47 +1545,42 @@ error:
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct net *net, int id)
+static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
- struct sockaddr_in mcast_addr = {
- .sin_family = AF_INET,
- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
- };
+ union ipvs_sockaddr mcast_addr;
struct socket *sock;
- int result;
+ int result, salen;
/* First create a socket */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
+ IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- /*
- * Kernel sockets that are a part of a namespace, should not
- * hold a reference to a namespace in order to allow to stop it.
- * After sk_change_net should be released using sk_release_kernel.
- */
- sk_change_net(sock->sk, net);
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = SK_CAN_REUSE;
result = sysctl_sync_sock_size(ipvs);
if (result > 0)
set_sock_size(sock->sk, 0, result);
- result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
- sizeof(struct sockaddr));
+ get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+ result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
goto error;
}
/* join the multicast group */
- result = join_mcast_group(sock->sk,
- (struct in_addr *) &mcast_addr.sin_addr,
- ipvs->backup_mcast_ifn);
+#ifdef CONFIG_IP_VS_IPV6
+ if (ipvs->bcfg.mcast_af == AF_INET6)
+ result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
+ ipvs->bcfg.mcast_ifn);
+ else
+#endif
+ result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
+ ipvs->bcfg.mcast_ifn);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
@@ -1554,7 +1589,7 @@ static struct socket *make_receive_sock(struct net *net, int id)
return sock;
error:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
return ERR_PTR(result);
}
@@ -1646,14 +1681,14 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
- struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct netns_ipvs *ipvs = tinfo->ipvs;
struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
struct sock *sk = tinfo->sock->sk;
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d, id = %d\n",
- ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
+ ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id);
for (;;) {
sb = next_sync_buff(ipvs, ms);
@@ -1692,7 +1727,7 @@ done:
ip_vs_sync_buff_release(sb);
/* release the sending multicast socket */
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo);
return 0;
@@ -1702,12 +1737,12 @@ done:
static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
- struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct netns_ipvs *ipvs = tinfo->ipvs;
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d, id = %d\n",
- ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
+ ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1717,19 +1752,19 @@ static int sync_thread_backup(void *data)
/* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
- ipvs->recv_mesg_maxlen);
+ ipvs->bcfg.sync_maxlen);
if (len <= 0) {
if (len != -EAGAIN)
pr_err("receiving message error\n");
break;
}
- ip_vs_process_message(tinfo->net, tinfo->buf, len);
+ ip_vs_process_message(ipvs, tinfo->buf, len);
}
}
/* release the sending multicast socket */
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
@@ -1737,16 +1772,18 @@ static int sync_thread_backup(void *data)
}
-int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
+ int state)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **array = NULL, *task;
struct socket *sock;
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net_device *dev;
char *name;
int (*threadfn)(void *data);
- int id, count;
+ int id, count, hlen;
int result = -ENOMEM;
+ u16 mtu, min_mtu;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
@@ -1758,22 +1795,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
} else
count = ipvs->threads_mask + 1;
+ if (c->mcast_af == AF_UNSPEC) {
+ c->mcast_af = AF_INET;
+ c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP);
+ }
+ if (!c->mcast_port)
+ c->mcast_port = IP_VS_SYNC_PORT;
+ if (!c->mcast_ttl)
+ c->mcast_ttl = 1;
+
+ dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
+ if (!dev) {
+ pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
+ return -ENODEV;
+ }
+ hlen = (AF_INET6 == c->mcast_af) ?
+ sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
+ sizeof(struct iphdr) + sizeof(struct udphdr);
+ mtu = (state == IP_VS_STATE_BACKUP) ?
+ clamp(dev->mtu, 1500U, 65535U) : 1500U;
+ min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1;
+
+ if (c->sync_maxlen)
+ c->sync_maxlen = clamp_t(unsigned int,
+ c->sync_maxlen, min_mtu,
+ 65535 - hlen);
+ else
+ c->sync_maxlen = mtu - hlen;
+
if (state == IP_VS_STATE_MASTER) {
if (ipvs->ms)
return -EEXIST;
- strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
- sizeof(ipvs->master_mcast_ifn));
- ipvs->master_syncid = syncid;
+ ipvs->mcfg = *c;
name = "ipvs-m:%d:%d";
threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) {
if (ipvs->backup_threads)
return -EEXIST;
- strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
- sizeof(ipvs->backup_mcast_ifn));
- ipvs->backup_syncid = syncid;
+ ipvs->bcfg = *c;
name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup;
} else {
@@ -1801,14 +1862,13 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
if (!array)
goto out;
}
- set_sync_mesg_maxlen(net, state);
tinfo = NULL;
for (id = 0; id < count; id++) {
if (state == IP_VS_STATE_MASTER)
- sock = make_send_sock(net, id);
+ sock = make_send_sock(ipvs, id);
else
- sock = make_receive_sock(net, id);
+ sock = make_receive_sock(ipvs, id);
if (IS_ERR(sock)) {
result = PTR_ERR(sock);
goto outtinfo;
@@ -1816,10 +1876,10 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
goto outsocket;
- tinfo->net = net;
+ tinfo->ipvs = ipvs;
tinfo->sock = sock;
if (state == IP_VS_STATE_BACKUP) {
- tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
+ tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
goto outtinfo;
@@ -1854,11 +1914,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
return 0;
outsocket:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
outtinfo:
if (tinfo) {
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
}
@@ -1880,9 +1940,8 @@ out:
}
-int stop_sync_thread(struct net *net, int state)
+int stop_sync_thread(struct netns_ipvs *ipvs, int state)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct task_struct **array;
int id;
int retc = -EINVAL;
@@ -1948,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state)
/*
* Initialize data struct for each netns
*/
-int __net_init ip_vs_sync_net_init(struct net *net)
+int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
spin_lock_init(&ipvs->sync_lock);
spin_lock_init(&ipvs->sync_buff_lock);
return 0;
}
-void ip_vs_sync_net_cleanup(struct net *net)
+void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
{
int retc;
- struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&ipvs->sync_mutex);
- retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Master Daemon\n");
- retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Backup Daemon\n");
mutex_unlock(&ipvs->sync_mutex);
diff --git a/kernel/net/netfilter/ipvs/ip_vs_xmit.c b/kernel/net/netfilter/ipvs/ip_vs_xmit.c
index 19986ec5f..3264cb49b 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_xmit.c
@@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
- fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
FLOWI_FLAG_KNOWN_NH : 0;
@@ -213,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
-static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+ int rt_mode,
struct ip_vs_iphdr *ipvsh,
struct sk_buff *skb, int mtu)
{
#ifdef CONFIG_IP_VS_IPV6
if (skb_af == AF_INET6) {
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
if (!skb->dev)
skb->dev = net->loopback_dev;
/* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
+ if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh))
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG(1, "frag needed for %pI6c\n",
&ipv6_hdr(skb)->saddr);
@@ -234,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
} else
#endif
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
-
/* If we're going to tunnel the packet and pmtu discovery
* is disabled, we'll just fragment it anyway
*/
@@ -243,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
return true;
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
- skb->len > mtu && !skb_is_gso(skb))) {
+ skb->len > mtu && !skb_is_gso(skb) &&
+ !ip_vs_iph_icmp(ipvsh))) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
IP_VS_DBG(1, "frag needed for %pI4\n",
@@ -257,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
__be32 daddr, int rt_mode, __be32 *ret_saddr,
struct ip_vs_iphdr *ipvsh)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
int mtu;
@@ -337,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
skb_dst_drop(skb);
@@ -364,13 +364,16 @@ err_unreach:
#ifdef CONFIG_IP_VS_IPV6
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
- struct in6_addr *ret_saddr, int do_xfrm)
+ struct in6_addr *ret_saddr, int do_xfrm, int rt_mode)
{
struct dst_entry *dst;
struct flowi6 fl6 = {
.daddr = *daddr,
};
+ if (rt_mode & IP_VS_RT_MODE_KNOWN_NH)
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error)
goto out_err;
@@ -400,11 +403,12 @@ out_err:
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
struct dst_entry *dst;
@@ -427,7 +431,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
}
dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
&dest_dst->dst_saddr.in6,
- do_xfrm);
+ do_xfrm, rt_mode);
if (!dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock_bh(&dest->dst_lock);
@@ -435,7 +439,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
goto err_unreach;
}
rt = (struct rt6_info *) dst;
- cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+ cookie = rt6_get_cookie(rt);
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
@@ -446,7 +450,8 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
*ret_saddr = dest_dst->dst_saddr.in6;
} else {
noref = 0;
- dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
+ dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
+ rt_mode);
if (!dst)
goto err_unreach;
rt = (struct rt6_info *) dst;
@@ -481,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
skb_dst_drop(skb);
@@ -501,6 +506,13 @@ err_put:
return -1;
err_unreach:
+ /* The ip6_link_failure function requires the dev field to be set
+ * in order to get the net (further for the sake of fwmark
+ * reflection).
+ */
+ if (!skb->dev)
+ skb->dev = skb_dst(skb)->dev;
+
dst_link_failure(skb);
return -1;
}
@@ -519,10 +531,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset(skb);
skb_forward_csum(skb);
+ if (!skb->sk)
+ skb_sender_cpu_clear(skb);
}
return ret;
}
+/* In the event of a remote destination, it's possible that we would have
+ * matches against an old socket (particularly a TIME-WAIT socket). This
+ * causes havoc down the line (ip_local_out et. al. expect regular sockets
+ * and invalid memory accesses will happen) so simply drop the association
+ * in this case.
+*/
+static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
+{
+ /* If dev is set, the packet came from the LOCAL_IN callback and
+ * not from a local TCP socket.
+ */
+ if (skb->dev)
+ skb_orphan(skb);
+}
+
/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
struct ip_vs_conn *cp, int local)
@@ -534,12 +563,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 1);
+
+ /* Remove the early_demux association unless it's bound for the
+ * exact same port and address on this host after translation.
+ */
+ if (!local || cp->vport != cp->dport ||
+ !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
+ ip_vs_drop_early_demux_sk(skb);
+
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
+ if (!skb->sk)
+ skb_sender_cpu_clear(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
} else
ret = NF_ACCEPT;
+
return ret;
}
@@ -553,9 +593,12 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
ip_vs_notrack(skb);
if (!local) {
+ ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
+ if (!skb->sk)
+ skb_sender_cpu_clear(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
} else
ret = NF_ACCEPT;
return ret;
@@ -588,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -615,10 +658,13 @@ int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
+ &iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
@@ -665,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR, NULL, ipvsh);
@@ -682,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
- IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
goto tx_error;
@@ -692,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
- IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
- "stopping DNAT to loopback address");
+ IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit(): stopping DNAT to loopback "
+ "address");
goto tx_error;
}
@@ -710,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
- IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
+ IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
@@ -753,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -771,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
- IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
goto tx_error;
@@ -781,8 +829,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
- IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
+ ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
+ IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
goto tx_error;
@@ -800,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
- IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
+ IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
@@ -841,6 +889,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
struct ipv6hdr *old_ipv6h = NULL;
#endif
+ ip_vs_drop_early_demux_sk(skb);
+
if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb)
@@ -924,8 +974,8 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct net *net = skb_net(skb);
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = cp->ipvs;
+ struct net *net = ipvs->net;
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
@@ -941,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
@@ -999,7 +1049,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip_local_out(skb);
+ ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
rcu_read_unlock();
@@ -1035,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1090,7 +1141,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip6_local_out(skb);
+ ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
rcu_read_unlock();
@@ -1122,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
@@ -1161,10 +1212,12 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL);
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
if (local) {
@@ -1229,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
goto tx_error;
@@ -1321,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
- NULL, ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
@@ -1346,7 +1399,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
+ ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI6\n",
__func__, &cp->daddr.in6);