From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/net/netfilter/nf_conntrack_ecache.c | 337 +++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 kernel/net/netfilter/nf_conntrack_ecache.c (limited to 'kernel/net/netfilter/nf_conntrack_ecache.c') diff --git a/kernel/net/netfilter/nf_conntrack_ecache.c b/kernel/net/netfilter/nf_conntrack_ecache.c new file mode 100644 index 000000000..4e78c57b8 --- /dev/null +++ b/kernel/net/netfilter/nf_conntrack_ecache.c @@ -0,0 +1,337 @@ +/* Event cache for netfilter. */ + +/* + * (C) 2005 Harald Welte + * (C) 2005 Patrick McHardy + * (C) 2005-2006 Netfilter Core Team + * (C) 2005 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static DEFINE_MUTEX(nf_ct_ecache_mutex); + +#define ECACHE_RETRY_WAIT (HZ/10) + +enum retry_state { + STATE_CONGESTED, + STATE_RESTART, + STATE_DONE, +}; + +static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) +{ + struct nf_conn *refs[16]; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + unsigned int evicted = 0; + enum retry_state ret = STATE_DONE; + + spin_lock(&pcpu->lock); + + hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + if (nf_ct_is_dying(ct)) + continue; + + if (nf_conntrack_event(IPCT_DESTROY, ct)) { + ret = STATE_CONGESTED; + break; + } + + /* we've got the event delivered, now it's dying */ + set_bit(IPS_DYING_BIT, &ct->status); + refs[evicted] = ct; + + if (++evicted >= ARRAY_SIZE(refs)) { + ret = STATE_RESTART; + break; + } + } + + spin_unlock(&pcpu->lock); + + /* can't _put while holding lock */ + while (evicted) + nf_ct_put(refs[--evicted]); + + return ret; +} + +static void ecache_work(struct work_struct *work) +{ + struct netns_ct *ctnet = + container_of(work, struct netns_ct, ecache_dwork.work); + int cpu, delay = -1; + struct ct_pcpu *pcpu; + + local_bh_disable(); + + for_each_possible_cpu(cpu) { + enum retry_state ret; + + pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu); + + ret = ecache_work_evict_list(pcpu); + + switch (ret) { + case STATE_CONGESTED: + delay = ECACHE_RETRY_WAIT; + goto out; + case STATE_RESTART: + delay = 0; + break; + case STATE_DONE: + break; + } + } + + out: + local_bh_enable(); + + ctnet->ecache_dwork_pending = delay > 0; + if (delay >= 0) + schedule_delayed_work(&ctnet->ecache_dwork, delay); +} + +/* deliver cached events and clear cache entry - must be called with locally + * disabled softirqs */ +void nf_ct_deliver_cached_events(struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned long events, missed; + struct nf_ct_event_notifier *notify; + struct nf_conntrack_ecache *e; + struct nf_ct_event item; + int ret; + + rcu_read_lock(); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + + e = nf_ct_ecache_find(ct); + if (e == NULL) + goto out_unlock; + + events = xchg(&e->cache, 0); + + if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) + goto out_unlock; + + /* We make a copy of the missed event cache without taking + * the lock, thus we may send missed events twice. However, + * this does not harm and it happens very rarely. */ + missed = e->missed; + + if (!((events | missed) & e->ctmask)) + goto out_unlock; + + item.ct = ct; + item.portid = 0; + item.report = 0; + + ret = notify->fcn(events | missed, &item); + + if (likely(ret >= 0 && !missed)) + goto out_unlock; + + spin_lock_bh(&ct->lock); + if (ret < 0) + e->missed |= events; + else + e->missed &= ~missed; + spin_unlock_bh(&ct->lock); + +out_unlock: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); + +int nf_conntrack_register_notifier(struct net *net, + struct nf_ct_event_notifier *new) +{ + int ret; + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); + ret = 0; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); + +void nf_conntrack_unregister_notifier(struct net *net, + struct nf_ct_event_notifier *new) +{ + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + BUG_ON(notify != new); + RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); +} +EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); + +int nf_ct_expect_register_notifier(struct net *net, + struct nf_exp_event_notifier *new) +{ + int ret; + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(net->ct.nf_expect_event_cb, new); + ret = 0; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); + +void nf_ct_expect_unregister_notifier(struct net *net, + struct nf_exp_event_notifier *new) +{ + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, + lockdep_is_held(&nf_ct_ecache_mutex)); + BUG_ON(notify != new); + RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); +} +EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); + +#define NF_CT_EVENTS_DEFAULT 1 +static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; + +#ifdef CONFIG_SYSCTL +static struct ctl_table event_sysctl_table[] = { + { + .procname = "nf_conntrack_events", + .data = &init_net.ct.sysctl_events, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +static struct nf_ct_ext_type event_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_ecache), + .align = __alignof__(struct nf_conntrack_ecache), + .id = NF_CT_EXT_ECACHE, +}; + +#ifdef CONFIG_SYSCTL +static int nf_conntrack_event_init_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_events; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + + net->ct.event_sysctl_header = + register_net_sysctl(net, "net/netfilter", table); + if (!net->ct.event_sysctl_header) { + printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n"); + goto out_register; + } + return 0; + +out_register: + kfree(table); +out: + return -ENOMEM; +} + +static void nf_conntrack_event_fini_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = net->ct.event_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.event_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_event_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_event_fini_sysctl(struct net *net) +{ +} +#endif /* CONFIG_SYSCTL */ + +int nf_conntrack_ecache_pernet_init(struct net *net) +{ + net->ct.sysctl_events = nf_ct_events; + INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); + return nf_conntrack_event_init_sysctl(net); +} + +void nf_conntrack_ecache_pernet_fini(struct net *net) +{ + cancel_delayed_work_sync(&net->ct.ecache_dwork); + nf_conntrack_event_fini_sysctl(net); +} + +int nf_conntrack_ecache_init(void) +{ + int ret = nf_ct_extend_register(&event_extend); + if (ret < 0) + pr_err("nf_ct_event: Unable to register event extension.\n"); + return ret; +} + +void nf_conntrack_ecache_fini(void) +{ + nf_ct_extend_unregister(&event_extend); +} -- cgit 1.2.3-korg