summaryrefslogtreecommitdiffstats
path: root/kernel/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/sched')
-rw-r--r--kernel/net/sched/Kconfig11
-rw-r--r--kernel/net/sched/Makefile1
-rw-r--r--kernel/net/sched/act_api.c52
-rw-r--r--kernel/net/sched/act_bpf.c93
-rw-r--r--kernel/net/sched/act_connmark.c12
-rw-r--r--kernel/net/sched/act_csum.c3
-rw-r--r--kernel/net/sched/act_gact.c44
-rw-r--r--kernel/net/sched/act_ipt.c3
-rw-r--r--kernel/net/sched/act_mirred.c80
-rw-r--r--kernel/net/sched/act_nat.c10
-rw-r--r--kernel/net/sched/act_pedit.c13
-rw-r--r--kernel/net/sched/act_simple.c3
-rw-r--r--kernel/net/sched/act_skbedit.c3
-rw-r--r--kernel/net/sched/act_vlan.c3
-rw-r--r--kernel/net/sched/cls_bpf.c98
-rw-r--r--kernel/net/sched/cls_cgroup.c23
-rw-r--r--kernel/net/sched/cls_flow.c43
-rw-r--r--kernel/net/sched/cls_flower.c697
-rw-r--r--kernel/net/sched/cls_rsvp.h18
-rw-r--r--kernel/net/sched/cls_tcindex.c29
-rw-r--r--kernel/net/sched/em_ipset.c5
-rw-r--r--kernel/net/sched/em_meta.c138
-rw-r--r--kernel/net/sched/sch_api.c90
-rw-r--r--kernel/net/sched/sch_atm.c2
-rw-r--r--kernel/net/sched/sch_blackhole.c15
-rw-r--r--kernel/net/sched/sch_cbq.c2
-rw-r--r--kernel/net/sched/sch_choke.c94
-rw-r--r--kernel/net/sched/sch_codel.c15
-rw-r--r--kernel/net/sched/sch_drr.c2
-rw-r--r--kernel/net/sched/sch_dsmark.c65
-rw-r--r--kernel/net/sched/sch_fifo.c2
-rw-r--r--kernel/net/sched/sch_fq.c13
-rw-r--r--kernel/net/sched/sch_fq_codel.c61
-rw-r--r--kernel/net/sched/sch_generic.c60
-rw-r--r--kernel/net/sched/sch_gred.c26
-rw-r--r--kernel/net/sched/sch_hfsc.c2
-rw-r--r--kernel/net/sched/sch_hhf.c30
-rw-r--r--kernel/net/sched/sch_htb.c8
-rw-r--r--kernel/net/sched/sch_ingress.c59
-rw-r--r--kernel/net/sched/sch_mq.c4
-rw-r--r--kernel/net/sched/sch_mqprio.c4
-rw-r--r--kernel/net/sched/sch_multiq.c2
-rw-r--r--kernel/net/sched/sch_netem.c4
-rw-r--r--kernel/net/sched/sch_plug.c9
-rw-r--r--kernel/net/sched/sch_prio.c2
-rw-r--r--kernel/net/sched/sch_qfq.c6
-rw-r--r--kernel/net/sched/sch_sfb.c28
-rw-r--r--kernel/net/sched/sch_sfq.c31
48 files changed, 1416 insertions, 602 deletions
diff --git a/kernel/net/sched/Kconfig b/kernel/net/sched/Kconfig
index 2274e723a..daa33432b 100644
--- a/kernel/net/sched/Kconfig
+++ b/kernel/net/sched/Kconfig
@@ -312,6 +312,7 @@ config NET_SCH_PIE
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
+ select NET_INGRESS
---help---
Say Y here if you want to use classifiers for incoming packets.
If unsure, say Y.
@@ -477,6 +478,16 @@ config NET_CLS_BPF
To compile this code as a module, choose M here: the module will
be called cls_bpf.
+config NET_CLS_FLOWER
+ tristate "Flower classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ a configurable combination of packet keys and masks.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_flower.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/kernel/net/sched/Makefile b/kernel/net/sched/Makefile
index 7ca7f4c1b..690c1689e 100644
--- a/kernel/net/sched/Makefile
+++ b/kernel/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/kernel/net/sched/act_api.c b/kernel/net/sched/act_api.c
index f8d9c2a2c..06e7c4a37 100644
--- a/kernel/net/sched/act_api.c
+++ b/kernel/net/sched/act_api.c
@@ -27,7 +27,16 @@
#include <net/act_api.h>
#include <net/netlink.h>
-void tcf_hash_destroy(struct tc_action *a)
+static void free_tcf(struct rcu_head *head)
+{
+ struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu);
+
+ free_percpu(p->cpu_bstats);
+ free_percpu(p->cpu_qstats);
+ kfree(p);
+}
+
+static void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
struct tcf_hashinfo *hinfo = a->ops->hinfo;
@@ -41,9 +50,8 @@ void tcf_hash_destroy(struct tc_action *a)
* gen_estimator est_timer() might access p->tcfc_lock
* or bstats, wait a RCU grace period before freeing p
*/
- kfree_rcu(p, tcfc_rcu);
+ call_rcu(&p->tcfc_rcu, free_tcf);
}
-EXPORT_SYMBOL(tcf_hash_destroy);
int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
@@ -231,15 +239,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
if (est)
gen_kill_estimator(&pc->tcfc_bstats,
&pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ call_rcu(&pc->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
- int size, int bind)
+ int size, int bind, bool cpustats)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+ int err = -ENOMEM;
if (unlikely(!p))
return -ENOMEM;
@@ -247,18 +256,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
if (bind)
p->tcfc_bindcnt = 1;
+ if (cpustats) {
+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!p->cpu_bstats) {
+err1:
+ kfree(p);
+ return err;
+ }
+ p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!p->cpu_qstats) {
+err2:
+ free_percpu(p->cpu_bstats);
+ goto err1;
+ }
+ }
spin_lock_init(&p->tcfc_lock);
INIT_HLIST_NODE(&p->tcfc_head);
p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
- int err = gen_new_estimator(&p->tcfc_bstats, NULL,
- &p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
+ &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
if (err) {
- kfree(p);
- return err;
+ free_percpu(p->cpu_qstats);
+ goto err2;
}
}
@@ -393,11 +416,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
list_for_each_entry(a, actions, list) {
repeat:
ret = a->ops->act(skb, a, res);
- if (TC_MUNGED & skb->tc_verd) {
- /* copied already, allow trampling */
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
- skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
- }
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
@@ -621,10 +639,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
- gnet_stats_copy_queue(&d, NULL,
+ gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfc_qstats,
p->tcfc_qstats.qlen) < 0)
goto errout;
diff --git a/kernel/net/sched/act_bpf.c b/kernel/net/sched/act_bpf.c
index 521ffca91..0bc6f912f 100644
--- a/kernel/net/sched/act_bpf.c
+++ b/kernel/net/sched/act_bpf.c
@@ -37,19 +37,25 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
struct tcf_bpf *prog = act->priv;
+ struct bpf_prog *filter;
int action, filter_res;
+ bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;
- spin_lock(&prog->tcf_lock);
-
- prog->tcf_tm.lastuse = jiffies;
- bstats_update(&prog->tcf_bstats, skb);
+ tcf_lastuse_update(&prog->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- /* Needed here for accessing maps. */
rcu_read_lock();
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter = rcu_dereference(prog->filter);
+ if (at_ingress) {
+ __skb_push(skb, skb->mac_len);
+ filter_res = BPF_PROG_RUN(filter, skb);
+ __skb_pull(skb, skb->mac_len);
+ } else {
+ filter_res = BPF_PROG_RUN(filter, skb);
+ }
rcu_read_unlock();
/* A BPF program may overwrite the default action opcode.
@@ -66,11 +72,12 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
case TC_ACT_PIPE:
case TC_ACT_RECLASSIFY:
case TC_ACT_OK:
+ case TC_ACT_REDIRECT:
action = filter_res;
break;
case TC_ACT_SHOT:
action = filter_res;
- prog->tcf_qstats.drops++;
+ qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break;
case TC_ACT_UNSPEC:
action = prog->tcf_action;
@@ -80,7 +87,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
}
- spin_unlock(&prog->tcf_lock);
return action;
}
@@ -256,7 +262,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
struct tcf_bpf_cfg *cfg)
{
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
- cfg->filter = prog->filter;
+ /* updates to prog->filter are prevented, since it's called either
+ * with rtnl lock or during final cleanup in rcu callback
+ */
+ cfg->filter = rcu_dereference_protected(prog->filter, 1);
cfg->bpf_ops = prog->bpf_ops;
cfg->bpf_name = prog->bpf_name;
@@ -271,7 +280,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
bool is_bpf, is_ebpf;
- int ret;
+ int ret, res = 0;
if (!nla)
return -EINVAL;
@@ -280,45 +289,47 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
- is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
- is_ebpf = tb[TCA_ACT_BPF_FD];
-
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
- !tb[TCA_ACT_BPF_PARMS])
+ if (!tb[TCA_ACT_BPF_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
- memset(&cfg, 0, sizeof(cfg));
-
- ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
- tcf_bpf_init_from_efd(tb, &cfg);
- if (ret < 0)
- return ret;
-
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
- sizeof(*prog), bind);
+ sizeof(*prog), bind, true);
if (ret < 0)
- goto destroy_fp;
+ return ret;
- ret = ACT_P_CREATED;
+ res = ACT_P_CREATED;
} else {
/* Don't override defaults. */
if (bind)
- goto destroy_fp;
+ return 0;
tcf_hash_release(act, bind);
- if (!replace) {
- ret = -EEXIST;
- goto destroy_fp;
- }
+ if (!replace)
+ return -EEXIST;
}
+ is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+ is_ebpf = tb[TCA_ACT_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memset(&cfg, 0, sizeof(cfg));
+
+ ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+ tcf_bpf_init_from_efd(tb, &cfg);
+ if (ret < 0)
+ goto out;
+
prog = to_bpf(act);
- spin_lock_bh(&prog->tcf_lock);
+ ASSERT_RTNL();
- if (ret != ACT_P_CREATED)
+ if (res != ACT_P_CREATED)
tcf_bpf_prog_fill_cfg(prog, &old);
prog->bpf_ops = cfg.bpf_ops;
@@ -330,19 +341,21 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action;
- prog->filter = cfg.filter;
-
- spin_unlock_bh(&prog->tcf_lock);
+ rcu_assign_pointer(prog->filter, cfg.filter);
- if (ret == ACT_P_CREATED)
+ if (res == ACT_P_CREATED) {
tcf_hash_insert(act);
- else
+ } else {
+ /* make sure the program being replaced is no longer executing */
+ synchronize_rcu();
tcf_bpf_cfg_cleanup(&old);
+ }
- return ret;
+ return res;
+out:
+ if (res == ACT_P_CREATED)
+ tcf_hash_cleanup(act, est);
-destroy_fp:
- tcf_bpf_cfg_cleanup(&cfg);
return ret;
}
diff --git a/kernel/net/sched/act_connmark.c b/kernel/net/sched/act_connmark.c
index 295d14bd6..bb41699c6 100644
--- a/kernel/net/sched/act_connmark.c
+++ b/kernel/net/sched/act_connmark.c
@@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = a->priv;
+ struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
@@ -67,10 +68,13 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
}
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- proto, &tuple))
+ proto, ca->net, &tuple))
goto out;
- thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+ zone.id = ca->zone;
+ zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+ thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
if (!thash)
goto out;
@@ -108,12 +112,14 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*ci),
+ bind, false);
if (ret)
return ret;
ci = to_connmark(a);
ci->tcf_action = parm->action;
+ ci->net = net;
ci->zone = parm->zone;
tcf_hash_insert(a);
diff --git a/kernel/net/sched/act_csum.c b/kernel/net/sched/act_csum.c
index 4cd5cf1ae..b07c535ba 100644
--- a/kernel/net/sched/act_csum.c
+++ b/kernel/net/sched/act_csum.c
@@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_CSUM_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/kernel/net/sched/act_gact.c b/kernel/net/sched/act_gact.c
index 7fffc2272..5c1b05170 100644
--- a/kernel/net/sched/act_gact.c
+++ b/kernel/net/sched/act_gact.c
@@ -28,14 +28,18 @@
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval)
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (prandom_u32() % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
static int gact_determ(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
+ u32 pack = atomic_inc_return(&gact->packets);
+
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (pack % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
@@ -85,7 +89,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -99,16 +104,19 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
gact = to_gact(a);
- spin_lock_bh(&gact->tcf_lock);
+ ASSERT_RTNL();
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
- gact->tcfg_pval = p_parm->pval;
+ gact->tcfg_pval = max_t(u16, 1, p_parm->pval);
+ /* Make sure tcfg_pval is written before tcfg_ptype
+ * coupled with smp_rmb() in gact_net_rand() & gact_determ()
+ */
+ smp_wmb();
gact->tcfg_ptype = p_parm->ptype;
}
#endif
- spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
tcf_hash_insert(a);
return ret;
@@ -118,23 +126,21 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_gact *gact = a->priv;
- int action = TC_ACT_SHOT;
+ int action = READ_ONCE(gact->tcf_action);
- spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype)
- action = gact_rand[gact->tcfg_ptype](gact);
- else
- action = gact->tcf_action;
-#else
- action = gact->tcf_action;
+ {
+ u32 ptype = READ_ONCE(gact->tcfg_ptype);
+
+ if (ptype)
+ action = gact_rand[ptype](gact);
+ }
#endif
- gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
- gact->tcf_bstats.packets++;
+ bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
if (action == TC_ACT_SHOT)
- gact->tcf_qstats.drops++;
- gact->tcf_tm.lastuse = jiffies;
- spin_unlock(&gact->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+
+ tcf_lastuse_update(&gact->tcf_tm);
return action;
}
diff --git a/kernel/net/sched/act_ipt.c b/kernel/net/sched/act_ipt.c
index cbc8dd7dd..d05869646 100644
--- a/kernel/net/sched/act_ipt.c
+++ b/kernel/net/sched/act_ipt.c
@@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = nla_get_u32(tb[TCA_IPT_INDEX]);
if (!tcf_hash_check(index, a, bind) ) {
- ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
* worry later - danger - this API seems to have changed
* from earlier kernels
*/
+ par.net = dev_net(skb->dev);
par.in = skb->dev;
par.out = NULL;
par.hooknum = ipt->tcfi_hook;
diff --git a/kernel/net/sched/act_mirred.c b/kernel/net/sched/act_mirred.c
index 3f63ceac8..32fcdecdb 100644
--- a/kernel/net/sched/act_mirred.c
+++ b/kernel/net/sched/act_mirred.c
@@ -31,13 +31,19 @@
#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
+static DEFINE_SPINLOCK(mirred_list_lock);
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
+ struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1);
+
+ /* We could be called either in a RCU callback or with RTNL lock held. */
+ spin_lock_bh(&mirred_list_lock);
list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
+ spin_unlock_bh(&mirred_list_lock);
+ if (dev)
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -93,32 +99,37 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
- tcf_hash_release(a, bind);
+ if (bind)
+ return 0;
+
+ tcf_hash_release(a, bind);
+ if (!ovr)
return -EEXIST;
- }
}
m = to_mirred(a);
- spin_lock_bh(&m->tcf_lock);
+ ASSERT_RTNL();
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
if (ret != ACT_P_CREATED)
- dev_put(m->tcfm_dev);
+ dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
- m->tcfm_dev = dev;
+ rcu_assign_pointer(m->tcfm_dev, dev);
m->tcfm_ok_push = ok_push;
}
- spin_unlock_bh(&m->tcf_lock);
+
if (ret == ACT_P_CREATED) {
+ spin_lock_bh(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
+ spin_unlock_bh(&mirred_list_lock);
tcf_hash_insert(a);
}
@@ -131,28 +142,30 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_mirred *m = a->priv;
struct net_device *dev;
struct sk_buff *skb2;
+ int retval, err;
u32 at;
- int retval, err = 1;
- spin_lock(&m->tcf_lock);
- m->tcf_tm.lastuse = jiffies;
- bstats_update(&m->tcf_bstats, skb);
+ tcf_lastuse_update(&m->tcf_tm);
+
+ bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- dev = m->tcfm_dev;
- if (!dev) {
- printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
+ rcu_read_lock();
+ retval = READ_ONCE(m->tcf_action);
+ dev = rcu_dereference(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
goto out;
}
- if (!(dev->flags & IFF_UP)) {
+ if (unlikely(!(dev->flags & IFF_UP))) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
goto out;
}
at = G_TC_AT(skb->tc_verd);
- skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
- if (skb2 == NULL)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
goto out;
if (!(at & AT_EGRESS)) {
@@ -166,18 +179,16 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
+ skb_sender_cpu_clear(skb2);
err = dev_queue_xmit(skb2);
-out:
if (err) {
- m->tcf_qstats.overlimits++;
+out:
+ qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
retval = TC_ACT_SHOT;
- else
- retval = m->tcf_action;
- } else
- retval = m->tcf_action;
- spin_unlock(&m->tcf_lock);
+ }
+ rcu_read_unlock();
return retval;
}
@@ -216,15 +227,20 @@ static int mirred_device_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tcf_mirred *m;
- if (event == NETDEV_UNREGISTER)
+ ASSERT_RTNL();
+ if (event == NETDEV_UNREGISTER) {
+ spin_lock_bh(&mirred_list_lock);
list_for_each_entry(m, &mirred_list, tcfm_list) {
- spin_lock_bh(&m->tcf_lock);
- if (m->tcfm_dev == dev) {
+ if (rcu_access_pointer(m->tcfm_dev) == dev) {
dev_put(dev);
- m->tcfm_dev = NULL;
+ /* Note : no rcu grace period necessary, as
+ * net_device are already rcu protected.
+ */
+ RCU_INIT_POINTER(m->tcfm_dev, NULL);
}
- spin_unlock_bh(&m->tcf_lock);
}
+ spin_unlock_bh(&mirred_list_lock);
+ }
return NOTIFY_DONE;
}
diff --git a/kernel/net/sched/act_nat.c b/kernel/net/sched/act_nat.c
index 270a030d5..b7c4ead8b 100644
--- a/kernel/net/sched/act_nat.c
+++ b/kernel/net/sched/act_nat.c
@@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_NAT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -161,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
goto drop;
tcph = (void *)(skb_network_header(skb) + ihl);
- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
+ true);
break;
}
case IPPROTO_UDP:
@@ -177,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
udph = (void *)(skb_network_header(skb) + ihl);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
- new_addr, 1);
+ new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
@@ -230,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
iph->saddr = new_addr;
inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
- 0);
+ false);
break;
}
default:
diff --git a/kernel/net/sched/act_pedit.c b/kernel/net/sched/act_pedit.c
index 59649d588..e38a7701f 100644
--- a/kernel/net/sched/act_pedit.c
+++ b/kernel/net/sched/act_pedit.c
@@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
p = to_pedit(a);
@@ -68,13 +69,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
} else {
- p = to_pedit(a);
- tcf_hash_release(a, bind);
if (bind)
return 0;
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
-
+ p = to_pedit(a);
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL)
@@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = a->priv;
- int i, munged = 0;
+ int i;
unsigned int off;
if (skb_unclone(skb, GFP_ATOMIC))
@@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
*ptr = ((*ptr & tkey->mask) ^ tkey->val);
if (ptr == &_data)
skb_store_bits(skb, off + offset, ptr, 4);
- munged++;
}
- if (munged)
- skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
goto done;
} else
WARN(1, "pedit BUG: index %d\n", p->tcf_index);
diff --git a/kernel/net/sched/act_simple.c b/kernel/net/sched/act_simple.c
index 6a8d94886..d6b708d6a 100644
--- a/kernel/net/sched/act_simple.c
+++ b/kernel/net/sched/act_simple.c
@@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
defdata = nla_data(tb[TCA_DEF_DATA]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/kernel/net/sched/act_skbedit.c b/kernel/net/sched/act_skbedit.c
index fcfeeaf83..6751b5f8c 100644
--- a/kernel/net/sched/act_skbedit.c
+++ b/kernel/net/sched/act_skbedit.c
@@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/kernel/net/sched/act_vlan.c b/kernel/net/sched/act_vlan.c
index d735ecf0b..796785e0b 100644
--- a/kernel/net/sched/act_vlan.c
+++ b/kernel/net/sched/act_vlan.c
@@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
action = parm->v_action;
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*v),
+ bind, false);
if (ret)
return ret;
diff --git a/kernel/net/sched/cls_bpf.c b/kernel/net/sched/cls_bpf.c
index c0b86f2bf..5faaa5425 100644
--- a/kernel/net/sched/cls_bpf.c
+++ b/kernel/net/sched/cls_bpf.c
@@ -38,6 +38,7 @@ struct cls_bpf_prog {
struct bpf_prog *filter;
struct list_head link;
struct tcf_result res;
+ bool exts_integrated;
struct tcf_exts exts;
u32 handle;
union {
@@ -52,6 +53,7 @@ struct cls_bpf_prog {
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_FLAGS] = { .type = NLA_U32 },
[TCA_BPF_FD] = { .type = NLA_U32 },
[TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
[TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
@@ -59,11 +61,30 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};
+static int cls_bpf_exec_opcode(int code)
+{
+ switch (code) {
+ case TC_ACT_OK:
+ case TC_ACT_SHOT:
+ case TC_ACT_STOLEN:
+ case TC_ACT_REDIRECT:
+ case TC_ACT_UNSPEC:
+ return code;
+ default:
+ return TC_ACT_UNSPEC;
+ }
+}
+
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
struct cls_bpf_prog *prog;
+#ifdef CONFIG_NET_CLS_ACT
+ bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
+#else
+ bool at_ingress = false;
+#endif
int ret = -1;
if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +93,28 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
/* Needed here for accessing maps. */
rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
- int filter_res = BPF_PROG_RUN(prog->filter, skb);
+ int filter_res;
+
+ qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
+
+ if (at_ingress) {
+ /* It is safe to push/pull even if skb_shared() */
+ __skb_push(skb, skb->mac_len);
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ __skb_pull(skb, skb->mac_len);
+ } else {
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ }
+
+ if (prog->exts_integrated) {
+ res->class = prog->res.class;
+ res->classid = qdisc_skb_cb(skb)->tc_classid;
+
+ ret = cls_bpf_exec_opcode(filter_res);
+ if (ret == TC_ACT_UNSPEC)
+ continue;
+ break;
+ }
if (filter_res == 0)
continue;
@@ -181,8 +223,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
return ret;
}
-static int cls_bpf_prog_from_ops(struct nlattr **tb,
- struct cls_bpf_prog *prog, u32 classid)
+static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
{
struct sock_filter *bpf_ops;
struct sock_fprog_kern fprog_tmp;
@@ -216,15 +257,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb,
prog->bpf_ops = bpf_ops;
prog->bpf_num_ops = bpf_num_ops;
prog->bpf_name = NULL;
-
prog->filter = fp;
- prog->res.classid = classid;
return 0;
}
-static int cls_bpf_prog_from_efd(struct nlattr **tb,
- struct cls_bpf_prog *prog, u32 classid)
+static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+ const struct tcf_proto *tp)
{
struct bpf_prog *fp;
char *name = NULL;
@@ -254,9 +293,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb,
prog->bpf_ops = NULL;
prog->bpf_fd = bpf_fd;
prog->bpf_name = name;
-
prog->filter = fp;
- prog->res.classid = classid;
+
+ if (fp->dst_needed)
+ netif_keep_dst(qdisc_dev(tp->q));
return 0;
}
@@ -266,16 +306,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
+ bool is_bpf, is_ebpf, have_exts = false;
struct tcf_exts exts;
- bool is_bpf, is_ebpf;
- u32 classid;
int ret;
is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
is_ebpf = tb[TCA_BPF_FD];
-
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
- !tb[TCA_BPF_CLASSID])
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
@@ -283,18 +320,32 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if (ret < 0)
return ret;
- classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ if (tb[TCA_BPF_FLAGS]) {
+ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+ tcf_exts_destroy(&exts);
+ return -EINVAL;
+ }
+
+ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+ }
+
+ prog->exts_integrated = have_exts;
- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
- cls_bpf_prog_from_efd(tb, prog, classid);
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+ cls_bpf_prog_from_efd(tb, prog, tp);
if (ret < 0) {
tcf_exts_destroy(&exts);
return ret;
}
- tcf_bind_filter(tp, &prog->res, base);
- tcf_exts_change(tp, &prog->exts, &exts);
+ if (tb[TCA_BPF_CLASSID]) {
+ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ tcf_bind_filter(tp, &prog->res, base);
+ }
+ tcf_exts_change(tp, &prog->exts, &exts);
return 0;
}
@@ -415,6 +466,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
struct nlattr *nest;
+ u32 bpf_flags = 0;
int ret;
if (prog == NULL)
@@ -426,7 +478,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
+ if (prog->res.classid &&
+ nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
goto nla_put_failure;
if (cls_bpf_is_ebpf(prog))
@@ -439,6 +492,11 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &prog->exts) < 0)
goto nla_put_failure;
+ if (prog->exts_integrated)
+ bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
+ if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
+ goto nla_put_failure;
+
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
diff --git a/kernel/net/sched/cls_cgroup.c b/kernel/net/sched/cls_cgroup.c
index ea611b216..4c85bd3a7 100644
--- a/kernel/net/sched/cls_cgroup.c
+++ b/kernel/net/sched/cls_cgroup.c
@@ -30,35 +30,16 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
- u32 classid;
-
- classid = task_cls_state(current)->classid;
-
- /*
- * Due to the nature of the classifier it is required to ignore all
- * packets originating from softirq context as accessing `current'
- * would lead to false results.
- *
- * This test assumes that all callers of dev_queue_xmit() explicitely
- * disable bh. Knowing this, it is possible to detect softirq based
- * calls by looking at the number of nested bh disable calls because
- * softirqs always disables bh.
- */
- if (in_serving_softirq()) {
- /* If there is an sk_classid we'll use that. */
- if (!skb->sk)
- return -1;
- classid = skb->sk->sk_classid;
- }
+ u32 classid = task_get_classid(skb);
if (!classid)
return -1;
-
if (!tcf_em_tree_match(skb, &head->ematches, NULL))
return -1;
res->classid = classid;
res->class = 0;
+
return tcf_exts_exec(skb, &head->exts, res);
}
diff --git a/kernel/net/sched/cls_flow.c b/kernel/net/sched/cls_flow.c
index 75df923f5..fbfec6a18 100644
--- a/kernel/net/sched/cls_flow.c
+++ b/kernel/net/sched/cls_flow.c
@@ -22,11 +22,12 @@
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <net/inet_sock.h>
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
@@ -68,35 +69,41 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->src)
- return ntohl(flow->src);
+ __be32 src = flow_get_u32_src(flow);
+
+ if (src)
+ return ntohl(src);
+
return addr_fold(skb->sk);
}
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->dst)
- return ntohl(flow->dst);
+ __be32 dst = flow_get_u32_dst(flow);
+
+ if (dst)
+ return ntohl(dst);
+
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{
- return flow->ip_proto;
+ return flow->basic.ip_proto;
}
static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[0]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.src);
return addr_fold(skb->sk);
}
static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[1]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
@@ -191,8 +198,11 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb)
static u32 flow_get_skuid(const struct sk_buff *skb)
{
- if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
- kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid;
+ struct sock *sk = skb_to_full_sk(skb);
+
+ if (sk && sk->sk_socket && sk->sk_socket->file) {
+ kuid_t skuid = sk->sk_socket->file->f_cred->fsuid;
+
return from_kuid(&init_user_ns, skuid);
}
return 0;
@@ -200,8 +210,11 @@ static u32 flow_get_skuid(const struct sk_buff *skb)
static u32 flow_get_skgid(const struct sk_buff *skb)
{
- if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
- kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid;
+ struct sock *sk = skb_to_full_sk(skb);
+
+ if (sk && sk->sk_socket && sk->sk_socket->file) {
+ kgid_t skgid = sk->sk_socket->file->f_cred->fsgid;
+
return from_kgid(&init_user_ns, skgid);
}
return 0;
@@ -295,7 +308,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
keymask = f->keymask;
if (keymask & FLOW_KEYS_NEEDED)
- skb_flow_dissect(skb, &flow_keys);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
diff --git a/kernel/net/sched/cls_flower.c b/kernel/net/sched/cls_flower.c
new file mode 100644
index 000000000..95b021243
--- /dev/null
+++ b/kernel/net/sched/cls_flower.c
@@ -0,0 +1,697 @@
+/*
+ * net/sched/cls_flower.c Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+ int indev_ifindex;
+ struct flow_dissector_key_control control;
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_eth_addrs eth;
+ struct flow_dissector_key_addrs ipaddrs;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+ unsigned short int start;
+ unsigned short int end;
+};
+
+struct fl_flow_mask {
+ struct fl_flow_key key;
+ struct fl_flow_mask_range range;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_head {
+ struct rhashtable ht;
+ struct fl_flow_mask mask;
+ struct flow_dissector dissector;
+ u32 hgen;
+ bool mask_assigned;
+ struct list_head filters;
+ struct rhashtable_params ht_params;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+ struct rhash_head ht_node;
+ struct fl_flow_key mkey;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct fl_flow_key key;
+ struct list_head list;
+ u32 handle;
+ struct rcu_head rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+ return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+ const u8 *bytes = (const u8 *) &mask->key;
+ size_t size = sizeof(mask->key);
+ size_t i, first = 0, last = size - 1;
+
+ for (i = 0; i < sizeof(mask->key); i++) {
+ if (bytes[i]) {
+ if (!first && i)
+ first = i;
+ last = i;
+ }
+ }
+ mask->range.start = rounddown(first, sizeof(long));
+ mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+ const struct fl_flow_mask *mask)
+{
+ return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ const long *lkey = fl_key_get_start(key, mask);
+ const long *lmask = fl_key_get_start(&mask->key, mask);
+ long *lmkey = fl_key_get_start(mkey, mask);
+ int i;
+
+ for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+ *lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+ struct cls_fl_filter *f;
+ struct fl_flow_key skb_key;
+ struct fl_flow_key skb_mkey;
+
+ fl_clear_masked_range(&skb_key, &head->mask);
+ skb_key.indev_ifindex = skb->skb_iif;
+ /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+ * so do it rather here.
+ */
+ skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
+
+ fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+ f = rhashtable_lookup_fast(&head->ht,
+ fl_key_get_start(&skb_mkey, &head->mask),
+ head->ht_params);
+ if (f) {
+ *res = f->res;
+ return tcf_exts_exec(skb, &f->exts, res);
+ }
+ return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+ struct cls_fl_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ return -ENOBUFS;
+
+ INIT_LIST_HEAD_RCU(&head->filters);
+ rcu_assign_pointer(tp->root, head);
+
+ return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+ struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
+ kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f, *next;
+
+ if (!force && !list_empty(&head->filters))
+ return false;
+
+ list_for_each_entry_safe(f, next, &head->filters, list) {
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ }
+ RCU_INIT_POINTER(tp->root, NULL);
+ if (head->mask_assigned)
+ rhashtable_destroy(&head->ht);
+ kfree_rcu(head, rcu);
+ return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry(f, &head->filters, list)
+ if (f->handle == handle)
+ return (unsigned long) f;
+ return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+ [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
+ [TCA_FLOWER_INDEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ },
+ [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ if (!tb[val_type])
+ return;
+ memcpy(val, nla_data(tb[val_type]), len);
+ if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+ memset(mask, 0xff, len);
+ else
+ memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+ struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_FLOWER_INDEV]) {
+ int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+ if (err < 0)
+ return err;
+ key->indev_ifindex = err;
+ mask->indev_ifindex = 0xffffffff;
+ }
+#endif
+
+ fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst));
+ fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src));
+
+ fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
+
+ if (key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) {
+ fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto));
+ }
+
+ if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src));
+ fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst));
+ } else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src));
+ fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst));
+ }
+
+ if (key->basic.ip_proto == IPPROTO_TCP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ } else if (key->basic.ip_proto == IPPROTO_UDP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ }
+
+ return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+ struct fl_flow_mask *mask2)
+{
+ const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+ const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+ return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+ !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+ .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+ .head_offset = offsetof(struct cls_fl_filter, ht_node),
+ .automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ head->ht_params = fl_ht_params;
+ head->ht_params.key_len = fl_mask_range(mask);
+ head->ht_params.key_offset += mask->range.start;
+
+ return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member) \
+ (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member) \
+ (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
+ FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member) \
+ do { \
+ keys[cnt].key_id = id; \
+ keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \
+ cnt++; \
+ } while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
+ do { \
+ if (FL_KEY_IN_RANGE(mask, member)) \
+ FL_KEY_SET(keys, cnt, id, member); \
+ } while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+ size_t cnt = 0;
+
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ int err;
+
+ if (head->mask_assigned) {
+ if (!fl_mask_eq(&head->mask, mask))
+ return -EINVAL;
+ else
+ return 0;
+ }
+
+ /* Mask is not assigned yet. So assign it and init hashtable
+ * according to that.
+ */
+ err = fl_init_hashtable(head, mask);
+ if (err)
+ return err;
+ memcpy(&head->mask, mask, sizeof(head->mask));
+ head->mask_assigned = true;
+
+ fl_init_dissector(head, mask);
+
+ return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+ struct cls_fl_filter *f, struct fl_flow_mask *mask,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts e;
+ int err;
+
+ tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_FLOWER_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ err = fl_set_key(net, tb, &f->key, &mask->key);
+ if (err)
+ goto errout;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&f->mkey, &f->key, mask);
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(&e);
+ return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+ struct cls_fl_head *head)
+{
+ unsigned int i = 0x80000000;
+ u32 handle;
+
+ do {
+ if (++head->hgen == 0x7FFFFFFF)
+ head->hgen = 1;
+ } while (--i > 0 && fl_get(tp, head->hgen));
+
+ if (unlikely(i == 0)) {
+ pr_err("Insufficient number of handles\n");
+ handle = 0;
+ } else {
+ handle = head->hgen;
+ }
+
+ return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg, bool ovr)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+ struct cls_fl_filter *fnew;
+ struct nlattr *tb[TCA_FLOWER_MAX + 1];
+ struct fl_flow_mask mask = {};
+ int err;
+
+ if (!tca[TCA_OPTIONS])
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+ if (err < 0)
+ return err;
+
+ if (fold && handle && fold->handle != handle)
+ return -EINVAL;
+
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+ if (!handle) {
+ handle = fl_grab_new_handle(tp, head);
+ if (!handle) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+ fnew->handle = handle;
+
+ err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+ if (err)
+ goto errout;
+
+ err = fl_check_assign_mask(head, &mask);
+ if (err)
+ goto errout;
+
+ err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+ head->ht_params);
+ if (err)
+ goto errout;
+ if (fold)
+ rhashtable_remove_fast(&head->ht, &fold->ht_node,
+ head->ht_params);
+
+ *arg = (unsigned long) fnew;
+
+ if (fold) {
+ list_replace_rcu(&fold->list, &fnew->list);
+ tcf_unbind_filter(tp, &fold->res);
+ call_rcu(&fold->rcu, fl_destroy_filter);
+ } else {
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ }
+
+ return 0;
+
+errout:
+ kfree(fnew);
+ return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+ rhashtable_remove_fast(&head->ht, &f->ht_node,
+ head->ht_params);
+ list_del_rcu(&f->list);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry_rcu(f, &head->filters, list) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ int err;
+
+ if (!memchr_inv(mask, 0, len))
+ return 0;
+ err = nla_put(skb, val_type, len, val);
+ if (err)
+ return err;
+ if (mask_type != TCA_FLOWER_UNSPEC) {
+ err = nla_put(skb, mask_type, len, mask);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+ struct nlattr *nest;
+ struct fl_flow_key *key, *mask;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+ goto nla_put_failure;
+
+ key = &f->key;
+ mask = &head->mask.key;
+
+ if (mask->indev_ifindex) {
+ struct net_device *dev;
+
+ dev = __dev_get_by_index(net, key->indev_ifindex);
+ if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+ goto nla_put_failure;
+ }
+
+ if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst)) ||
+ fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src)) ||
+ fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto)))
+ goto nla_put_failure;
+ if ((key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) &&
+ fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto)))
+ goto nla_put_failure;
+
+ if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+ (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src)) ||
+ fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst))))
+ goto nla_put_failure;
+ else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+ (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src)) ||
+ fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst))))
+ goto nla_put_failure;
+
+ if (key->basic.ip_proto == IPPROTO_TCP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_UDP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+
+ if (tcf_exts_dump(skb, &f->exts))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+ .kind = "flower",
+ .classify = fl_classify,
+ .init = fl_init,
+ .destroy = fl_destroy,
+ .get = fl_get,
+ .change = fl_change,
+ .delete = fl_delete,
+ .walk = fl_walk,
+ .dump = fl_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+ return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+ unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/net/sched/cls_rsvp.h b/kernel/net/sched/cls_rsvp.h
index 02fa82792..f9c9fc075 100644
--- a/kernel/net/sched/cls_rsvp.h
+++ b/kernel/net/sched/cls_rsvp.h
@@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
-static void
-rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+static void rsvp_delete_filter_rcu(struct rcu_head *head)
{
- tcf_unbind_filter(tp, &f->res);
+ struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+
tcf_exts_destroy(&f->exts);
- kfree_rcu(f, rcu);
+ kfree(f);
+}
+
+static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ /* all classifiers are required to call tcf_exts_destroy() after rcu
+ * grace period, since converted-to-rcu actions are relying on that
+ * in cleanup() callback
+ */
+ call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}
static bool rsvp_destroy(struct tcf_proto *tp, bool force)
diff --git a/kernel/net/sched/cls_tcindex.c b/kernel/net/sched/cls_tcindex.c
index a557dbaf5..944c8ff45 100644
--- a/kernel/net/sched/cls_tcindex.c
+++ b/kernel/net/sched/cls_tcindex.c
@@ -27,6 +27,7 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
+ struct rcu_head rcu;
};
struct tcindex_filter {
@@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
-static int
-tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static void tcindex_destroy_rexts(struct rcu_head *head)
+{
+ struct tcindex_filter_result *r;
+
+ r = container_of(head, struct tcindex_filter_result, rcu);
+ tcf_exts_destroy(&r->exts);
+}
+
+static void tcindex_destroy_fexts(struct rcu_head *head)
+{
+ struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
+
+ tcf_exts_destroy(&f->result.exts);
+ kfree(f);
+}
+
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
@@ -162,9 +178,14 @@ found:
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
- tcf_exts_destroy(&r->exts);
+ /* all classifiers are required to call tcf_exts_destroy() after rcu
+ * grace period, since converted-to-rcu actions are relying on that
+ * in cleanup() callback
+ */
if (f)
- kfree_rcu(f, rcu);
+ call_rcu(&f->rcu, tcindex_destroy_fexts);
+ else
+ call_rcu(&r->rcu, tcindex_destroy_rexts);
return 0;
}
diff --git a/kernel/net/sched/em_ipset.c b/kernel/net/sched/em_ipset.c
index a3d79c8bf..c66ca9400 100644
--- a/kernel/net/sched/em_ipset.c
+++ b/kernel/net/sched/em_ipset.c
@@ -92,9 +92,10 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
rcu_read_lock();
- if (dev && skb->skb_iif)
- indev = dev_get_by_index_rcu(dev_net(dev), skb->skb_iif);
+ if (skb->skb_iif)
+ indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+ acpar.net = em->net;
acpar.in = indev ? indev : dev;
acpar.out = dev;
diff --git a/kernel/net/sched/em_meta.c b/kernel/net/sched/em_meta.c
index b5294ce20..f2aabc008 100644
--- a/kernel/net/sched/em_meta.c
+++ b/kernel/net/sched/em_meta.c
@@ -343,119 +343,145 @@ META_COLLECTOR(int_sk_refcnt)
META_COLLECTOR(int_sk_rcvbuf)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_rcvbuf;
+ dst->value = sk->sk_rcvbuf;
}
META_COLLECTOR(int_sk_shutdown)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_shutdown;
+ dst->value = sk->sk_shutdown;
}
META_COLLECTOR(int_sk_proto)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_protocol;
+ dst->value = sk->sk_protocol;
}
META_COLLECTOR(int_sk_type)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_type;
+ dst->value = sk->sk_type;
}
META_COLLECTOR(int_sk_rmem_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = sk_rmem_alloc_get(skb->sk);
+ dst->value = sk_rmem_alloc_get(sk);
}
META_COLLECTOR(int_sk_wmem_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = sk_wmem_alloc_get(skb->sk);
+ dst->value = sk_wmem_alloc_get(sk);
}
META_COLLECTOR(int_sk_omem_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = atomic_read(&skb->sk->sk_omem_alloc);
+ dst->value = atomic_read(&sk->sk_omem_alloc);
}
META_COLLECTOR(int_sk_rcv_qlen)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_receive_queue.qlen;
+ dst->value = sk->sk_receive_queue.qlen;
}
META_COLLECTOR(int_sk_snd_qlen)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_write_queue.qlen;
+ dst->value = sk->sk_write_queue.qlen;
}
META_COLLECTOR(int_sk_wmem_queued)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_wmem_queued;
+ dst->value = sk->sk_wmem_queued;
}
META_COLLECTOR(int_sk_fwd_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_forward_alloc;
+ dst->value = sk->sk_forward_alloc;
}
META_COLLECTOR(int_sk_sndbuf)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_sndbuf;
+ dst->value = sk->sk_sndbuf;
}
META_COLLECTOR(int_sk_alloc)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = (__force int) skb->sk->sk_allocation;
+ dst->value = (__force int) sk->sk_allocation;
}
META_COLLECTOR(int_sk_hash)
@@ -469,92 +495,112 @@ META_COLLECTOR(int_sk_hash)
META_COLLECTOR(int_sk_lingertime)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_lingertime / HZ;
+ dst->value = sk->sk_lingertime / HZ;
}
META_COLLECTOR(int_sk_err_qlen)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_error_queue.qlen;
+ dst->value = sk->sk_error_queue.qlen;
}
META_COLLECTOR(int_sk_ack_bl)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_ack_backlog;
+ dst->value = sk->sk_ack_backlog;
}
META_COLLECTOR(int_sk_max_ack_bl)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_max_ack_backlog;
+ dst->value = sk->sk_max_ack_backlog;
}
META_COLLECTOR(int_sk_prio)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_priority;
+ dst->value = sk->sk_priority;
}
META_COLLECTOR(int_sk_rcvlowat)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_rcvlowat;
+ dst->value = sk->sk_rcvlowat;
}
META_COLLECTOR(int_sk_rcvtimeo)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_rcvtimeo / HZ;
+ dst->value = sk->sk_rcvtimeo / HZ;
}
META_COLLECTOR(int_sk_sndtimeo)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_sndtimeo / HZ;
+ dst->value = sk->sk_sndtimeo / HZ;
}
META_COLLECTOR(int_sk_sendmsg_off)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_frag.offset;
+ dst->value = sk->sk_frag.offset;
}
META_COLLECTOR(int_sk_write_pend)
{
- if (skip_nonlocal(skb)) {
+ const struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk) {
*err = -1;
return;
}
- dst->value = skb->sk->sk_write_pending;
+ dst->value = sk->sk_write_pending;
}
/**************************************************************************
diff --git a/kernel/net/sched/sch_api.c b/kernel/net/sched/sch_api.c
index 1e1c89e51..af1acf009 100644
--- a/kernel/net/sched/sch_api.c
+++ b/kernel/net/sched/sch_api.c
@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
}
/* We know handle. Find qdisc among all qdisc's attached to device
- (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
*/
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
root->handle == handle)
return root;
- list_for_each_entry(q, &root->list, list) {
+ list_for_each_entry_rcu(q, &root->list, list) {
if (q->handle == handle)
return q;
}
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
struct Qdisc *root = qdisc_dev(q)->qdisc;
WARN_ON_ONCE(root == &noop_qdisc);
- list_add_tail(&q->list, &root->list);
+ ASSERT_RTNL();
+ list_add_tail_rcu(&q->list, &root->list);
}
}
EXPORT_SYMBOL(qdisc_list_add);
void qdisc_list_del(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_del(&q->list);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ ASSERT_RTNL();
+ list_del_rcu(&q->list);
+ }
}
EXPORT_SYMBOL(qdisc_list_del);
@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
drops = max_t(int, n, 0);
+ rcu_read_lock();
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
- return;
+ break;
+ if (sch->flags & TCQ_F_NOPARENT)
+ break;
+ /* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
- WARN_ON(parentid != TC_H_ROOT);
- return;
+ WARN_ON_ONCE(parentid != TC_H_ROOT);
+ break;
}
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
sch->q.qlen -= n;
__qdisc_qstats_drop(sch, drops);
}
+ rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
@@ -1806,57 +1815,46 @@ done:
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
*/
-int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res, bool compat_mode)
{
__be16 protocol = tc_skb_protocol(skb);
- int err;
+#ifdef CONFIG_NET_CLS_ACT
+ const struct tcf_proto *old_tp = tp;
+ int limit = 0;
+reclassify:
+#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
+ int err;
+
if (tp->protocol != protocol &&
tp->protocol != htons(ETH_P_ALL))
continue;
- err = tp->classify(skb, tp, res);
- if (err >= 0) {
+ err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
- if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
- skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
+ if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
+ goto reset;
#endif
+ if (err >= 0)
return err;
- }
}
- return -1;
-}
-EXPORT_SYMBOL(tc_classify_compat);
-
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- int err = 0;
-#ifdef CONFIG_NET_CLS_ACT
- const struct tcf_proto *otp = tp;
-reclassify:
-#endif
- err = tc_classify_compat(skb, tp, res);
+ return -1;
#ifdef CONFIG_NET_CLS_ACT
- if (err == TC_ACT_RECLASSIFY) {
- u32 verd = G_TC_VERD(skb->tc_verd);
- tp = otp;
-
- if (verd++ >= MAX_REC_LOOP) {
- net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
- tp->q->ops->id,
- tp->prio & 0xffff,
- ntohs(tp->protocol));
- return TC_ACT_SHOT;
- }
- skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
- goto reclassify;
+reset:
+ if (unlikely(limit++ >= MAX_REC_LOOP)) {
+ net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+ tp->q->ops->id, tp->prio & 0xffff,
+ ntohs(tp->protocol));
+ return TC_ACT_SHOT;
}
+
+ tp = old_tp;
+ protocol = tc_skb_protocol(skb);
+ goto reclassify;
#endif
- return err;
}
EXPORT_SYMBOL(tc_classify);
@@ -1885,13 +1883,10 @@ EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
- struct timespec ts;
-
- hrtimer_get_res(CLOCK_MONOTONIC, &ts);
seq_printf(seq, "%08x %08x %08x %08x\n",
(u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1000000,
- (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
+ (u32)NSEC_PER_SEC / hrtimer_resolution);
return 0;
}
@@ -1956,6 +1951,7 @@ static int __init pktsched_init(void)
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
register_qdisc(&mq_qdisc_ops);
+ register_qdisc(&noqueue_qdisc_ops);
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
diff --git a/kernel/net/sched/sch_atm.c b/kernel/net/sched/sch_atm.c
index e3e2cc5fd..1911af3ca 100644
--- a/kernel/net/sched/sch_atm.c
+++ b/kernel/net/sched/sch_atm.c
@@ -375,7 +375,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tc_classify_compat(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
diff --git a/kernel/net/sched/sch_blackhole.c b/kernel/net/sched/sch_blackhole.c
index 094a874b4..3fee70d98 100644
--- a/kernel/net/sched/sch_blackhole.c
+++ b/kernel/net/sched/sch_blackhole.c
@@ -11,7 +11,7 @@
* Note: Quantum tunneling is not supported.
*/
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
@@ -37,17 +37,8 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
-static int __init blackhole_module_init(void)
+static int __init blackhole_init(void)
{
return register_qdisc(&blackhole_qdisc_ops);
}
-
-static void __exit blackhole_module_exit(void)
-{
- unregister_qdisc(&blackhole_qdisc_ops);
-}
-
-module_init(blackhole_module_init)
-module_exit(blackhole_module_exit)
-
-MODULE_LICENSE("GPL");
+device_initcall(blackhole_init)
diff --git a/kernel/net/sched/sch_cbq.c b/kernel/net/sched/sch_cbq.c
index beeb75f80..c538d9e4a 100644
--- a/kernel/net/sched/sch_cbq.c
+++ b/kernel/net/sched/sch_cbq.c
@@ -240,7 +240,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tc_classify_compat(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, true);
if (!fl || result < 0)
goto fallback;
diff --git a/kernel/net/sched/sch_choke.c b/kernel/net/sched/sch_choke.c
index c009eb904..5ffb8b833 100644
--- a/kernel/net/sched/sch_choke.c
+++ b/kernel/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
--sch->q.qlen;
}
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
struct choke_skb_cb {
u16 classid;
u8 keys_valid;
- u8 keys[QDISC_CB_PRIV_LEN - 3];
+ struct flow_keys_digest keys;
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -176,19 +170,19 @@ static bool choke_match_flow(struct sk_buff *skb1,
if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect(skb1, &temp);
- memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
+ skb_flow_dissect_flow_keys(skb1, &temp, 0);
+ make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
}
if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect(skb2, &temp);
- memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
+ skb_flow_dissect_flow_keys(skb2, &temp, 0);
+ make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
}
return !memcmp(&choke_skb_cb(skb1)->keys,
&choke_skb_cb(skb2)->keys,
- CHOKE_K_LEN);
+ sizeof(choke_skb_cb(skb1)->keys));
}
/*
@@ -207,7 +201,7 @@ static bool choke_classify(struct sk_buff *skb,
int result;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -391,6 +385,19 @@ static void choke_reset(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
+ while (q->head != q->tail) {
+ struct sk_buff *skb = q->tab[q->head];
+
+ q->head = (q->head + 1) & q->tab_mask;
+ if (!skb)
+ continue;
+ qdisc_qstats_backlog_dec(sch, skb);
+ --sch->q.qlen;
+ qdisc_drop(skb, sch);
+ }
+
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ q->head = q->tail = 0;
red_restart(&q->vars);
}
@@ -546,65 +553,6 @@ static void choke_destroy(struct Qdisc *sch)
choke_free(q->tab);
}
-static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
-{
- return NULL;
-}
-
-static unsigned long choke_get(struct Qdisc *sch, u32 classid)
-{
- return 0;
-}
-
-static void choke_put(struct Qdisc *q, unsigned long cl)
-{
-}
-
-static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
- u32 classid)
-{
- return 0;
-}
-
-static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
- unsigned long cl)
-{
- struct choke_sched_data *q = qdisc_priv(sch);
-
- if (cl)
- return NULL;
- return &q->filter_list;
-}
-
-static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- tcm->tcm_handle |= TC_H_MIN(cl);
- return 0;
-}
-
-static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-{
- if (!arg->stop) {
- if (arg->fn(sch, 1, arg) < 0) {
- arg->stop = 1;
- return;
- }
- arg->count++;
- }
-}
-
-static const struct Qdisc_class_ops choke_class_ops = {
- .leaf = choke_leaf,
- .get = choke_get,
- .put = choke_put,
- .tcf_chain = choke_find_tcf,
- .bind_tcf = choke_bind,
- .unbind_tcf = choke_put,
- .dump = choke_dump_class,
- .walk = choke_walk,
-};
-
static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
diff --git a/kernel/net/sched/sch_codel.c b/kernel/net/sched/sch_codel.c
index 7a0bdb16a..535007d5f 100644
--- a/kernel/net/sched/sch_codel.c
+++ b/kernel/net/sched/sch_codel.c
@@ -6,7 +6,7 @@
*
* Implemented on linux by :
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
[TCA_CODEL_LIMIT] = { .type = NLA_U32 },
[TCA_CODEL_INTERVAL] = { .type = NLA_U32 },
[TCA_CODEL_ECN] = { .type = NLA_U32 },
+ [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
};
static int codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_CODEL_CE_THRESHOLD]) {
+ u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+
+ q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ }
+
if (tb[TCA_CODEL_INTERVAL]) {
u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
@@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_CODEL_ECN,
q->params.ecn))
goto nla_put_failure;
-
+ if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->params.ce_threshold)))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.ldelay = codel_time_to_us(q->vars.ldelay),
.dropping = q->vars.dropping,
.ecn_mark = q->stats.ecn_mark,
+ .ce_mark = q->stats.ce_mark,
};
if (q->vars.dropping) {
diff --git a/kernel/net/sched/sch_drr.c b/kernel/net/sched/sch_drr.c
index 338706092..f26bdea87 100644
--- a/kernel/net/sched/sch_drr.c
+++ b/kernel/net/sched/sch_drr.c
@@ -331,7 +331,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/kernel/net/sched/sch_dsmark.c b/kernel/net/sched/sch_dsmark.c
index 66700a611..f357f34d0 100644
--- a/kernel/net/sched/sch_dsmark.c
+++ b/kernel/net/sched/sch_dsmark.c
@@ -35,14 +35,20 @@
#define NO_DEFAULT_INDEX (1 << 16)
+struct mask_value {
+ u8 mask;
+ u8 value;
+};
+
struct dsmark_qdisc_data {
struct Qdisc *q;
struct tcf_proto __rcu *filter_list;
- u8 *mask; /* "owns" the array */
- u8 *value;
+ struct mask_value *mv;
u16 indices;
+ u8 set_tc_index;
u32 default_index; /* index range is 0...0xffff */
- int set_tc_index;
+#define DSMARK_EMBEDDED_SZ 16
+ struct mask_value embedded[DSMARK_EMBEDDED_SZ];
};
static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
@@ -116,7 +122,6 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_DSMARK_MAX + 1];
int err = -EINVAL;
- u8 mask = 0;
pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
__func__, sch, p, classid, parent, *arg);
@@ -133,14 +138,11 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
if (err < 0)
goto errout;
- if (tb[TCA_DSMARK_MASK])
- mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-
if (tb[TCA_DSMARK_VALUE])
- p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+ p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
if (tb[TCA_DSMARK_MASK])
- p->mask[*arg - 1] = mask;
+ p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
err = 0;
@@ -155,8 +157,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
if (!dsmark_valid_index(p, arg))
return -EINVAL;
- p->mask[arg - 1] = 0xff;
- p->value[arg - 1] = 0;
+ p->mv[arg - 1].mask = 0xff;
+ p->mv[arg - 1].value = 0;
return 0;
}
@@ -173,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
return;
for (i = 0; i < p->indices; i++) {
- if (p->mask[i] == 0xff && !p->value[i])
+ if (p->mv[i].mask == 0xff && !p->mv[i].value)
goto ignore;
if (walker->count >= walker->skip) {
if (walker->fn(sch, i + 1, walker) < 0) {
@@ -230,7 +232,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tc_classify(skb, fl, &res);
+ int result = tc_classify(skb, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
@@ -291,12 +293,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
- p->value[index]);
+ ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
+ p->mv[index].value);
break;
case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
- p->value[index]);
+ ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
+ p->mv[index].value);
break;
default:
/*
@@ -304,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
* This way, we can send non-IP traffic through dsmark
* and don't need yet another qdisc as a bypass.
*/
- if (p->mask[index] != 0xff || p->value[index])
+ if (p->mv[index].mask != 0xff || p->mv[index].value)
pr_warn("%s: unsupported protocol %d\n",
__func__, ntohs(tc_skb_protocol(skb)));
break;
@@ -346,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
int err = -EINVAL;
u32 default_index = NO_DEFAULT_INDEX;
u16 indices;
- u8 *mask;
+ int i;
pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
@@ -366,18 +368,18 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_DSMARK_DEFAULT_INDEX])
default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
- mask = kmalloc(indices * 2, GFP_KERNEL);
- if (mask == NULL) {
+ if (indices <= DSMARK_EMBEDDED_SZ)
+ p->mv = p->embedded;
+ else
+ p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
+ if (!p->mv) {
err = -ENOMEM;
goto errout;
}
-
- p->mask = mask;
- memset(p->mask, 0xff, indices);
-
- p->value = p->mask + indices;
- memset(p->value, 0, indices);
-
+ for (i = 0; i < indices; i++) {
+ p->mv[i].mask = 0xff;
+ p->mv[i].value = 0;
+ }
p->indices = indices;
p->default_index = default_index;
p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
@@ -410,7 +412,8 @@ static void dsmark_destroy(struct Qdisc *sch)
tcf_destroy_chain(&p->filter_list);
qdisc_destroy(p->q);
- kfree(p->mask);
+ if (p->mv != p->embedded)
+ kfree(p->mv);
}
static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -430,8 +433,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]) ||
- nla_put_u8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]))
+ if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
+ nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/kernel/net/sched/sch_fifo.c b/kernel/net/sched/sch_fifo.c
index 2e2398cfc..2177eac0a 100644
--- a/kernel/net/sched/sch_fifo.c
+++ b/kernel/net/sched/sch_fifo.c
@@ -54,7 +54,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
- u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
+ u32 limit = qdisc_dev(sch)->tx_queue_len;
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
diff --git a/kernel/net/sched/sch_fq.c b/kernel/net/sched/sch_fq.c
index f377702d4..109b23227 100644
--- a/kernel/net/sched/sch_fq.c
+++ b/kernel/net/sched/sch_fq.c
@@ -224,13 +224,16 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
return &q->internal;
- /* SYNACK messages are attached to a listener socket.
- * 1) They are not part of a 'flow' yet
- * 2) We do not want to rate limit them (eg SYNFLOOD attack),
+ /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
+ * or a listener (SYNCOOKIE mode)
+ * 1) request sockets are not full blown,
+ * they do not contain sk_pacing_rate
+ * 2) They are not part of a 'flow' yet
+ * 3) We do not want to rate limit them (eg SYNFLOOD attack),
* especially if the listener set SO_MAX_PACING_RATE
- * 3) We pretend they are orphaned
+ * 4) We pretend they are orphaned
*/
- if (!sk || sk->sk_state == TCP_LISTEN) {
+ if (!sk || sk_listener(sk)) {
unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
/* By forcing low order bit to 1, we make sure to not
diff --git a/kernel/net/sched/sch_fq_codel.c b/kernel/net/sched/sch_fq_codel.c
index 9291598b5..4c834e93d 100644
--- a/kernel/net/sched/sch_fq_codel.c
+++ b/kernel/net/sched/sch_fq_codel.c
@@ -6,7 +6,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*/
#include <linux/module.h>
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/codel.h>
/* Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
};
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
- const struct sk_buff *skb)
+ struct sk_buff *skb)
{
- struct flow_keys keys;
- unsigned int hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
+ u32 hash = skb_get_hash_perturb(skb, q->perturbation);
return reciprocal_scale(hash, q->flows_cnt);
}
@@ -99,7 +92,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, filter, &res);
+ result = tc_classify(skb, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -170,6 +163,15 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
return idx;
}
+static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
+{
+ unsigned int prev_backlog;
+
+ prev_backlog = sch->qstats.backlog;
+ fq_codel_drop(sch);
+ return prev_backlog - sch->qstats.backlog;
+}
+
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -286,10 +288,26 @@ begin:
static void fq_codel_reset(struct Qdisc *sch)
{
- struct sk_buff *skb;
+ struct fq_codel_sched_data *q = qdisc_priv(sch);
+ int i;
- while ((skb = fq_codel_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ INIT_LIST_HEAD(&q->new_flows);
+ INIT_LIST_HEAD(&q->old_flows);
+ for (i = 0; i < q->flows_cnt; i++) {
+ struct fq_codel_flow *flow = q->flows + i;
+
+ while (flow->head) {
+ struct sk_buff *skb = dequeue_head(flow);
+
+ qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
+ }
+
+ INIT_LIST_HEAD(&flow->flowchain);
+ codel_vars_init(&flow->cvars);
+ }
+ memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
+ sch->q.qlen = 0;
}
static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
@@ -299,6 +317,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
[TCA_FQ_CODEL_ECN] = { .type = NLA_U32 },
[TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 },
[TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
};
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -329,6 +348,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
+ u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+
+ q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ }
+
if (tb[TCA_FQ_CODEL_INTERVAL]) {
u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
@@ -448,6 +473,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
+ if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->cparams.ce_threshold)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -466,6 +496,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.drop_overlimit = q->drop_overlimit;
st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
st.qdisc_stats.new_flow_count = q->new_flow_count;
+ st.qdisc_stats.ce_mark = q->cstats.ce_mark;
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
@@ -598,7 +629,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.enqueue = fq_codel_enqueue,
.dequeue = fq_codel_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = fq_codel_drop,
+ .drop = fq_codel_qdisc_drop,
.init = fq_codel_init,
.reset = fq_codel_reset,
.destroy = fq_codel_destroy,
diff --git a/kernel/net/sched/sch_generic.c b/kernel/net/sched/sch_generic.c
index 1e346523f..47ef1b11b 100644
--- a/kernel/net/sched/sch_generic.c
+++ b/kernel/net/sched/sch_generic.c
@@ -416,33 +416,25 @@ struct Qdisc noop_qdisc = {
};
EXPORT_SYMBOL(noop_qdisc);
-static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
+static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt)
+{
+ /* register_qdisc() assigns a default of noop_enqueue if unset,
+ * but __dev_queue_xmit() treats noqueue only as such
+ * if this is NULL - so clear it here. */
+ qdisc->enqueue = NULL;
+ return 0;
+}
+
+struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.id = "noqueue",
.priv_size = 0,
+ .init = noqueue_init,
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
.peek = noop_dequeue,
.owner = THIS_MODULE,
};
-static struct Qdisc noqueue_qdisc;
-static struct netdev_queue noqueue_netdev_queue = {
- .qdisc = &noqueue_qdisc,
- .qdisc_sleeping = &noqueue_qdisc,
-};
-
-static struct Qdisc noqueue_qdisc = {
- .enqueue = NULL,
- .dequeue = noop_dequeue,
- .flags = TCQ_F_BUILTIN,
- .ops = &noqueue_qdisc_ops,
- .list = LIST_HEAD_INIT(noqueue_qdisc.list),
- .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
- .dev_queue = &noqueue_netdev_queue,
- .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
-};
-
-
static const u8 prio2band[TC_PRIO_MAX + 1] = {
1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
};
@@ -666,8 +658,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
kfree((char *) qdisc - qdisc->padded);
}
@@ -733,18 +727,19 @@ static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
{
- struct Qdisc *qdisc = &noqueue_qdisc;
+ struct Qdisc *qdisc;
+ const struct Qdisc_ops *ops = default_qdisc_ops;
- if (dev->tx_queue_len) {
- qdisc = qdisc_create_dflt(dev_queue,
- default_qdisc_ops, TC_H_ROOT);
- if (!qdisc) {
- netdev_info(dev, "activation failed\n");
- return;
- }
- if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ if (dev->priv_flags & IFF_NO_QUEUE)
+ ops = &noqueue_qdisc_ops;
+
+ qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
+ if (!qdisc) {
+ netdev_info(dev, "activation failed\n");
+ return;
}
+ if (!netif_is_multiqueue(dev))
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
}
@@ -755,7 +750,8 @@ static void attach_default_qdiscs(struct net_device *dev)
txq = netdev_get_tx_queue(dev, 0);
- if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+ if (!netif_is_multiqueue(dev) ||
+ dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
atomic_inc(&dev->qdisc->refcnt);
@@ -779,7 +775,7 @@ static void transition_one_qdisc(struct net_device *dev,
clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
- if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
+ if (need_watchdog_p) {
dev_queue->trans_start = 0;
*need_watchdog_p = 1;
}
diff --git a/kernel/net/sched/sch_gred.c b/kernel/net/sched/sch_gred.c
index 634529e0c..80105109f 100644
--- a/kernel/net/sched/sch_gred.c
+++ b/kernel/net/sched/sch_gred.c
@@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* if no default DP has been configured. This
* allows for DP flows to be left untouched.
*/
- if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+ sch->limit))
return qdisc_enqueue_tail(skb, sch);
else
goto drop;
@@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
q->DP = dp;
q->prio = prio;
- q->limit = ctl->limit;
+ if (ctl->limit > sch->limit)
+ q->limit = sch->limit;
+ else
+ q->limit = ctl->limit;
if (q->backlog == 0)
red_end_of_idle_period(&q->vars);
@@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_STAB] = { .len = 256 },
[TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
[TCA_GRED_MAX_P] = { .type = NLA_U32 },
+ [TCA_GRED_LIMIT] = { .type = NLA_U32 },
};
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
- if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
+ if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
+ if (tb[TCA_GRED_LIMIT] != NULL)
+ sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
return gred_change_table_def(sch, opt);
+ }
if (tb[TCA_GRED_PARMS] == NULL ||
- tb[TCA_GRED_STAB] == NULL)
+ tb[TCA_GRED_STAB] == NULL ||
+ tb[TCA_GRED_LIMIT] != NULL)
return -EINVAL;
max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
@@ -501,6 +510,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
return -EINVAL;
+ if (tb[TCA_GRED_LIMIT])
+ sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
+ else
+ sch->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
+
return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
@@ -531,6 +546,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
+ goto nla_put_failure;
+
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
goto nla_put_failure;
diff --git a/kernel/net/sched/sch_hfsc.c b/kernel/net/sched/sch_hfsc.c
index e6c7416d0..b7ebe2c87 100644
--- a/kernel/net/sched/sch_hfsc.c
+++ b/kernel/net/sched/sch_hfsc.c
@@ -1165,7 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+ while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
diff --git a/kernel/net/sched/sch_hhf.c b/kernel/net/sched/sch_hhf.c
index 15d3aabfe..86b04e31e 100644
--- a/kernel/net/sched/sch_hhf.c
+++ b/kernel/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
-#include <net/flow_keys.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
return jiffies;
}
-static unsigned int skb_hash(const struct hhf_sched_data *q,
- const struct sk_buff *skb)
-{
- struct flow_keys keys;
- unsigned int hash;
-
- if (skb->sk && skb->sk->sk_hash)
- return skb->sk->sk_hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
- return hash;
-}
-
/* Looks up a heavy-hitter flow in a chaining list of table T. */
static struct hh_flow_state *seek_list(const u32 hash,
struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
}
/* Get hashed flow-id of the skb. */
- hash = skb_hash(q, skb);
+ hash = skb_get_hash_perturb(skb, q->perturbation);
/* Check if this packet belongs to an already established HH flow. */
flow_pos = hash & HHF_BIT_MASK;
@@ -385,6 +368,15 @@ static unsigned int hhf_drop(struct Qdisc *sch)
return bucket - q->buckets;
}
+static unsigned int hhf_qdisc_drop(struct Qdisc *sch)
+{
+ unsigned int prev_backlog;
+
+ prev_backlog = sch->qstats.backlog;
+ hhf_drop(sch);
+ return prev_backlog - sch->qstats.backlog;
+}
+
static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct hhf_sched_data *q = qdisc_priv(sch);
@@ -713,7 +705,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
.enqueue = hhf_enqueue,
.dequeue = hhf_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = hhf_drop,
+ .drop = hhf_qdisc_drop,
.init = hhf_init,
.reset = hhf_reset,
.destroy = hhf_destroy,
diff --git a/kernel/net/sched/sch_htb.c b/kernel/net/sched/sch_htb.c
index f1acb0f60..15ccd7f8f 100644
--- a/kernel/net/sched/sch_htb.c
+++ b/kernel/net/sched/sch_htb.c
@@ -229,7 +229,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+ while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -1048,11 +1048,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
- else {
+ else
q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
- if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
- q->direct_qlen = 2;
- }
+
if ((q->rate2quantum = gopt->rate2quantum) < 1)
q->rate2quantum = 1;
q->defcls = gopt->defcls;
diff --git a/kernel/net/sched/sch_ingress.c b/kernel/net/sched/sch_ingress.c
index 4cdbfb856..e7c648fa9 100644
--- a/kernel/net/sched/sch_ingress.c
+++ b/kernel/net/sched/sch_ingress.c
@@ -12,16 +12,10 @@
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
-struct ingress_qdisc_data {
- struct tcf_proto __rcu *filter_list;
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
return NULL;
@@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
unsigned long cl)
{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
-
- return &p->filter_list;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
+ struct net_device *dev = qdisc_dev(sch);
-static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
- struct tcf_result res;
- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result;
-
- result = tc_classify(skb, fl, &res);
-
- qdisc_bstats_update(sch, skb);
- switch (result) {
- case TC_ACT_SHOT:
- result = TC_ACT_SHOT;
- qdisc_qstats_drop(sch);
- break;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- result = TC_ACT_STOLEN;
- break;
- case TC_ACT_RECLASSIFY:
- case TC_ACT_OK:
- skb->tc_index = TC_H_MIN(res.classid);
- default:
- result = TC_ACT_OK;
- break;
- }
-
- return result;
+ return &dev->ingress_cl_list;
}
-/* ------------------------------------------------------------- */
-
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
net_inc_ingress_queue();
+ sch->flags |= TCQ_F_CPUSTATS;
return 0;
}
static void ingress_destroy(struct Qdisc *sch)
{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
- tcf_destroy_chain(&p->filter_list);
+ tcf_destroy_chain(&dev->ingress_cl_list);
net_dec_ingress_queue();
}
@@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
+
return nla_nest_end(skb, nest);
nla_put_failure:
@@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
- .priv_size = sizeof(struct ingress_qdisc_data),
- .enqueue = ingress_enqueue,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
@@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void)
unregister_qdisc(&ingress_qdisc_ops);
}
-module_init(ingress_module_init)
-module_exit(ingress_module_exit)
+module_init(ingress_module_init);
+module_exit(ingress_module_exit);
+
MODULE_LICENSE("GPL");
diff --git a/kernel/net/sched/sch_mq.c b/kernel/net/sched/sch_mq.c
index f3cbaecd2..3e82f047c 100644
--- a/kernel/net/sched/sch_mq.c
+++ b/kernel/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
if (qdisc == NULL)
goto err;
priv->qdiscs[ntx] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
return 0;
diff --git a/kernel/net/sched/sch_mqprio.c b/kernel/net/sched/sch_mqprio.c
index 3811a7454..ad70ecf57 100644
--- a/kernel/net/sched/sch_mqprio.c
+++ b/kernel/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
goto err;
}
priv->qdiscs[i] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
diff --git a/kernel/net/sched/sch_multiq.c b/kernel/net/sched/sch_multiq.c
index 42dd21887..4e904ca0a 100644
--- a/kernel/net/sched/sch_multiq.c
+++ b/kernel/net/sched/sch_multiq.c
@@ -46,7 +46,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, fl, &res);
+ err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/kernel/net/sched/sch_netem.c b/kernel/net/sched/sch_netem.c
index 956ead2ca..5abd1d9de 100644
--- a/kernel/net/sched/sch_netem.c
+++ b/kernel/net/sched/sch_netem.c
@@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
struct Qdisc *rootq = qdisc_root(sch);
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
- q->duplicate = 0;
- qdisc_enqueue_root(skb2, rootq);
+ q->duplicate = 0;
+ rootq->enqueue(skb2, rootq);
q->duplicate = dupsave;
}
diff --git a/kernel/net/sched/sch_plug.c b/kernel/net/sched/sch_plug.c
index 89f8fcf73..5abfe4467 100644
--- a/kernel/net/sched/sch_plug.c
+++ b/kernel/net/sched/sch_plug.c
@@ -130,12 +130,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
q->unplug_indefinite = false;
if (opt == NULL) {
- /* We will set a default limit of 100 pkts (~150kB)
- * in case tx_queue_len is not available. The
- * default value is completely arbitrary.
- */
- u32 pkt_limit = qdisc_dev(sch)->tx_queue_len ? : 100;
- q->limit = pkt_limit * psched_mtu(qdisc_dev(sch));
+ q->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
} else {
struct tc_plug_qopt *ctl = nla_data(opt);
@@ -216,6 +212,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.peek = qdisc_peek_head,
.init = plug_init,
.change = plug_change,
+ .reset = qdisc_reset_queue,
.owner = THIS_MODULE,
};
diff --git a/kernel/net/sched/sch_prio.c b/kernel/net/sched/sch_prio.c
index 8e5cd34aa..ba6487f27 100644
--- a/kernel/net/sched/sch_prio.c
+++ b/kernel/net/sched/sch_prio.c
@@ -42,7 +42,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tc_classify(skb, fl, &res);
+ err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/kernel/net/sched/sch_qfq.c b/kernel/net/sched/sch_qfq.c
index 3ec7e88a4..3dc3a6e56 100644
--- a/kernel/net/sched/sch_qfq.c
+++ b/kernel/net/sched/sch_qfq.c
@@ -186,7 +186,6 @@ struct qfq_sched {
u64 oldV, V; /* Precise virtual times. */
struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
- u32 num_active_agg; /* Num. of active aggregates */
u32 wsum; /* weight sum */
u32 iwsum; /* inverse weight sum */
@@ -339,8 +338,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
- if (!hlist_unhashed(&agg->nonfull_next))
- hlist_del_init(&agg->nonfull_next);
+ hlist_del_init(&agg->nonfull_next);
q->wsum -= agg->class_weight;
if (q->wsum != 0)
q->iwsum = ONE_FP / q->wsum;
@@ -719,7 +717,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/kernel/net/sched/sch_sfb.c b/kernel/net/sched/sch_sfb.c
index 5819dd826..5bbb6332e 100644
--- a/kernel/net/sched/sch_sfb.c
+++ b/kernel/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#include <net/flow_keys.h>
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -259,7 +258,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
- u32 r, slot, salt, sfbhash;
+ u32 r, sfbhash;
+ u32 slot = q->slot;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
fl = rcu_dereference_bh(q->filter_list);
if (fl) {
+ u32 salt;
+
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
- keys.src = salt;
- keys.dst = 0;
- keys.ports = 0;
+ sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
} else {
- skb_flow_dissect(skb, &keys);
+ sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
}
- slot = q->slot;
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(p_min >= SFB_MAX_PROB)) {
/* Inelastic flow */
if (q->double_buffering) {
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
+ sfbhash = skb_get_hash_perturb(skb,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -510,7 +502,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
limit = ctl->limit;
if (limit == 0)
- limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
+ limit = qdisc_dev(sch)->tx_queue_len;
child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
if (IS_ERR(child))
diff --git a/kernel/net/sched/sch_sfq.c b/kernel/net/sched/sch_sfq.c
index b877140be..3abab534e 100644
--- a/kernel/net/sched/sch_sfq.c
+++ b/kernel/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/red.h>
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_MAX_FLOWS];
}
-/*
- * In order to be able to quickly rehash our queue when timer changes
- * q->perturbation, we store flow_keys in skb->cb[]
- */
-struct sfq_skb_cb {
- struct flow_keys keys;
-};
-
-static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
-{
- qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
- return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
static unsigned int sfq_hash(const struct sfq_sched_data *q,
const struct sk_buff *skb)
{
- const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
- unsigned int hash;
-
- hash = jhash_3words((__force u32)keys->dst,
- (__force u32)keys->src ^ keys->ip_proto,
- (__force u32)keys->ports, q->perturbation);
- return hash & (q->divisor - 1);
+ return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,13 +175,11 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return TC_H_MIN(skb->priority);
fl = rcu_dereference_bh(q->filter_list);
- if (!fl) {
- skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
+ if (!fl)
return sfq_hash(q, skb) + 1;
- }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -329,10 +306,10 @@ drop:
len = qdisc_pkt_len(skb);
slot->backlog -= len;
sfq_dec(q, x);
- kfree_skb(skb);
sch->q.qlen--;
qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
return len;
}