From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/net/netfilter/ipset/ip_set_list_set.c | 427 +++++++++++++-------------- 1 file changed, 202 insertions(+), 225 deletions(-) (limited to 'kernel/net/netfilter/ipset/ip_set_list_set.c') diff --git a/kernel/net/netfilter/ipset/ip_set_list_set.c b/kernel/net/netfilter/ipset/ip_set_list_set.c index f8f682806..bbede95c9 100644 --- a/kernel/net/netfilter/ipset/ip_set_list_set.c +++ b/kernel/net/netfilter/ipset/ip_set_list_set.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -27,8 +28,10 @@ MODULE_ALIAS("ip_set_list:set"); /* Member elements */ struct set_elem { + struct rcu_head rcu; + struct list_head list; ip_set_id_t id; -}; +} __aligned(__alignof__(u64)); struct set_adt_elem { ip_set_id_t id; @@ -41,12 +44,9 @@ struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ struct net *net; /* namespace */ - struct set_elem members[0]; /* the set members */ + struct list_head members; /* the set members */ }; -#define list_set_elem(set, map, id) \ - (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) - static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -54,17 +54,14 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i, cmdflags = opt->cmdflags; + u32 cmdflags = opt->cmdflags; int ret; /* Don't lookup sub-counters at all */ opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -91,13 +88,9 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i; int ret; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; + list_for_each_entry(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -115,13 +108,9 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, { struct list_set *map = set->data; struct set_elem *e; - u32 i; int ret; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; + list_for_each_entry(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -138,110 +127,65 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + int ret = -EINVAL; + rcu_read_lock(); switch (adt) { case IPSET_TEST: - return list_set_ktest(set, skb, par, opt, &ext); + ret = list_set_ktest(set, skb, par, opt, &ext); + break; case IPSET_ADD: - return list_set_kadd(set, skb, par, opt, &ext); + ret = list_set_kadd(set, skb, par, opt, &ext); + break; case IPSET_DEL: - return list_set_kdel(set, skb, par, opt, &ext); + ret = list_set_kdel(set, skb, par, opt, &ext); + break; default: break; } - return -EINVAL; -} - -static bool -id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) -{ - const struct list_set *map = set->data; - const struct set_elem *e; - - if (i >= map->size) - return 0; + rcu_read_unlock(); - e = list_set_elem(set, map, i); - return !!(e->id == id && - !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set)))); + return ret; } -static int -list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, - const struct ip_set_ext *ext) -{ - struct list_set *map = set->data; - struct set_elem *e = list_set_elem(set, map, i); +/* Userspace interfaces: we are protected by the nfnl mutex */ - if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) { - /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - } else { - struct set_elem *x = list_set_elem(set, map, - map->size - 1); - - /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) { - ip_set_put_byindex(map->net, x->id); - ip_set_ext_destroy(set, x); - } - memmove(list_set_elem(set, map, i + 1), e, - set->dsize * (map->size - (i + 1))); - /* Extensions must be initialized to zero */ - memset(e, 0, set->dsize); - } - } - - e->id = d->id; - if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, set), ext->timeout); - if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, set), ext); - if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); - if (SET_WITH_SKBINFO(set)) - ip_set_init_skbinfo(ext_skbinfo(e, set), ext); - return 0; -} - -static int -list_set_del(struct ip_set *set, u32 i) +static void +__list_set_del(struct ip_set *set, struct set_elem *e) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(map->net, e->id); + /* We may call it, because we don't have a to be destroyed + * extension which is used by the kernel. + */ ip_set_ext_destroy(set, e); + kfree_rcu(e, rcu); +} - if (i < map->size - 1) - memmove(e, list_set_elem(set, map, i + 1), - set->dsize * (map->size - (i + 1))); +static inline void +list_set_del(struct ip_set *set, struct set_elem *e) +{ + list_del_rcu(&e->list); + __list_set_del(set, e); +} - /* Last element */ - e = list_set_elem(set, map, map->size - 1); - e->id = IPSET_INVALID_ID; - return 0; +static inline void +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) +{ + list_replace_rcu(&old->list, &e->list); + __list_set_del(set, old); } static void set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e; - u32 i = 0; + struct set_elem *e, *n; - while (i < map->size) { - e = list_set_elem(set, map, i); - if (e->id != IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, set))) - list_set_del(set, i); - /* Check element moved to position i in next loop */ - else - i++; - } + list_for_each_entry_safe(e, n, &map->members, list) + if (ip_set_timeout_expired(ext_timeout(e, set))) + list_set_del(set, e); } static int @@ -250,31 +194,46 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e; - u32 i; + struct set_elem *e, *next, *prev = NULL; int ret; - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return 0; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + list_for_each_entry(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) continue; - else if (e->id != d->id) + else if (e->id != d->id) { + prev = e; continue; + } - if (d->before == 0) - return 1; - else if (d->before > 0) - ret = id_eq(set, i + 1, d->refid); - else - ret = i > 0 && id_eq(set, i - 1, d->refid); + if (d->before == 0) { + ret = 1; + } else if (d->before > 0) { + next = list_next_entry(e, list); + ret = !list_is_last(&e->list, &map->members) && + next->id == d->refid; + } else { + ret = prev && prev->id == d->refid; + } return ret; } return 0; } +static void +list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, + struct set_elem *e) +{ + if (SET_WITH_COUNTER(set)) + ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); + if (SET_WITH_SKBINFO(set)) + ip_set_init_skbinfo(ext_skbinfo(e, set), ext); + /* Update timeout last */ + if (SET_WITH_TIMEOUT(set)) + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); +} static int list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, @@ -282,60 +241,78 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e; + struct set_elem *e, *n, *prev, *next; bool flag_exist = flags & IPSET_FLAG_EXIST; - u32 i, ret = 0; if (SET_WITH_TIMEOUT(set)) set_cleanup_entries(set); - /* Check already added element */ - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - goto insert; - else if (e->id != d->id) + /* Find where to add the new entry */ + n = prev = next = NULL; + list_for_each_entry(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) continue; - - if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) || - (d->before < 0 && - (i == 0 || !id_eq(set, i - 1, d->refid)))) - /* Before/after doesn't match */ + else if (d->id == e->id) + n = e; + else if (d->before == 0 || e->id != d->refid) + continue; + else if (d->before > 0) + next = e; + else + prev = e; + } + /* Re-add already existing element */ + if (n) { + if ((d->before > 0 && !next) || + (d->before < 0 && !prev)) return -IPSET_ERR_REF_EXIST; if (!flag_exist) - /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ - ip_set_ext_destroy(set, e); + ip_set_ext_destroy(set, n); + list_set_init_extensions(set, ext, n); - if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, set), ext->timeout); - if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, set), ext); - if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); - if (SET_WITH_SKBINFO(set)) - ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(map->net, d->id); return 0; } -insert: - ret = -IPSET_ERR_LIST_FULL; - for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - ret = d->before != 0 ? -IPSET_ERR_REF_EXIST - : list_set_add(set, i, d, ext); - else if (e->id != d->refid) - continue; - else if (d->before > 0) - ret = list_set_add(set, i, d, ext); - else if (i + 1 < map->size) - ret = list_set_add(set, i + 1, d, ext); + /* Add new entry */ + if (d->before == 0) { + /* Append */ + n = list_empty(&map->members) ? NULL : + list_last_entry(&map->members, struct set_elem, list); + } else if (d->before > 0) { + /* Insert after next element */ + if (!list_is_last(&next->list, &map->members)) + n = list_next_entry(next, list); + } else { + /* Insert before prev element */ + if (prev->list.prev != &map->members) + n = list_prev_entry(prev, list); } + /* Can we replace a timed out entry? */ + if (n && + !(SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(n, set)))) + n = NULL; + + e = kzalloc(set->dsize, GFP_ATOMIC); + if (!e) + return -ENOMEM; + e->id = d->id; + INIT_LIST_HEAD(&e->list); + list_set_init_extensions(set, ext, e); + if (n) + list_set_replace(set, e, n); + else if (next) + list_add_tail_rcu(&e->list, &next->list); + else if (prev) + list_add_rcu(&e->list, &prev->list); + else + list_add_tail_rcu(&e->list, &map->members); - return ret; + return 0; } static int @@ -344,32 +321,30 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e; - u32 i; - - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - return d->before != 0 ? -IPSET_ERR_REF_EXIST - : -IPSET_ERR_EXIST; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + struct set_elem *e, *next, *prev = NULL; + + list_for_each_entry(e, &map->members, list) { + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set))) continue; - else if (e->id != d->id) + else if (e->id != d->id) { + prev = e; continue; + } - if (d->before == 0) - return list_set_del(set, i); - else if (d->before > 0) { - if (!id_eq(set, i + 1, d->refid)) + if (d->before > 0) { + next = list_next_entry(e, list); + if (list_is_last(&e->list, &map->members) || + next->id != d->refid) return -IPSET_ERR_REF_EXIST; - return list_set_del(set, i); - } else if (i == 0 || !id_eq(set, i - 1, d->refid)) - return -IPSET_ERR_REF_EXIST; - else - return list_set_del(set, i); + } else if (d->before < 0) { + if (!prev || prev->id != d->refid) + return -IPSET_ERR_REF_EXIST; + } + list_set_del(set, e); + return 0; } - return -IPSET_ERR_EXIST; + return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; } static int @@ -383,19 +358,13 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], struct ip_set *s; int ret = 0; - if (unlikely(!tb[IPSET_ATTR_NAME] || - !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || - !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) - return -IPSET_ERR_PROTOCOL; - if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + if (unlikely(!tb[IPSET_ATTR_NAME] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; @@ -410,6 +379,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + e.before = f & IPSET_FLAG_BEFORE; } @@ -447,27 +417,26 @@ static void list_set_flush(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e; - u32 i; - - for (i = 0; i < map->size; i++) { - e = list_set_elem(set, map, i); - if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - e->id = IPSET_INVALID_ID; - } - } + struct set_elem *e, *n; + + list_for_each_entry_safe(e, n, &map->members, list) + list_set_del(set, e); } static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; + struct set_elem *e, *n; if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); - list_set_flush(set); + list_for_each_entry_safe(e, n, &map->members, list) { + list_del(&e->list); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); + kfree(e); + } kfree(map); set->data = NULL; @@ -478,6 +447,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) { const struct list_set *map = set->data; struct nlattr *nested; + struct set_elem *e; + u32 n = 0; + + list_for_each_entry(e, &map->members, list) + n++; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -485,7 +459,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * set->dsize))) + htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -502,18 +476,22 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[IPSET_CB_ARG0]; - const struct set_elem *e; + u32 i = 0, first = cb->args[IPSET_CB_ARG0]; + struct set_elem *e; + int ret = 0; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[IPSET_CB_ARG0] < map->size; - cb->args[IPSET_CB_ARG0]++) { - i = cb->args[IPSET_CB_ARG0]; - e = list_set_elem(set, map, i); - if (e->id == IPSET_INVALID_ID) - goto finish; + list_for_each_entry(e, &map->members, list) { + if (i == first) + break; + i++; + } + + rcu_read_lock(); + list_for_each_entry_from(e, &map->members, list) { + i++; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -521,9 +499,10 @@ list_set_list(const struct ip_set *set, if (!nested) { if (i == first) { nla_nest_cancel(skb, atd); - return -EMSGSIZE; - } else - goto nla_put_failure; + ret = -EMSGSIZE; + goto out; + } + goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(map->net, e->id))) @@ -532,20 +511,23 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; ipset_nest_end(skb, nested); } -finish: + ipset_nest_end(skb, atd); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; - return 0; + goto out; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { cb->args[IPSET_CB_ARG0] = 0; - return -EMSGSIZE; + ret = -EMSGSIZE; } + cb->args[IPSET_CB_ARG0] = i - 1; ipset_nest_end(skb, atd); - return 0; +out: + rcu_read_unlock(); + return ret; } static bool @@ -577,12 +559,12 @@ static const struct ip_set_type_variant set_variant = { static void list_set_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct list_set *map = set->data; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set_cleanup_entries(set); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -594,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct list_set *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -606,24 +588,16 @@ static bool init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; - struct set_elem *e; - u32 i; - map = kzalloc(sizeof(*map) + - min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize, - GFP_KERNEL); + map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) return false; map->size = size; map->net = net; + INIT_LIST_HEAD(&map->members); set->data = map; - for (i = 0; i < size; i++) { - e = list_set_elem(set, map, i); - e->id = IPSET_INVALID_ID; - } - return true; } @@ -644,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], size = IP_SET_LIST_MIN_SIZE; set->variant = &set_variant; - set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem), + __alignof__(struct set_elem)); if (!init_list_set(net, set, size)) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { @@ -678,7 +653,8 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, - [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, + .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, @@ -695,6 +671,7 @@ list_set_init(void) static void __exit list_set_fini(void) { + rcu_barrier(); ip_set_type_unregister(&list_set_type); } -- cgit 1.2.3-korg