summaryrefslogtreecommitdiffstats
path: root/kernel/net/netfilter
diff options
context:
space:
mode:
authorYunhong Jiang <yunhong.jiang@linux.intel.com>2017-03-08 23:13:28 -0800
committerYunhong Jiang <yunhong.jiang@linux.intel.com>2017-03-08 23:36:15 -0800
commit52f993b8e89487ec9ee15a7fb4979e0f09a45b27 (patch)
treed65304486afe0bea4a311c783c0d72791c8c0aa2 /kernel/net/netfilter
parentc189ccac5702322ed843fe17057035b7222a59b6 (diff)
Upgrade to 4.4.50-rt62
The current kernel is based on rt kernel v4.4.6-rt14. We will upgrade it to 4.4.50-rt62. The command to achieve it is: a) Clone a git repo from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git b) Get the diff between this two changesets: git diff 640eca2901f3435e616157b11379d3223a44b391 705619beeea1b0b48219a683fd1a901a86fdaf5e where the two commits are: [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 640eca2901f3435e616157b11379d3223a44b391 640eca2901f3 v4.4.6-rt14 localversion-rt [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 705619beeea1b0b48219a683fd1a901a86fdaf5e 705619beeea1 Linux 4.4.50-rt62 localversion-rt c) One patch has been backported thus revert the patch before applying. filterdiff -p1 -x scripts/package/Makefile ~/tmp/v4.4.6-rt14-4.4.50-rt62.diff |patch -p1 --dry-run Upstream status: backport Change-Id: I244d57a32f6066e5a5b9915f9fbf99e7bbca6e01 Signed-off-by: Yunhong Jiang <yunhong.jiang@linux.intel.com>
Diffstat (limited to 'kernel/net/netfilter')
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_core.c37
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_pe_sip.c6
-rw-r--r--kernel/net/netfilter/ipvs/ip_vs_sync.c6
-rw-r--r--kernel/net/netfilter/nf_conntrack_core.c4
-rw-r--r--kernel/net/netfilter/nf_log.c6
-rw-r--r--kernel/net/netfilter/nft_dynset.c6
-rw-r--r--kernel/net/netfilter/x_tables.c251
7 files changed, 294 insertions, 22 deletions
diff --git a/kernel/net/netfilter/ipvs/ip_vs_core.c b/kernel/net/netfilter/ipvs/ip_vs_core.c
index f57b4dcdb..4da560005 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_core.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_core.c
@@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
cp = pp->conn_in_get(ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs &&
- is_new_conn(skb, &iph) && cp &&
- ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight))) ||
- unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
- if (!atomic_read(&cp->n_control))
- ip_vs_conn_expire_now(cp);
- __ip_vs_conn_put(cp);
- cp = NULL;
+ if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ bool uses_ct = false, resched = false;
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) {
+ resched = true;
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ if (!atomic_read(&cp->n_control)) {
+ resched = true;
+ } else {
+ /* Do not reschedule controlling connection
+ * that uses conntrack while it is still
+ * referenced by controlled connection(s).
+ */
+ resched = !uses_ct;
+ }
+ }
+
+ if (resched) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (uses_ct)
+ return NF_DROP;
+ cp = NULL;
+ }
}
if (unlikely(!cp)) {
diff --git a/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c b/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e4..0a6eb5c0d 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr;
int retc;
- ip_vs_fill_iph_skb(p->af, skb, false, &iph);
+ retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */
- if (iph.protocol != IPPROTO_UDP)
+ if (!retc || iph.protocol != IPPROTO_UDP)
return -EINVAL;
/* todo: IPv6 fragments:
* I think this only should be done for the first fragment. /HS
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
- if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+ if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
return -EINVAL;
/* N.B: pe_data is only set on success,
diff --git a/kernel/net/netfilter/ipvs/ip_vs_sync.c b/kernel/net/netfilter/ipvs/ip_vs_sync.c
index 803001a45..1b07578be 100644
--- a/kernel/net/netfilter/ipvs/ip_vs_sync.c
+++ b/kernel/net/netfilter/ipvs/ip_vs_sync.c
@@ -1545,7 +1545,8 @@ error:
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
+static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
+ int ifindex)
{
/* multicast addr */
union ipvs_sockaddr mcast_addr;
@@ -1566,6 +1567,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
set_sock_size(sock->sk, 0, result);
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+ sock->sk->sk_bound_dev_if = ifindex;
result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
@@ -1868,7 +1870,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
if (state == IP_VS_STATE_MASTER)
sock = make_send_sock(ipvs, id);
else
- sock = make_receive_sock(ipvs, id);
+ sock = make_receive_sock(ipvs, id, dev->ifindex);
if (IS_ERR(sock)) {
result = PTR_ERR(sock);
goto outtinfo;
diff --git a/kernel/net/netfilter/nf_conntrack_core.c b/kernel/net/netfilter/nf_conntrack_core.c
index 3cb3cb831..86a3c6f0c 100644
--- a/kernel/net/netfilter/nf_conntrack_core.c
+++ b/kernel/net/netfilter/nf_conntrack_core.c
@@ -1757,6 +1757,7 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
+ static atomic64_t unique_id;
int ret = -ENOMEM;
int cpu;
@@ -1779,7 +1780,8 @@ int nf_conntrack_init_net(struct net *net)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+ net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
+ (u64)atomic64_inc_return(&unique_id));
if (!net->ct.slabname)
goto err_slabname;
diff --git a/kernel/net/netfilter/nf_log.c b/kernel/net/netfilter/nf_log.c
index a5d41dfa9..2c89f90cd 100644
--- a/kernel/net/netfilter/nf_log.c
+++ b/kernel/net/netfilter/nf_log.c
@@ -401,7 +401,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
size_t size = *lenp;
int r = 0;
int tindex = (unsigned long)table->extra1;
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = table->extra2;
if (write) {
if (size > sizeof(buf))
@@ -453,7 +453,6 @@ static int netfilter_log_sysctl_init(struct net *net)
3, "%d", i);
nf_log_sysctl_table[i].procname =
nf_log_sysctl_fnames[i];
- nf_log_sysctl_table[i].data = NULL;
nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN;
nf_log_sysctl_table[i].mode = 0644;
nf_log_sysctl_table[i].proc_handler =
@@ -463,6 +462,9 @@ static int netfilter_log_sysctl_init(struct net *net)
}
}
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
+ table[i].extra2 = net;
+
net->nf.nf_log_dir_header = register_net_sysctl(net,
"net/netfilter/nf_log",
table);
diff --git a/kernel/net/netfilter/nft_dynset.c b/kernel/net/netfilter/nft_dynset.c
index 9dec3bd1b..0a5df0cba 100644
--- a/kernel/net/netfilter/nft_dynset.c
+++ b/kernel/net/netfilter/nft_dynset.c
@@ -140,7 +140,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
- timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+ timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
+ tb[NFTA_DYNSET_TIMEOUT])));
}
priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
@@ -227,7 +228,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
goto nla_put_failure;
- if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout)))
+ if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
+ cpu_to_be64(jiffies_to_msecs(priv->timeout))))
goto nla_put_failure;
if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
goto nla_put_failure;
diff --git a/kernel/net/netfilter/x_tables.c b/kernel/net/netfilter/x_tables.c
index d4aaad747..2fc6ca9d1 100644
--- a/kernel/net/netfilter/x_tables.c
+++ b/kernel/net/netfilter/x_tables.c
@@ -415,6 +415,47 @@ int xt_check_match(struct xt_mtchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_match);
+/** xt_check_entry_match - check that matches end before start of target
+ *
+ * @match: beginning of xt_entry_match
+ * @target: beginning of this rules target (alleged end of matches)
+ * @alignment: alignment requirement of match structures
+ *
+ * Validates that all matches add up to the beginning of the target,
+ * and that each match covers at least the base structure size.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int xt_check_entry_match(const char *match, const char *target,
+ const size_t alignment)
+{
+ const struct xt_entry_match *pos;
+ int length = target - match;
+
+ if (length == 0) /* no matches */
+ return 0;
+
+ pos = (struct xt_entry_match *)match;
+ do {
+ if ((unsigned long)pos % alignment)
+ return -EINVAL;
+
+ if (length < (int)sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size < sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size > length)
+ return -EINVAL;
+
+ length -= pos->u.match_size;
+ pos = ((void *)((char *)(pos) + (pos)->u.match_size));
+ } while (length > 0);
+
+ return 0;
+}
+
#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
@@ -484,13 +525,14 @@ int xt_compat_match_offset(const struct xt_match *match)
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);
-int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
- unsigned int *size)
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
int pad, off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
+ char name[sizeof(m->u.user.name)];
m = *dstptr;
memcpy(m, cm, sizeof(*cm));
@@ -504,10 +546,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
msize += off;
m->u.user.match_size = msize;
+ strlcpy(name, match->name, sizeof(name));
+ module_put(match->me);
+ strncpy(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
- return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
@@ -538,8 +582,125 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+
+/* non-compat version may have padding after verdict */
+struct compat_xt_standard_target {
+ struct compat_xt_entry_target t;
+ compat_uint_t verdict;
+};
+
+int xt_compat_check_entry_offsets(const void *base, const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct compat_xt_entry_target *t;
+ const char *e = base;
+
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ /* compat_xt_entry match has less strict aligment requirements,
+ * otherwise they are identical. In case of padding differences
+ * we need to add compat version of xt_check_entry_match.
+ */
+ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct compat_xt_entry_match));
+}
+EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_COMPAT */
+/**
+ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
+ *
+ * @base: pointer to arp/ip/ip6t_entry
+ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
+ * @target_offset: the arp/ip/ip6_t->target_offset
+ * @next_offset: the arp/ip/ip6_t->next_offset
+ *
+ * validates that target_offset and next_offset are sane and that all
+ * match sizes (if any) align with the target offset.
+ *
+ * This function does not validate the targets or matches themselves, it
+ * only tests that all the offsets and sizes are correct, that all
+ * match structures are aligned, and that the last structure ends where
+ * the target structure begins.
+ *
+ * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ *
+ * The arp/ip/ip6t_entry structure @base must have passed following tests:
+ * - it must point to a valid memory location
+ * - base to base + next_offset must be accessible, i.e. not exceed allocated
+ * length.
+ *
+ * A well-formed entry looks like this:
+ *
+ * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
+ * e->elems[]-----' | |
+ * matchsize | |
+ * matchsize | |
+ * | |
+ * target_offset---------------------------------' |
+ * next_offset---------------------------------------------------'
+ *
+ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
+ * This is where matches (if any) and the target reside.
+ * target_offset: beginning of target.
+ * next_offset: start of the next rule; also: size of this rule.
+ * Since targets have a minimum size, target_offset + minlen <= next_offset.
+ *
+ * Every match stores its size, sum of sizes must not exceed target_offset.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_entry_offsets(const void *base,
+ const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct xt_entry_target *t;
+ const char *e = base;
+
+ /* target start is within the ip/ip6/arpt_entry struct */
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct xt_entry_match));
+}
+EXPORT_SYMBOL(xt_check_entry_offsets);
+
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -590,6 +751,80 @@ int xt_check_target(struct xt_tgchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_target);
+/**
+ * xt_copy_counters_from_user - copy counters and metadata from userspace
+ *
+ * @user: src pointer to userspace memory
+ * @len: alleged size of userspace memory
+ * @info: where to store the xt_counters_info metadata
+ * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
+ *
+ * Copies counter meta data from @user and stores it in @info.
+ *
+ * vmallocs memory to hold the counters, then copies the counter data
+ * from @user to the new memory and returns a pointer to it.
+ *
+ * If @compat is true, @info gets converted automatically to the 64bit
+ * representation.
+ *
+ * The metadata associated with the counters is stored in @info.
+ *
+ * Return: returns pointer that caller has to test via IS_ERR().
+ * If IS_ERR is false, caller has to vfree the pointer.
+ */
+void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
+ struct xt_counters_info *info, bool compat)
+{
+ void *mem;
+ u64 size;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ /* structures only differ in size due to alignment */
+ struct compat_xt_counters_info compat_tmp;
+
+ if (len <= sizeof(compat_tmp))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(compat_tmp);
+ if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ info->num_counters = compat_tmp.num_counters;
+ user += sizeof(compat_tmp);
+ } else
+#endif
+ {
+ if (len <= sizeof(*info))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(*info);
+ if (copy_from_user(info, user, sizeof(*info)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ info->name[sizeof(info->name) - 1] = '\0';
+ user += sizeof(*info);
+ }
+
+ size = sizeof(struct xt_counters);
+ size *= info->num_counters;
+
+ if (size != (u64)len)
+ return ERR_PTR(-EINVAL);
+
+ mem = vmalloc(len);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(mem, user, len) == 0)
+ return mem;
+
+ vfree(mem);
+ return ERR_PTR(-EFAULT);
+}
+EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
+
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
@@ -605,6 +840,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
int pad, off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
+ char name[sizeof(t->u.user.name)];
t = *dstptr;
memcpy(t, ct, sizeof(*ct));
@@ -618,6 +854,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
tsize += off;
t->u.user.target_size = tsize;
+ strlcpy(name, target->name, sizeof(name));
+ module_put(target->me);
+ strncpy(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
@@ -658,6 +897,12 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
struct xt_table_info *info = NULL;
size_t sz = sizeof(*info) + size;
+ if (sz < sizeof(*info))
+ return NULL;
+
+ if (sz < sizeof(*info))
+ return NULL;
+
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;