summaryrefslogtreecommitdiffstats
path: root/kernel/net/core/filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/net/core/filter.c')
-rw-r--r--kernel/net/core/filter.c543
1 files changed, 484 insertions, 59 deletions
diff --git a/kernel/net/core/filter.c b/kernel/net/core/filter.c
index bf831a85c..37157c4c1 100644
--- a/kernel/net/core/filter.c
+++ b/kernel/net/core/filter.c
@@ -36,6 +36,7 @@
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
@@ -45,16 +46,20 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <net/sch_generic.h>
+#include <net/cls_cgroup.h>
+#include <net/dst_metadata.h>
+#include <net/dst.h>
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
*
- * Run the filter code and then cut skb->data to correct size returned by
- * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * Run the eBPF program and then cut skb->data to correct size returned by
+ * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
+ * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -78,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+ unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -144,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return raw_smp_processor_id();
}
-/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
-{
- return prandom_u32();
-}
-
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
@@ -308,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
break;
case SKF_AD_OFF + SKF_AD_RANDOM:
- *insn = BPF_EMIT_CALL(__get_random_u32);
+ *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
+ bpf_user_rnd_init_once();
break;
}
break;
@@ -355,8 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* for socket filters: ctx == 'struct sk_buff *', for seccomp:
* ctx == 'struct seccomp_data *'.
*/
-int bpf_convert_filter(struct sock_filter *prog, int len,
- struct bpf_insn *new_prog, int *new_len)
+static int bpf_convert_filter(struct sock_filter *prog, int len,
+ struct bpf_insn *new_prog, int *new_len)
{
int new_flen = 0, pass = 0, target, i;
struct bpf_insn *new_insn;
@@ -371,7 +371,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len,
return -EINVAL;
if (new_prog) {
- addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
+ addrs = kcalloc(len, sizeof(*addrs),
+ GFP_KERNEL | __GFP_NOWARN);
if (!addrs)
return -ENOMEM;
}
@@ -473,9 +474,9 @@ do_pass:
bpf_src = BPF_X;
} else {
insn->dst_reg = BPF_REG_A;
- insn->src_reg = BPF_REG_X;
insn->imm = fp->k;
bpf_src = BPF_SRC(fp->code);
+ insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
}
/* Common case where 'jump_false' is next insn. */
@@ -751,7 +752,8 @@ static bool chk_code_allowed(u16 code_to_probe)
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
-int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
+static int bpf_check_classic(const struct sock_filter *filter,
+ unsigned int flen)
{
bool anc_found;
int pc;
@@ -775,6 +777,11 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
if (ftest->k == 0)
return -EINVAL;
break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ if (ftest->k >= 32)
+ return -EINVAL;
+ break;
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
case BPF_ST:
@@ -825,7 +832,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
return -EINVAL;
}
-EXPORT_SYMBOL(bpf_check_classic);
static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
const struct sock_fprog *fprog)
@@ -839,7 +845,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
fkprog = fp->orig_prog;
fkprog->len = fprog->len;
- fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
+
+ fkprog->filter = kmemdup(fp->insns, fsize,
+ GFP_KERNEL | __GFP_NOWARN);
if (!fkprog->filter) {
kfree(fp->orig_prog);
return -ENOMEM;
@@ -941,7 +949,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
* pass. At this time, the user BPF is stored in fp->insns.
*/
old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!old_prog) {
err = -ENOMEM;
goto out_err;
@@ -988,12 +996,13 @@ out_err:
return ERR_PTR(err);
}
-static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
+static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
+ bpf_aux_classic_check_t trans)
{
int err;
fp->bpf_func = NULL;
- fp->jited = false;
+ fp->jited = 0;
err = bpf_check_classic(fp->insns, fp->len);
if (err) {
@@ -1001,6 +1010,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
return ERR_PTR(err);
}
+ /* There might be additional checks and transformations
+ * needed on classic filters, f.e. in case of seccomp.
+ */
+ if (trans) {
+ err = trans(fp->insns, fp->len);
+ if (err) {
+ __bpf_prog_release(fp);
+ return ERR_PTR(err);
+ }
+ }
+
/* Probe if we can JIT compile the filter and if so, do
* the compilation of the filter.
*/
@@ -1050,7 +1070,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- fp = bpf_prepare_filter(fp);
+ fp = bpf_prepare_filter(fp, NULL);
if (IS_ERR(fp))
return PTR_ERR(fp);
@@ -1059,6 +1079,60 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
+/**
+ * bpf_prog_create_from_user - create an unattached filter from user buffer
+ * @pfp: the unattached filter that is created
+ * @fprog: the filter program
+ * @trans: post-classic verifier transformation handler
+ * @save_orig: save classic BPF program
+ *
+ * This function effectively does the same as bpf_prog_create(), only
+ * that it builds up its insns buffer from user space provided buffer.
+ * It also allows for passing a bpf_aux_classic_check_t handler.
+ */
+int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
+ bpf_aux_classic_check_t trans, bool save_orig)
+{
+ unsigned int fsize = bpf_classic_proglen(fprog);
+ struct bpf_prog *fp;
+ int err;
+
+ /* Make sure new filter is there and in the right amounts. */
+ if (fprog->filter == NULL)
+ return -EINVAL;
+
+ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
+ if (!fp)
+ return -ENOMEM;
+
+ if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+ __bpf_prog_free(fp);
+ return -EFAULT;
+ }
+
+ fp->len = fprog->len;
+ fp->orig_prog = NULL;
+
+ if (save_orig) {
+ err = bpf_prog_store_orig_filter(fp, fprog);
+ if (err) {
+ __bpf_prog_free(fp);
+ return -ENOMEM;
+ }
+ }
+
+ /* bpf_prepare_filter() already takes care of freeing
+ * memory in case something goes wrong.
+ */
+ fp = bpf_prepare_filter(fp, trans);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ *pfp = fp;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
+
void bpf_prog_destroy(struct bpf_prog *fp)
{
__bpf_prog_release(fp);
@@ -1135,7 +1209,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- prog = bpf_prepare_filter(prog);
+ prog = bpf_prepare_filter(prog, NULL);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -1175,21 +1249,6 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-/**
- * bpf_skb_clone_not_writable - is the header of a clone not writable
- * @skb: buffer to check
- * @len: length up to which to write, can be negative
- *
- * Returns true if modifying the header part of the cloned buffer
- * does require the data to be copied. I.e. this version works with
- * negative lengths needed for eBPF case!
- */
-static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
-{
- return skb_header_cloned(skb) ||
- (int) skb_headroom(skb) + len > skb->hdr_len;
-}
-
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
@@ -1212,9 +1271,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
return -EFAULT;
- offset -= skb->data - skb_mac_header(skb);
if (unlikely(skb_cloned(skb) &&
- bpf_skb_clone_unwritable(skb, offset + len)))
+ !skb_clone_writable(skb, offset + len)))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1258,9 +1316,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- offset -= skb->data - skb_mac_header(skb);
if (unlikely(skb_cloned(skb) &&
- bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+ !skb_clone_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1299,16 +1356,15 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
int offset = (int) r2;
__sum16 sum, *ptr;
if (unlikely((u32) offset > 0xffff))
return -EFAULT;
- offset -= skb->data - skb_mac_header(skb);
if (unlikely(skb_cloned(skb) &&
- bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+ !skb_clone_writable(skb, offset + sizeof(sum))))
return -EFAULT;
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1344,6 +1400,233 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.arg5_type = ARG_ANYTHING,
};
+#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1)
+
+static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
+ if (unlikely(!dev))
+ return -EINVAL;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb2))
+ return -ENOMEM;
+
+ if (BPF_IS_REDIRECT_INGRESS(flags))
+ return dev_forward_skb(dev, skb2);
+
+ skb2->dev = dev;
+ skb_sender_cpu_clear(skb2);
+ return dev_queue_xmit(skb2);
+}
+
+const struct bpf_func_proto bpf_clone_redirect_proto = {
+ .func = bpf_clone_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+struct redirect_info {
+ u32 ifindex;
+ u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+ ri->ifindex = 0;
+ if (unlikely(!dev)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+ return dev_forward_skb(dev, skb);
+
+ skb->dev = dev;
+ skb_sender_cpu_clear(skb);
+ return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+ .func = bpf_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
+static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return task_get_classid((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
+ .func = bpf_get_cgroup_classid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ const struct dst_entry *dst;
+
+ dst = skb_dst((struct sk_buff *) (unsigned long) r1);
+ if (dst)
+ return dst->tclassid;
+#endif
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_get_route_realm_proto = {
+ .func = bpf_get_route_realm,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ __be16 vlan_proto = (__force __be16) r2;
+
+ if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
+ vlan_proto != htons(ETH_P_8021AD)))
+ vlan_proto = htons(ETH_P_8021Q);
+
+ return skb_vlan_push(skb, vlan_proto, vlan_tci);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_push_proto = {
+ .func = bpf_skb_vlan_push,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
+
+static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+
+ return skb_vlan_pop(skb);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
+ .func = bpf_skb_vlan_pop,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
+
+bool bpf_helper_changes_skb_data(void *func)
+{
+ if (func == bpf_skb_vlan_push)
+ return true;
+ if (func == bpf_skb_vlan_pop)
+ return true;
+ return false;
+}
+
+static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
+ return -EINVAL;
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+
+ to->tunnel_id = be64_to_cpu(info->key.tun_id);
+ to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+ .func = bpf_skb_get_tunnel_key,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+static struct metadata_dst __percpu *md_dst;
+
+static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
+ struct metadata_dst *md = this_cpu_ptr(md_dst);
+ struct ip_tunnel_info *info;
+
+ if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
+ return -EINVAL;
+
+ skb_dst_drop(skb);
+ dst_hold((struct dst_entry *) md);
+ skb_dst_set(skb, (struct dst_entry *) md);
+
+ info = &md->u.tun_info;
+ info->mode = IP_TUNNEL_INFO_TX;
+ info->key.tun_flags = TUNNEL_KEY;
+ info->key.tun_id = cpu_to_be64(from->tunnel_id);
+ info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+ .func = bpf_skb_set_tunnel_key,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+{
+ if (!md_dst) {
+ /* race is not possible, since it's called from
+ * verifier that is holding verifier mutex
+ */
+ md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+ if (!md_dst)
+ return NULL;
+ }
+ return &bpf_skb_set_tunnel_key_proto;
+}
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -1358,6 +1641,13 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_tail_call:
+ return &bpf_tail_call_proto;
+ case BPF_FUNC_ktime_get_ns:
+ return &bpf_ktime_get_ns_proto;
+ case BPF_FUNC_trace_printk:
+ if (capable(CAP_SYS_ADMIN))
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
@@ -1373,18 +1663,29 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_l3_csum_replace_proto;
case BPF_FUNC_l4_csum_replace:
return &bpf_l4_csum_replace_proto;
+ case BPF_FUNC_clone_redirect:
+ return &bpf_clone_redirect_proto;
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_proto;
+ case BPF_FUNC_skb_vlan_push:
+ return &bpf_skb_vlan_push_proto;
+ case BPF_FUNC_skb_vlan_pop:
+ return &bpf_skb_vlan_pop_proto;
+ case BPF_FUNC_skb_get_tunnel_key:
+ return &bpf_skb_get_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_key:
+ return bpf_get_skb_set_tunnel_key_proto();
+ case BPF_FUNC_redirect:
+ return &bpf_redirect_proto;
+ case BPF_FUNC_get_route_realm:
+ return &bpf_get_route_realm_proto;
default:
return sk_filter_func_proto(func_id);
}
}
-static bool sk_filter_is_valid_access(int off, int size,
- enum bpf_access_type type)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
- /* only read is allowed */
- if (type != BPF_READ)
- return false;
-
/* check bounds */
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
@@ -1400,8 +1701,50 @@ static bool sk_filter_is_valid_access(int off, int size,
return true;
}
-static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
- struct bpf_insn *insn_buf)
+static bool sk_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type)
+{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return false;
+
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ return __is_valid_access(off, size, type);
+}
+
+static bool tc_cls_act_is_valid_access(int off, int size,
+ enum bpf_access_type type)
+{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return type == BPF_WRITE ? true : false;
+
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct __sk_buff, mark):
+ case offsetof(struct __sk_buff, tc_index):
+ case offsetof(struct __sk_buff, priority):
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+ return __is_valid_access(off, size, type);
+}
+
+static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
@@ -1430,12 +1773,49 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
case offsetof(struct __sk_buff, priority):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ break;
+
+ case offsetof(struct __sk_buff, ingress_ifindex):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
+
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
- offsetof(struct sk_buff, priority));
+ offsetof(struct sk_buff, skb_iif));
+ break;
+
+ case offsetof(struct __sk_buff, ifindex):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+ *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+ dst_reg, src_reg,
+ offsetof(struct sk_buff, dev));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+ offsetof(struct net_device, ifindex));
+ break;
+
+ case offsetof(struct __sk_buff, hash):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, hash));
break;
case offsetof(struct __sk_buff, mark):
- return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, mark));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, mark));
+ break;
case offsetof(struct __sk_buff, pkt_type):
return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1450,6 +1830,47 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
case offsetof(struct __sk_buff, vlan_tci):
return convert_skb_access(SKF_AD_VLAN_TAG,
dst_reg, src_reg, insn);
+
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetof(struct __sk_buff, cb[4]):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+
+ prog->cb_access = 1;
+ ctx_off -= offsetof(struct __sk_buff, cb[0]);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, data);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+ break;
+
+ case offsetof(struct __sk_buff, tc_classid):
+ ctx_off -= offsetof(struct __sk_buff, tc_classid);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ WARN_ON(type != BPF_WRITE);
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ break;
+
+ case offsetof(struct __sk_buff, tc_index):
+#ifdef CONFIG_NET_SCHED
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, tc_index));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+ offsetof(struct sk_buff, tc_index));
+ break;
+#else
+ if (type == BPF_WRITE)
+ *insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+ else
+ *insn++ = BPF_MOV64_IMM(dst_reg, 0);
+ break;
+#endif
}
return insn - insn_buf;
@@ -1458,13 +1879,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
static const struct bpf_verifier_ops sk_filter_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_net_convert_ctx_access,
};
static const struct bpf_verifier_ops tc_cls_act_ops = {
.get_func_proto = tc_cls_act_func_proto,
- .is_valid_access = sk_filter_is_valid_access,
- .convert_ctx_access = sk_filter_convert_ctx_access,
+ .is_valid_access = tc_cls_act_is_valid_access,
+ .convert_ctx_access = bpf_net_convert_ctx_access,
};
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
@@ -1526,9 +1947,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
goto out;
/* We're copying the filter that has been originally attached,
- * so no conversion/decode needed anymore.
+ * so no conversion/decode needed anymore. eBPF programs that
+ * have no original program cannot be dumped through this.
*/
+ ret = -EACCES;
fprog = filter->prog->orig_prog;
+ if (!fprog)
+ goto out;
ret = fprog->len;
if (!len)