summaryrefslogtreecommitdiffstats
path: root/kernel/samples/bpf
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/samples/bpf
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/samples/bpf')
-rw-r--r--kernel/samples/bpf/Makefile33
-rw-r--r--kernel/samples/bpf/bpf_helpers.h55
-rw-r--r--kernel/samples/bpf/bpf_load.c57
-rw-r--r--kernel/samples/bpf/fds_example.c183
-rw-r--r--kernel/samples/bpf/lathist_kern.c99
-rw-r--r--kernel/samples/bpf/lathist_user.c103
-rw-r--r--kernel/samples/bpf/libbpf.c19
-rw-r--r--kernel/samples/bpf/libbpf.h11
-rw-r--r--kernel/samples/bpf/sockex3_kern.c290
-rw-r--r--kernel/samples/bpf/sockex3_user.c66
-rw-r--r--kernel/samples/bpf/tcbpf1_kern.c32
-rw-r--r--kernel/samples/bpf/test_verifier.c498
-rw-r--r--kernel/samples/bpf/trace_output_kern.c31
-rw-r--r--kernel/samples/bpf/trace_output_user.c196
-rw-r--r--kernel/samples/bpf/tracex1_kern.c2
-rw-r--r--kernel/samples/bpf/tracex2_kern.c30
-rw-r--r--kernel/samples/bpf/tracex2_user.c67
-rw-r--r--kernel/samples/bpf/tracex3_kern.c4
-rw-r--r--kernel/samples/bpf/tracex4_kern.c6
-rw-r--r--kernel/samples/bpf/tracex5_kern.c75
-rw-r--r--kernel/samples/bpf/tracex5_user.c46
-rw-r--r--kernel/samples/bpf/tracex6_kern.c27
-rw-r--r--kernel/samples/bpf/tracex6_user.c72
23 files changed, 1949 insertions, 53 deletions
diff --git a/kernel/samples/bpf/Makefile b/kernel/samples/bpf/Makefile
index 76e3458a5..edd638b58 100644
--- a/kernel/samples/bpf/Makefile
+++ b/kernel/samples/bpf/Makefile
@@ -4,47 +4,76 @@ obj- := dummy.o
# List of programs to build
hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example
+hostprogs-y += fds_example
hostprogs-y += sockex1
hostprogs-y += sockex2
+hostprogs-y += sockex3
hostprogs-y += tracex1
hostprogs-y += tracex2
hostprogs-y += tracex3
hostprogs-y += tracex4
+hostprogs-y += tracex5
+hostprogs-y += tracex6
+hostprogs-y += trace_output
+hostprogs-y += lathist
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
+fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
+tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
+tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
+trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
+lathist-objs := bpf_load.o libbpf.o lathist_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += sockex1_kern.o
always += sockex2_kern.o
+always += sockex3_kern.o
always += tracex1_kern.o
always += tracex2_kern.o
always += tracex3_kern.o
always += tracex4_kern.o
+always += tracex5_kern.o
+always += tracex6_kern.o
+always += trace_output_kern.o
always += tcbpf1_kern.o
+always += lathist_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_fds_example += -lelf
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_sockex3 += -lelf
HOSTLOADLIBES_tracex1 += -lelf
HOSTLOADLIBES_tracex2 += -lelf
HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
+HOSTLOADLIBES_tracex5 += -lelf
+HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_trace_output += -lelf -lrt
+HOSTLOADLIBES_lathist += -lelf
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
-%.o: %.c
+# asm/sysreg.h inline assmbly used by it is incompatible with llvm.
+# But, ehere is not easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
+$(obj)/%.o: $(src)/%.c
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
- -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/kernel/samples/bpf/bpf_helpers.h b/kernel/samples/bpf/bpf_helpers.h
index f960b5fb3..7ad19e1db 100644
--- a/kernel/samples/bpf/bpf_helpers.h
+++ b/kernel/samples/bpf/bpf_helpers.h
@@ -21,6 +21,24 @@ static unsigned long long (*bpf_ktime_get_ns)(void) =
(void *) BPF_FUNC_ktime_get_ns;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static int (*bpf_perf_event_read)(void *map, int index) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+ (void *) BPF_FUNC_perf_event_output;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -50,4 +68,41 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
+#if defined(__x86_64__)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+
+#elif defined(__s390x__)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+
+#elif defined(__aarch64__)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+
+#endif
#endif
diff --git a/kernel/samples/bpf/bpf_load.c b/kernel/samples/bpf/bpf_load.c
index 38dac5a53..da86a8e0a 100644
--- a/kernel/samples/bpf/bpf_load.c
+++ b/kernel/samples/bpf/bpf_load.c
@@ -16,6 +16,7 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
+#include <ctype.h>
#include "libbpf.h"
#include "bpf_helpers.h"
#include "bpf_load.h"
@@ -29,6 +30,19 @@ int map_fd[MAX_MAPS];
int prog_fd[MAX_PROGS];
int event_fd[MAX_PROGS];
int prog_cnt;
+int prog_array_fd = -1;
+
+static int populate_prog_array(const char *event, int prog_fd)
+{
+ int ind = atoi(event), err;
+
+ err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
+ if (err < 0) {
+ printf("failed to store prog_fd in prog_array\n");
+ return -1;
+ }
+ return 0;
+}
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
@@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1;
}
+ fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
+ if (fd < 0) {
+ printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
+ return -1;
+ }
+
+ prog_fd[prog_cnt++] = fd;
+
+ if (is_socket) {
+ event += 6;
+ if (*event != '/')
+ return 0;
+ event++;
+ if (!isdigit(*event)) {
+ printf("invalid prog number\n");
+ return -1;
+ }
+ return populate_prog_array(event, fd);
+ }
+
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
else
event += 10;
+ if (*event == 0) {
+ printf("event name cannot be empty\n");
+ return -1;
+ }
+
+ if (isdigit(*event))
+ return populate_prog_array(event, fd);
+
snprintf(buf, sizeof(buf),
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
is_kprobe ? 'p' : 'r', event, event);
@@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
}
}
- fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
-
- if (fd < 0) {
- printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
- return -1;
- }
-
- prog_fd[prog_cnt++] = fd;
-
- if (is_socket)
- return 0;
-
strcpy(buf, DEBUGFS);
strcat(buf, "events/kprobes/");
strcat(buf, event);
@@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len)
maps[i].max_entries);
if (map_fd[i] < 0)
return 1;
+
+ if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ prog_array_fd = map_fd[i];
}
return 0;
}
diff --git a/kernel/samples/bpf/fds_example.c b/kernel/samples/bpf/fds_example.c
new file mode 100644
index 000000000..e2fd16c3d
--- /dev/null
+++ b/kernel/samples/bpf/fds_example.c
@@ -0,0 +1,183 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "bpf_load.h"
+#include "libbpf.h"
+
+#define BPF_F_PIN (1 << 0)
+#define BPF_F_GET (1 << 1)
+#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET)
+
+#define BPF_F_KEY (1 << 2)
+#define BPF_F_VAL (1 << 3)
+#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL)
+
+#define BPF_M_UNSPEC 0
+#define BPF_M_MAP 1
+#define BPF_M_PROG 2
+
+static void usage(void)
+{
+ printf("Usage: fds_example [...]\n");
+ printf(" -F <file> File to pin/get object\n");
+ printf(" -P |- pin object\n");
+ printf(" -G `- get object\n");
+ printf(" -m eBPF map mode\n");
+ printf(" -k <key> |- map key\n");
+ printf(" -v <value> `- map value\n");
+ printf(" -p eBPF prog mode\n");
+ printf(" -o <object> `- object file\n");
+ printf(" -h Display this help.\n");
+}
+
+static int bpf_map_create(void)
+{
+ return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+ sizeof(uint32_t), 1024);
+}
+
+static int bpf_prog_create(const char *object)
+{
+ static const struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ if (object) {
+ assert(!load_bpf_file((char *)object));
+ return prog_fd[0];
+ } else {
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
+ insns, sizeof(insns), "GPL", 0);
+ }
+}
+
+static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
+ uint32_t value)
+{
+ int fd, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_map_create();
+ printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) {
+ ret = bpf_update_elem(fd, &key, &value, 0);
+ printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ } else if (flags & BPF_F_KEY) {
+ ret = bpf_lookup_elem(fd, &key, &value);
+ printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ }
+
+ return 0;
+}
+
+static int bpf_do_prog(const char *file, uint32_t flags, const char *object)
+{
+ int fd, sock, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_prog_create(object);
+ printf("bpf: prog fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ sock = open_raw_sock("lo");
+ assert(sock > 0);
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &fd, sizeof(fd));
+ printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, fd,
+ ret, strerror(errno));
+ assert(ret == 0);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = NULL, *object = NULL;
+ uint32_t key = 0, value = 0, flags = 0;
+ int opt, mode = BPF_M_UNSPEC;
+
+ while ((opt = getopt(argc, argv, "F:PGmk:v:po:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'F':
+ file = optarg;
+ break;
+ case 'P':
+ flags |= BPF_F_PIN;
+ break;
+ case 'G':
+ flags |= BPF_F_GET;
+ break;
+ /* Map-related args */
+ case 'm':
+ mode = BPF_M_MAP;
+ break;
+ case 'k':
+ key = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_KEY;
+ break;
+ case 'v':
+ value = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_VAL;
+ break;
+ /* Prog-related args */
+ case 'p':
+ mode = BPF_M_PROG;
+ break;
+ case 'o':
+ object = optarg;
+ break;
+ default:
+ goto out;
+ }
+ }
+
+ if (!(flags & BPF_F_PIN_GET) || !file)
+ goto out;
+
+ switch (mode) {
+ case BPF_M_MAP:
+ return bpf_do_map(file, flags, key, value);
+ case BPF_M_PROG:
+ return bpf_do_prog(file, flags, object);
+ }
+out:
+ usage();
+ return -1;
+}
diff --git a/kernel/samples/bpf/lathist_kern.c b/kernel/samples/bpf/lathist_kern.c
new file mode 100644
index 000000000..18fa08847
--- /dev/null
+++ b/kernel/samples/bpf/lathist_kern.c
@@ -0,0 +1,99 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+
+/* We need to stick to static allocated memory (an array instead of
+ * hash table) because managing dynamic memory from the
+ * trace_preempt_[on|off] tracepoints hooks is not supported.
+ */
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU,
+};
+
+SEC("kprobe/trace_preempt_off")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ int cpu = bpf_get_smp_processor_id();
+ u64 *ts = bpf_map_lookup_elem(&my_map, &cpu);
+
+ if (ts)
+ *ts = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+struct bpf_map_def SEC("maps") my_lat = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(long),
+ .max_entries = MAX_CPU * MAX_ENTRIES,
+};
+
+SEC("kprobe/trace_preempt_on")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ u64 *ts, cur_ts, delta;
+ int key, cpu;
+ long *val;
+
+ cpu = bpf_get_smp_processor_id();
+ ts = bpf_map_lookup_elem(&my_map, &cpu);
+ if (!ts)
+ return 0;
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = log2l(cur_ts - *ts);
+
+ if (delta > MAX_ENTRIES - 1)
+ delta = MAX_ENTRIES - 1;
+
+ key = cpu * MAX_ENTRIES + delta;
+ val = bpf_map_lookup_elem(&my_lat, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, 1);
+
+ return 0;
+
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/lathist_user.c b/kernel/samples/bpf/lathist_user.c
new file mode 100644
index 000000000..65da8c157
--- /dev/null
+++ b/kernel/samples/bpf/lathist_user.c
@@ -0,0 +1,103 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+#define MAX_STARS 40
+
+struct cpu_hist {
+ long data[MAX_ENTRIES];
+ long max;
+};
+
+static struct cpu_hist cpu_hist[MAX_CPU];
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+static void print_hist(void)
+{
+ char starstr[MAX_STARS];
+ struct cpu_hist *hist;
+ int i, j;
+
+ /* clear screen */
+ printf("\033[2J");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ hist = &cpu_hist[j];
+
+ /* ignore CPUs without data (maybe offline?) */
+ if (hist->max == 0)
+ continue;
+
+ printf("CPU %d\n", j);
+ printf(" latency : count distribution\n");
+ for (i = 1; i <= MAX_ENTRIES; i++) {
+ stars(starstr, hist->data[i - 1], hist->max, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1,
+ hist->data[i - 1], MAX_STARS, starstr);
+ }
+ }
+}
+
+static void get_data(int fd)
+{
+ long key, value;
+ int c, i;
+
+ for (i = 0; i < MAX_CPU; i++)
+ cpu_hist[i].max = 0;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_ENTRIES; i++) {
+ key = c * MAX_ENTRIES + i;
+ bpf_lookup_elem(fd, &key, &value);
+
+ cpu_hist[c].data[i] = value;
+ if (value > cpu_hist[c].max)
+ cpu_hist[c].max = value;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ while (1) {
+ get_data(map_fd[1]);
+ print_hist();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/kernel/samples/bpf/libbpf.c b/kernel/samples/bpf/libbpf.c
index 7e1efa7e2..65a8d48d2 100644
--- a/kernel/samples/bpf/libbpf.c
+++ b/kernel/samples/bpf/libbpf.c
@@ -103,6 +103,25 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ .bpf_fd = fd,
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
int open_raw_sock(const char *name)
{
struct sockaddr_ll sll;
diff --git a/kernel/samples/bpf/libbpf.h b/kernel/samples/bpf/libbpf.h
index 7235e292a..014aacf91 100644
--- a/kernel/samples/bpf/libbpf.h
+++ b/kernel/samples/bpf/libbpf.h
@@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
#define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE];
@@ -64,6 +67,14 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
.off = 0, \
.imm = 0 })
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
/* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM) \
diff --git a/kernel/samples/bpf/sockex3_kern.c b/kernel/samples/bpf/sockex3_kern.c
new file mode 100644
index 000000000..41ae2fd21
--- /dev/null
+++ b/kernel/samples/bpf/sockex3_kern.c
@@ -0,0 +1,290 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#include <uapi/linux/mpls.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 8,
+};
+
+#define PARSE_VLAN 1
+#define PARSE_MPLS 2
+#define PARSE_IP 3
+#define PARSE_IPV6 4
+
+/* protocol dispatch routine.
+ * It tail-calls next BPF program depending on eth proto
+ * Note, we could have used:
+ * bpf_tail_call(skb, &jmp_table, proto);
+ * but it would need large prog_array
+ */
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
+{
+ switch (proto) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
+ break;
+ case ETH_P_MPLS_UC:
+ case ETH_P_MPLS_MC:
+ bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
+ break;
+ case ETH_P_IP:
+ bpf_tail_call(skb, &jmp_table, PARSE_IP);
+ break;
+ case ETH_P_IPV6:
+ bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
+ break;
+ }
+}
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
+{
+ return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
+ & (IP_MF | IP_OFFSET);
+}
+
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
+{
+ __u64 w0 = load_word(ctx, off);
+ __u64 w1 = load_word(ctx, off + 4);
+ __u64 w2 = load_word(ctx, off + 8);
+ __u64 w3 = load_word(ctx, off + 12);
+
+ return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+}
+
+struct globals {
+ struct flow_keys flow;
+};
+
+struct bpf_map_def SEC("maps") percpu_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct globals),
+ .max_entries = 32,
+};
+
+/* user poor man's per_cpu until native support is ready */
+static struct globals *this_cpu_globals(void)
+{
+ u32 key = bpf_get_smp_processor_id();
+
+ return bpf_map_lookup_elem(&percpu_map, &key);
+}
+
+/* some simple stats for user space consumption */
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct flow_keys),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1024,
+};
+
+static void update_stats(struct __sk_buff *skb, struct globals *g)
+{
+ struct flow_keys key = g->flow;
+ struct pair *value;
+
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
+ } else {
+ struct pair val = {1, skb->len};
+
+ bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
+ }
+}
+
+static __always_inline void parse_ip_proto(struct __sk_buff *skb,
+ struct globals *g, __u32 ip_proto)
+{
+ __u32 nhoff = skb->cb[0];
+ int poff;
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u32 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u32 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, gre_proto);
+ break;
+ }
+ case IPPROTO_IPIP:
+ parse_eth_proto(skb, ETH_P_IP);
+ break;
+ case IPPROTO_IPV6:
+ parse_eth_proto(skb, ETH_P_IPV6);
+ break;
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ g->flow.ports = load_word(skb, nhoff);
+ case IPPROTO_ICMP:
+ g->flow.ip_proto = ip_proto;
+ update_stats(skb, g);
+ break;
+ default:
+ break;
+ }
+}
+
+PROG(PARSE_IP)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, verlen, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ if (unlikely(ip_is_fragment(skb, nhoff)))
+ return 0;
+
+ ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+
+ if (ip_proto != IPPROTO_GRE) {
+ g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+ g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+ }
+
+ verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+ nhoff += (verlen & 0xF) << 2;
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_IPV6)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ ip_proto = load_byte(skb,
+ nhoff + offsetof(struct ipv6hdr, nexthdr));
+ g->flow.src = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, saddr));
+ g->flow.dst = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, daddr));
+ nhoff += sizeof(struct ipv6hdr);
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_VLAN)(struct __sk_buff *skb)
+{
+ __u32 nhoff, proto;
+
+ nhoff = skb->cb[0];
+
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ skb->cb[0] = nhoff;
+
+ parse_eth_proto(skb, proto);
+
+ return 0;
+}
+
+PROG(PARSE_MPLS)(struct __sk_buff *skb)
+{
+ __u32 nhoff, label;
+
+ nhoff = skb->cb[0];
+
+ label = load_word(skb, nhoff);
+ nhoff += sizeof(struct mpls_label);
+ skb->cb[0] = nhoff;
+
+ if (label & MPLS_LS_S_MASK) {
+ __u8 verlen = load_byte(skb, nhoff);
+ if ((verlen & 0xF0) == 4)
+ parse_eth_proto(skb, ETH_P_IP);
+ else
+ parse_eth_proto(skb, ETH_P_IPV6);
+ } else {
+ parse_eth_proto(skb, ETH_P_MPLS_UC);
+ }
+
+ return 0;
+}
+
+SEC("socket/0")
+int main_prog(struct __sk_buff *skb)
+{
+ __u32 nhoff = ETH_HLEN;
+ __u32 proto = load_half(skb, 12);
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, proto);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/sockex3_user.c b/kernel/samples/bpf/sockex3_user.c
new file mode 100644
index 000000000..2617772d0
--- /dev/null
+++ b/kernel/samples/bpf/sockex3_user.c
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ sizeof(__u32)) == 0);
+
+ if (argc > 1)
+ f = popen("ping -c5 localhost", "r");
+ else
+ f = popen("netperf -l 4 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ struct flow_keys key = {}, next_key;
+ struct pair value;
+
+ sleep(1);
+ printf("IP src.port -> dst.port bytes packets\n");
+ while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[2], &next_key, &value);
+ printf("%s.%05d -> %s.%05d %12lld %12lld\n",
+ inet_ntoa((struct in_addr){htonl(next_key.src)}),
+ next_key.port16[0],
+ inet_ntoa((struct in_addr){htonl(next_key.dst)}),
+ next_key.port16[1],
+ value.bytes, value.packets);
+ key = next_key;
+ }
+ }
+ return 0;
+}
diff --git a/kernel/samples/bpf/tcbpf1_kern.c b/kernel/samples/bpf/tcbpf1_kern.c
index 7c27710f8..fa051b3d5 100644
--- a/kernel/samples/bpf/tcbpf1_kern.c
+++ b/kernel/samples/bpf/tcbpf1_kern.c
@@ -5,7 +5,7 @@
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
-
+#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"
/* compiler workaround */
@@ -21,7 +21,7 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
{
- __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
+ __u8 old_tos = load_byte(skb, TOS_OFF);
bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -34,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
{
- __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
+ __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -44,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
{
- __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
+ __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -53,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
SEC("classifier")
int bpf_prog1(struct __sk_buff *skb)
{
- __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
+ __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
long *value;
if (proto == IPPROTO_TCP) {
@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
return 0;
}
+SEC("redirect_xmit")
+int _redirect_xmit(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 0);
+}
+SEC("redirect_recv")
+int _redirect_recv(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 1);
+}
+SEC("clone_redirect_xmit")
+int _clone_redirect_xmit(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 0);
+ return TC_ACT_SHOT;
+}
+SEC("clone_redirect_recv")
+int _clone_redirect_recv(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 1);
+ return TC_ACT_SHOT;
+}
char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/test_verifier.c b/kernel/samples/bpf/test_verifier.c
index 12f3780af..563c507c0 100644
--- a/kernel/samples/bpf/test_verifier.c
+++ b/kernel/samples/bpf/test_verifier.c
@@ -15,20 +15,28 @@
#include <string.h>
#include <linux/filter.h>
#include <stddef.h>
+#include <stdbool.h>
+#include <sys/resource.h>
#include "libbpf.h"
#define MAX_INSNS 512
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define MAX_FIXUPS 8
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
- int fixup[32];
+ int fixup[MAX_FIXUPS];
+ int prog_array_fixup[MAX_FIXUPS];
const char *errstr;
+ const char *errstr_unpriv;
enum {
+ UNDEF,
ACCEPT,
REJECT
- } result;
+ } result, result_unpriv;
+ enum bpf_prog_type prog_type;
};
static struct bpf_test tests[] = {
@@ -95,6 +103,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -108,6 +117,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -218,6 +228,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "R0 !read_ok",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -293,7 +304,9 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
+ .result_unpriv = REJECT,
},
{
"check corrupted spill/fill",
@@ -309,6 +322,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "attempt to corrupt spilled",
.errstr = "corrupted spill",
.result = REJECT,
},
@@ -472,6 +486,7 @@ static struct bpf_test tests[] = {
},
.fixup = {3},
.errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R0 leaks addr",
.result = REJECT,
},
{
@@ -494,6 +509,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -520,6 +537,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -554,6 +573,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup = {24},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -602,6 +623,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -641,6 +664,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -698,6 +723,7 @@ static struct bpf_test tests[] = {
},
.fixup = {4},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -719,6 +745,7 @@ static struct bpf_test tests[] = {
},
.fixup = {6},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -741,7 +768,417 @@ static struct bpf_test tests[] = {
},
.fixup = {7},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "check skb->mark is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check skb->tc_index is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check non-u32 access to cb",
+ .insns = {
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check out of range skb->cb access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 256),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ },
+ {
+ "write skb fields from socket prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ },
+ {
+ "write skb fields from tc_cls_act prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "PTR_TO_STACK store/load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned access off -6 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on reg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned access off -2 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds low",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
.result = REJECT,
+ .errstr = "invalid stack off=-79992 size=8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds high",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=0 size=8",
+ },
+ {
+ "unpriv: return pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ },
+ {
+ "unpriv: add const to pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: add pointer to pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: neg pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: cmp pointer with const",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer comparison",
+ },
+ {
+ "unpriv: cmp pointer with pointer",
+ .insns = {
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 pointer comparison",
+ },
+ {
+ "unpriv: check that printk is disallowed",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "unknown func 6",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to helper function",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R4 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: indirectly pass pointer on stack to helper function",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ },
+ {
+ "unpriv: mangle pointer on stack 1",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: mangle pointer on stack 2",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: read pointer from stack in small chunks",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid size",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into ctx",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into map elem value",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: partial copy of pointer",
+ .insns = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 partial copy",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to tail_call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_array_fixup = {1},
+ .errstr_unpriv = "R3 leaks addr into helper",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp map pointer with zero",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {1},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: write into frame pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: cmp of frame pointer",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp of stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: obfuscate stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer arithmetic",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
},
};
@@ -758,13 +1195,24 @@ static int probe_filter_length(struct bpf_insn *fp)
static int create_map(void)
{
- long long key, value = 0;
int map_fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 1024);
- if (map_fd < 0) {
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(long long), sizeof(long long), 1024);
+ if (map_fd < 0)
printf("failed to create map '%s'\n", strerror(errno));
- }
+
+ return map_fd;
+}
+
+static int create_prog_array(void)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
+ sizeof(int), sizeof(int), 4);
+ if (map_fd < 0)
+ printf("failed to create prog_array '%s'\n", strerror(errno));
return map_fd;
}
@@ -772,12 +1220,17 @@ static int create_map(void)
static int test(void)
{
int prog_fd, i, pass_cnt = 0, err_cnt = 0;
+ bool unpriv = geteuid() != 0;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_insn *prog = tests[i].insns;
+ int prog_type = tests[i].prog_type;
int prog_len = probe_filter_length(prog);
int *fixup = tests[i].fixup;
- int map_fd = -1;
+ int *prog_array_fixup = tests[i].prog_array_fixup;
+ int expected_result;
+ const char *expected_errstr;
+ int map_fd = -1, prog_array_fd = -1;
if (*fixup) {
map_fd = create_map();
@@ -787,13 +1240,31 @@ static int test(void)
fixup++;
} while (*fixup);
}
+ if (*prog_array_fixup) {
+ prog_array_fd = create_prog_array();
+
+ do {
+ prog[*prog_array_fixup].imm = prog_array_fd;
+ prog_array_fixup++;
+ } while (*prog_array_fixup);
+ }
printf("#%d %s ", i, tests[i].descr);
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
- prog_len * sizeof(struct bpf_insn),
+ prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len * sizeof(struct bpf_insn),
"GPL", 0);
- if (tests[i].result == ACCEPT) {
+ if (unpriv && tests[i].result_unpriv != UNDEF)
+ expected_result = tests[i].result_unpriv;
+ else
+ expected_result = tests[i].result;
+
+ if (unpriv && tests[i].errstr_unpriv)
+ expected_errstr = tests[i].errstr_unpriv;
+ else
+ expected_errstr = tests[i].errstr;
+
+ if (expected_result == ACCEPT) {
if (prog_fd < 0) {
printf("FAIL\nfailed to load prog '%s'\n",
strerror(errno));
@@ -808,7 +1279,7 @@ static int test(void)
err_cnt++;
goto fail;
}
- if (strstr(bpf_log_buf, tests[i].errstr) == 0) {
+ if (strstr(bpf_log_buf, expected_errstr) == 0) {
printf("FAIL\nunexpected error message: %s",
bpf_log_buf);
err_cnt++;
@@ -821,6 +1292,8 @@ static int test(void)
fail:
if (map_fd >= 0)
close(map_fd);
+ if (prog_array_fd >= 0)
+ close(prog_array_fd);
close(prog_fd);
}
@@ -831,5 +1304,8 @@ fail:
int main(void)
{
+ struct rlimit r = {1 << 20, 1 << 20};
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
return test();
}
diff --git a/kernel/samples/bpf/trace_output_kern.c b/kernel/samples/bpf/trace_output_kern.c
new file mode 100644
index 000000000..8d8d1ec42
--- /dev/null
+++ b/kernel/samples/bpf/trace_output_kern.c
@@ -0,0 +1,31 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 2,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct S {
+ u64 pid;
+ u64 cookie;
+ } data;
+
+ memset(&data, 0, sizeof(data));
+ data.pid = bpf_get_current_pid_tgid();
+ data.cookie = 0x12345678;
+
+ bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/trace_output_user.c b/kernel/samples/bpf/trace_output_user.c
new file mode 100644
index 000000000..661a7d052
--- /dev/null
+++ b/kernel/samples/bpf/trace_output_user.c
@@ -0,0 +1,196 @@
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int pmu_fd;
+
+int page_size;
+int page_cnt = 8;
+volatile struct perf_event_mmap_page *header;
+
+typedef void (*print_fn)(void *data, int size);
+
+static int perf_event_mmap(int fd)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ header = base;
+ return 0;
+}
+
+static int perf_event_poll(int fd)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+ return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+void perf_event_read(print_fn fn)
+{
+ __u64 data_tail = header->data_tail;
+ __u64 data_head = header->data_head;
+ __u64 buffer_size = page_cnt * page_size;
+ void *base, *begin, *end;
+ char buf[256];
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return;
+
+ base = ((char *)header) + page_size;
+
+ begin = base + data_tail % buffer_size;
+ end = base + data_head % buffer_size;
+
+ while (begin != end) {
+ struct perf_event_sample *e;
+
+ e = begin;
+ if (begin + e->header.size > base + buffer_size) {
+ long len = base + buffer_size - begin;
+
+ assert(len < e->header.size);
+ memcpy(buf, begin, len);
+ memcpy(buf + len, base, e->header.size - len);
+ e = (void *) buf;
+ begin = base + e->header.size - len;
+ } else if (begin + e->header.size == base + buffer_size) {
+ begin = base;
+ } else {
+ begin += e->header.size;
+ }
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ fn(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ header->data_tail = data_head;
+}
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static __u64 start_time;
+
+#define MAX_CNT 100000ll
+
+static void print_bpf_output(void *data, int size)
+{
+ static __u64 cnt;
+ struct {
+ __u64 pid;
+ __u64 cookie;
+ } *e = data;
+
+ if (e->cookie != 0x12345678) {
+ printf("BUG pid %llx cookie %llx sized %d\n",
+ e->pid, e->cookie, size);
+ kill(0, SIGINT);
+ }
+
+ cnt++;
+
+ if (cnt == MAX_CNT) {
+ printf("recv %lld events per sec\n",
+ MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ kill(0, SIGINT);
+ }
+}
+
+static void test_bpf_perf_event(void)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ };
+ int key = 0;
+
+ pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+
+ assert(pmu_fd >= 0);
+ assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
+ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+
+ if (perf_event_mmap(pmu_fd) < 0)
+ return 1;
+
+ f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
+ (void) f;
+
+ start_time = time_get_ns();
+ for (;;) {
+ perf_event_poll(pmu_fd);
+ perf_event_read(print_bpf_output);
+ }
+
+ return 0;
+}
diff --git a/kernel/samples/bpf/tracex1_kern.c b/kernel/samples/bpf/tracex1_kern.c
index 316204637..3f450a8fa 100644
--- a/kernel/samples/bpf/tracex1_kern.c
+++ b/kernel/samples/bpf/tracex1_kern.c
@@ -29,7 +29,7 @@ int bpf_prog1(struct pt_regs *ctx)
int len;
/* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) ctx->di;
+ skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
dev = _(skb->dev);
diff --git a/kernel/samples/bpf/tracex2_kern.c b/kernel/samples/bpf/tracex2_kern.c
index 19ec1cfc4..b32367cfb 100644
--- a/kernel/samples/bpf/tracex2_kern.c
+++ b/kernel/samples/bpf/tracex2_kern.c
@@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx)
long init_val = 1;
long *value;
- /* x64 specific: read ip of kfree_skb caller.
+ /* x64/s390x specific: read ip of kfree_skb caller.
* non-portable version of __builtin_return_address(0)
*/
- bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp);
+ bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx));
value = bpf_map_lookup_elem(&my_map, &loc);
if (value)
@@ -62,24 +62,38 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
+struct hist_key {
+ char comm[16];
+ u64 pid_tgid;
+ u64 uid_gid;
+ u32 index;
+};
+
struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct hist_key),
.value_size = sizeof(long),
- .max_entries = 64,
+ .max_entries = 1024,
};
SEC("kprobe/sys_write")
int bpf_prog3(struct pt_regs *ctx)
{
- long write_size = ctx->dx; /* arg3 */
+ long write_size = PT_REGS_PARM3(ctx);
long init_val = 1;
long *value;
- u32 index = log2l(write_size);
+ struct hist_key key = {};
+
+ key.index = log2l(write_size);
+ key.pid_tgid = bpf_get_current_pid_tgid();
+ key.uid_gid = bpf_get_current_uid_gid();
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
- value = bpf_map_lookup_elem(&my_hist_map, &index);
+ value = bpf_map_lookup_elem(&my_hist_map, &key);
if (value)
__sync_fetch_and_add(value, 1);
+ else
+ bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
return 0;
}
char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/tracex2_user.c b/kernel/samples/bpf/tracex2_user.c
index 91b8d0896..cd0241c14 100644
--- a/kernel/samples/bpf/tracex2_user.c
+++ b/kernel/samples/bpf/tracex2_user.c
@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <signal.h>
#include <linux/bpf.h>
+#include <string.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width)
str[i] = '\0';
}
-static void print_hist(int fd)
+struct task {
+ char comm[16];
+ __u64 pid_tgid;
+ __u64 uid_gid;
+};
+
+struct hist_key {
+ struct task t;
+ __u32 index;
+};
+
+#define SIZE sizeof(struct task)
+
+static void print_hist_for_pid(int fd, void *task)
{
- int key;
+ struct hist_key key = {}, next_key;
+ char starstr[MAX_STARS];
long value;
long data[MAX_INDEX] = {};
- char starstr[MAX_STARS];
- int i;
int max_ind = -1;
long max_value = 0;
+ int i, ind;
- for (key = 0; key < MAX_INDEX; key++) {
- bpf_lookup_elem(fd, &key, &value);
- data[key] = value;
- if (value && key > max_ind)
- max_ind = key;
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ if (memcmp(&next_key, task, SIZE)) {
+ key = next_key;
+ continue;
+ }
+ bpf_lookup_elem(fd, &next_key, &value);
+ ind = next_key.index;
+ data[ind] = value;
+ if (value && ind > max_ind)
+ max_ind = ind;
if (value > max_value)
max_value = value;
+ key = next_key;
}
printf(" syscall write() stats\n");
@@ -48,6 +68,35 @@ static void print_hist(int fd)
MAX_STARS, starstr);
}
}
+
+static void print_hist(int fd)
+{
+ struct hist_key key = {}, next_key;
+ static struct task tasks[1024];
+ int task_cnt = 0;
+ int i;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ int found = 0;
+
+ for (i = 0; i < task_cnt; i++)
+ if (memcmp(&tasks[i], &next_key, SIZE) == 0)
+ found = 1;
+ if (!found)
+ memcpy(&tasks[task_cnt++], &next_key, SIZE);
+ key = next_key;
+ }
+
+ for (i = 0; i < task_cnt; i++) {
+ printf("\npid %d cmd %s uid %d\n",
+ (__u32) tasks[i].pid_tgid,
+ tasks[i].comm,
+ (__u32) tasks[i].uid_gid);
+ print_hist_for_pid(fd, &tasks[i]);
+ }
+
+}
+
static void int_exit(int sig)
{
print_hist(map_fd[1]);
diff --git a/kernel/samples/bpf/tracex3_kern.c b/kernel/samples/bpf/tracex3_kern.c
index 255ff2792..bf337fbb0 100644
--- a/kernel/samples/bpf/tracex3_kern.c
+++ b/kernel/samples/bpf/tracex3_kern.c
@@ -23,7 +23,7 @@ struct bpf_map_def SEC("maps") my_map = {
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
- long rq = ctx->di;
+ long rq = PT_REGS_PARM1(ctx);
u64 val = bpf_ktime_get_ns();
bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
@@ -51,7 +51,7 @@ struct bpf_map_def SEC("maps") lat_map = {
SEC("kprobe/blk_update_request")
int bpf_prog2(struct pt_regs *ctx)
{
- long rq = ctx->di;
+ long rq = PT_REGS_PARM1(ctx);
u64 *value, l, base;
u32 index;
diff --git a/kernel/samples/bpf/tracex4_kern.c b/kernel/samples/bpf/tracex4_kern.c
index 126b80512..ac4671420 100644
--- a/kernel/samples/bpf/tracex4_kern.c
+++ b/kernel/samples/bpf/tracex4_kern.c
@@ -27,7 +27,7 @@ struct bpf_map_def SEC("maps") my_map = {
SEC("kprobe/kmem_cache_free")
int bpf_prog1(struct pt_regs *ctx)
{
- long ptr = ctx->si;
+ long ptr = PT_REGS_PARM2(ctx);
bpf_map_delete_elem(&my_map, &ptr);
return 0;
@@ -36,11 +36,11 @@ int bpf_prog1(struct pt_regs *ctx)
SEC("kretprobe/kmem_cache_alloc_node")
int bpf_prog2(struct pt_regs *ctx)
{
- long ptr = ctx->ax;
+ long ptr = PT_REGS_RC(ctx);
long ip = 0;
/* get ip address of kmem_cache_alloc_node() caller */
- bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip)));
+ bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip)));
struct pair v = {
.val = bpf_ktime_get_ns(),
diff --git a/kernel/samples/bpf/tracex5_kern.c b/kernel/samples/bpf/tracex5_kern.c
new file mode 100644
index 000000000..b3f4295bf
--- /dev/null
+++ b/kernel/samples/bpf/tracex5_kern.c
@@ -0,0 +1,75 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/seccomp.h>
+#include "bpf_helpers.h"
+
+#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") progs = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1024,
+};
+
+SEC("kprobe/seccomp_phase1")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+
+ /* dispatch into next BPF program depending on syscall number */
+ bpf_tail_call(ctx, &progs, sd.nr);
+
+ /* fall through -> unknown syscall */
+ if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
+ char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
+ bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
+ }
+ return 0;
+}
+
+/* we jump here when syscall number == __NR_write */
+PROG(__NR_write)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+ if (sd.args[2] == 512) {
+ char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_read)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+ if (sd.args[2] > 128 && sd.args[2] <= 1024) {
+ char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_mmap)(struct pt_regs *ctx)
+{
+ char fmt[] = "mmap\n";
+ bpf_trace_printk(fmt, sizeof(fmt));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/tracex5_user.c b/kernel/samples/bpf/tracex5_user.c
new file mode 100644
index 000000000..a04dd3cd4
--- /dev/null
+++ b/kernel/samples/bpf/tracex5_user.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+/* install fake seccomp program to enable seccomp code path inside the kernel,
+ * so that our kprobe attached to seccomp_phase1() can be triggered
+ */
+static void install_accept_all_seccomp(void)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .filter = filter,
+ };
+ if (prctl(PR_SET_SECCOMP, 2, &prog))
+ perror("prctl");
+}
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ install_accept_all_seccomp();
+
+ f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
+ (void) f;
+
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/kernel/samples/bpf/tracex6_kern.c b/kernel/samples/bpf/tracex6_kern.c
new file mode 100644
index 000000000..be479c4af
--- /dev/null
+++ b/kernel/samples/bpf/tracex6_kern.c
@@ -0,0 +1,27 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 32,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ u64 count;
+ u32 key = bpf_get_smp_processor_id();
+ char fmt[] = "CPU-%d %llu\n";
+
+ count = bpf_perf_event_read(&my_map, key);
+ bpf_trace_printk(fmt, sizeof(fmt), key, count);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/tracex6_user.c b/kernel/samples/bpf/tracex6_user.c
new file mode 100644
index 000000000..8ea4976cf
--- /dev/null
+++ b/kernel/samples/bpf/tracex6_user.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+
+static void test_bpf_perf_event(void)
+{
+ int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ int *pmu_fd = malloc(nr_cpus * sizeof(int));
+ int status, i;
+
+ struct perf_event_attr attr_insn_pmu = {
+ .freq = 0,
+ .sample_period = SAMPLE_PERIOD,
+ .inherit = 0,
+ .type = PERF_TYPE_HARDWARE,
+ .read_format = 0,
+ .sample_type = 0,
+ .config = 0,/* PMU: cycles */
+ };
+
+ for (i = 0; i < nr_cpus; i++) {
+ pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
+ if (pmu_fd[i] < 0) {
+ printf("event syscall failed\n");
+ goto exit;
+ }
+
+ bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
+ ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ }
+
+ status = system("ls > /dev/null");
+ if (status)
+ goto exit;
+ status = system("sleep 2");
+ if (status)
+ goto exit;
+
+exit:
+ for (i = 0; i < nr_cpus; i++)
+ close(pmu_fd[i]);
+ close(map_fd[0]);
+ free(pmu_fd);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+ read_trace_pipe();
+
+ return 0;
+}