summaryrefslogtreecommitdiffstats
path: root/kernel/samples
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/samples
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/samples')
-rw-r--r--kernel/samples/Kconfig6
-rw-r--r--kernel/samples/Makefile3
-rw-r--r--kernel/samples/bpf/Makefile33
-rw-r--r--kernel/samples/bpf/bpf_helpers.h55
-rw-r--r--kernel/samples/bpf/bpf_load.c57
-rw-r--r--kernel/samples/bpf/fds_example.c183
-rw-r--r--kernel/samples/bpf/lathist_kern.c99
-rw-r--r--kernel/samples/bpf/lathist_user.c103
-rw-r--r--kernel/samples/bpf/libbpf.c19
-rw-r--r--kernel/samples/bpf/libbpf.h11
-rw-r--r--kernel/samples/bpf/sockex3_kern.c290
-rw-r--r--kernel/samples/bpf/sockex3_user.c66
-rw-r--r--kernel/samples/bpf/tcbpf1_kern.c32
-rw-r--r--kernel/samples/bpf/test_verifier.c498
-rw-r--r--kernel/samples/bpf/trace_output_kern.c31
-rw-r--r--kernel/samples/bpf/trace_output_user.c196
-rw-r--r--kernel/samples/bpf/tracex1_kern.c2
-rw-r--r--kernel/samples/bpf/tracex2_kern.c30
-rw-r--r--kernel/samples/bpf/tracex2_user.c67
-rw-r--r--kernel/samples/bpf/tracex3_kern.c4
-rw-r--r--kernel/samples/bpf/tracex4_kern.c6
-rw-r--r--kernel/samples/bpf/tracex5_kern.c75
-rw-r--r--kernel/samples/bpf/tracex5_user.c46
-rw-r--r--kernel/samples/bpf/tracex6_kern.c27
-rw-r--r--kernel/samples/bpf/tracex6_user.c72
-rw-r--r--kernel/samples/configfs/Makefile2
-rw-r--r--kernel/samples/configfs/configfs_sample.c404
-rw-r--r--kernel/samples/kprobes/jprobe_example.c14
-rw-r--r--kernel/samples/kprobes/kprobe_example.c6
-rw-r--r--kernel/samples/kprobes/kretprobe_example.c4
-rw-r--r--kernel/samples/pktgen/README.rst43
-rw-r--r--kernel/samples/pktgen/functions.sh121
-rw-r--r--kernel/samples/pktgen/parameters.sh97
-rwxr-xr-xkernel/samples/pktgen/pktgen.conf-1-159
-rwxr-xr-xkernel/samples/pktgen/pktgen.conf-2-166
-rwxr-xr-xkernel/samples/pktgen/pktgen.conf-2-273
-rwxr-xr-xkernel/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh86
-rwxr-xr-xkernel/samples/pktgen/pktgen_sample01_simple.sh71
-rwxr-xr-xkernel/samples/pktgen/pktgen_sample02_multiqueue.sh75
-rwxr-xr-xkernel/samples/pktgen/pktgen_sample03_burst_single_flow.sh82
-rw-r--r--kernel/samples/trace_events/trace-events-sample.h6
41 files changed, 2953 insertions, 267 deletions
diff --git a/kernel/samples/Kconfig b/kernel/samples/Kconfig
index 224ebb46b..d54f28c6d 100644
--- a/kernel/samples/Kconfig
+++ b/kernel/samples/Kconfig
@@ -70,4 +70,10 @@ config SAMPLE_LIVEPATCH
Builds a sample live patch that replaces the procfs handler
for /proc/cmdline to print "this has been live patched".
+config SAMPLE_CONFIGFS
+ tristate "Build configfs patching sample -- loadable modules only"
+ depends on CONFIGFS_FS && m
+ help
+ Builds a sample configfs interface.
+
endif # SAMPLES
diff --git a/kernel/samples/Makefile b/kernel/samples/Makefile
index f00257bcc..48001d7e2 100644
--- a/kernel/samples/Makefile
+++ b/kernel/samples/Makefile
@@ -1,4 +1,5 @@
# Makefile for Linux samples code
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
- hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
+ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
+ configfs/
diff --git a/kernel/samples/bpf/Makefile b/kernel/samples/bpf/Makefile
index 76e3458a5..edd638b58 100644
--- a/kernel/samples/bpf/Makefile
+++ b/kernel/samples/bpf/Makefile
@@ -4,47 +4,76 @@ obj- := dummy.o
# List of programs to build
hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example
+hostprogs-y += fds_example
hostprogs-y += sockex1
hostprogs-y += sockex2
+hostprogs-y += sockex3
hostprogs-y += tracex1
hostprogs-y += tracex2
hostprogs-y += tracex3
hostprogs-y += tracex4
+hostprogs-y += tracex5
+hostprogs-y += tracex6
+hostprogs-y += trace_output
+hostprogs-y += lathist
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
+fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
+tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
+tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
+trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
+lathist-objs := bpf_load.o libbpf.o lathist_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += sockex1_kern.o
always += sockex2_kern.o
+always += sockex3_kern.o
always += tracex1_kern.o
always += tracex2_kern.o
always += tracex3_kern.o
always += tracex4_kern.o
+always += tracex5_kern.o
+always += tracex6_kern.o
+always += trace_output_kern.o
always += tcbpf1_kern.o
+always += lathist_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_fds_example += -lelf
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_sockex3 += -lelf
HOSTLOADLIBES_tracex1 += -lelf
HOSTLOADLIBES_tracex2 += -lelf
HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
+HOSTLOADLIBES_tracex5 += -lelf
+HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_trace_output += -lelf -lrt
+HOSTLOADLIBES_lathist += -lelf
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
-%.o: %.c
+# asm/sysreg.h inline assmbly used by it is incompatible with llvm.
+# But, ehere is not easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
+$(obj)/%.o: $(src)/%.c
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
- -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/kernel/samples/bpf/bpf_helpers.h b/kernel/samples/bpf/bpf_helpers.h
index f960b5fb3..7ad19e1db 100644
--- a/kernel/samples/bpf/bpf_helpers.h
+++ b/kernel/samples/bpf/bpf_helpers.h
@@ -21,6 +21,24 @@ static unsigned long long (*bpf_ktime_get_ns)(void) =
(void *) BPF_FUNC_ktime_get_ns;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static int (*bpf_perf_event_read)(void *map, int index) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+ (void *) BPF_FUNC_perf_event_output;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -50,4 +68,41 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
+#if defined(__x86_64__)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+
+#elif defined(__s390x__)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+
+#elif defined(__aarch64__)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+
+#endif
#endif
diff --git a/kernel/samples/bpf/bpf_load.c b/kernel/samples/bpf/bpf_load.c
index 38dac5a53..da86a8e0a 100644
--- a/kernel/samples/bpf/bpf_load.c
+++ b/kernel/samples/bpf/bpf_load.c
@@ -16,6 +16,7 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
+#include <ctype.h>
#include "libbpf.h"
#include "bpf_helpers.h"
#include "bpf_load.h"
@@ -29,6 +30,19 @@ int map_fd[MAX_MAPS];
int prog_fd[MAX_PROGS];
int event_fd[MAX_PROGS];
int prog_cnt;
+int prog_array_fd = -1;
+
+static int populate_prog_array(const char *event, int prog_fd)
+{
+ int ind = atoi(event), err;
+
+ err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
+ if (err < 0) {
+ printf("failed to store prog_fd in prog_array\n");
+ return -1;
+ }
+ return 0;
+}
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
@@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1;
}
+ fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
+ if (fd < 0) {
+ printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
+ return -1;
+ }
+
+ prog_fd[prog_cnt++] = fd;
+
+ if (is_socket) {
+ event += 6;
+ if (*event != '/')
+ return 0;
+ event++;
+ if (!isdigit(*event)) {
+ printf("invalid prog number\n");
+ return -1;
+ }
+ return populate_prog_array(event, fd);
+ }
+
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
else
event += 10;
+ if (*event == 0) {
+ printf("event name cannot be empty\n");
+ return -1;
+ }
+
+ if (isdigit(*event))
+ return populate_prog_array(event, fd);
+
snprintf(buf, sizeof(buf),
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
is_kprobe ? 'p' : 'r', event, event);
@@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
}
}
- fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
-
- if (fd < 0) {
- printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
- return -1;
- }
-
- prog_fd[prog_cnt++] = fd;
-
- if (is_socket)
- return 0;
-
strcpy(buf, DEBUGFS);
strcat(buf, "events/kprobes/");
strcat(buf, event);
@@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len)
maps[i].max_entries);
if (map_fd[i] < 0)
return 1;
+
+ if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ prog_array_fd = map_fd[i];
}
return 0;
}
diff --git a/kernel/samples/bpf/fds_example.c b/kernel/samples/bpf/fds_example.c
new file mode 100644
index 000000000..e2fd16c3d
--- /dev/null
+++ b/kernel/samples/bpf/fds_example.c
@@ -0,0 +1,183 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "bpf_load.h"
+#include "libbpf.h"
+
+#define BPF_F_PIN (1 << 0)
+#define BPF_F_GET (1 << 1)
+#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET)
+
+#define BPF_F_KEY (1 << 2)
+#define BPF_F_VAL (1 << 3)
+#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL)
+
+#define BPF_M_UNSPEC 0
+#define BPF_M_MAP 1
+#define BPF_M_PROG 2
+
+static void usage(void)
+{
+ printf("Usage: fds_example [...]\n");
+ printf(" -F <file> File to pin/get object\n");
+ printf(" -P |- pin object\n");
+ printf(" -G `- get object\n");
+ printf(" -m eBPF map mode\n");
+ printf(" -k <key> |- map key\n");
+ printf(" -v <value> `- map value\n");
+ printf(" -p eBPF prog mode\n");
+ printf(" -o <object> `- object file\n");
+ printf(" -h Display this help.\n");
+}
+
+static int bpf_map_create(void)
+{
+ return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+ sizeof(uint32_t), 1024);
+}
+
+static int bpf_prog_create(const char *object)
+{
+ static const struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ if (object) {
+ assert(!load_bpf_file((char *)object));
+ return prog_fd[0];
+ } else {
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
+ insns, sizeof(insns), "GPL", 0);
+ }
+}
+
+static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
+ uint32_t value)
+{
+ int fd, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_map_create();
+ printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) {
+ ret = bpf_update_elem(fd, &key, &value, 0);
+ printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ } else if (flags & BPF_F_KEY) {
+ ret = bpf_lookup_elem(fd, &key, &value);
+ printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ }
+
+ return 0;
+}
+
+static int bpf_do_prog(const char *file, uint32_t flags, const char *object)
+{
+ int fd, sock, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_prog_create(object);
+ printf("bpf: prog fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ sock = open_raw_sock("lo");
+ assert(sock > 0);
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &fd, sizeof(fd));
+ printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, fd,
+ ret, strerror(errno));
+ assert(ret == 0);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = NULL, *object = NULL;
+ uint32_t key = 0, value = 0, flags = 0;
+ int opt, mode = BPF_M_UNSPEC;
+
+ while ((opt = getopt(argc, argv, "F:PGmk:v:po:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'F':
+ file = optarg;
+ break;
+ case 'P':
+ flags |= BPF_F_PIN;
+ break;
+ case 'G':
+ flags |= BPF_F_GET;
+ break;
+ /* Map-related args */
+ case 'm':
+ mode = BPF_M_MAP;
+ break;
+ case 'k':
+ key = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_KEY;
+ break;
+ case 'v':
+ value = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_VAL;
+ break;
+ /* Prog-related args */
+ case 'p':
+ mode = BPF_M_PROG;
+ break;
+ case 'o':
+ object = optarg;
+ break;
+ default:
+ goto out;
+ }
+ }
+
+ if (!(flags & BPF_F_PIN_GET) || !file)
+ goto out;
+
+ switch (mode) {
+ case BPF_M_MAP:
+ return bpf_do_map(file, flags, key, value);
+ case BPF_M_PROG:
+ return bpf_do_prog(file, flags, object);
+ }
+out:
+ usage();
+ return -1;
+}
diff --git a/kernel/samples/bpf/lathist_kern.c b/kernel/samples/bpf/lathist_kern.c
new file mode 100644
index 000000000..18fa08847
--- /dev/null
+++ b/kernel/samples/bpf/lathist_kern.c
@@ -0,0 +1,99 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+
+/* We need to stick to static allocated memory (an array instead of
+ * hash table) because managing dynamic memory from the
+ * trace_preempt_[on|off] tracepoints hooks is not supported.
+ */
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU,
+};
+
+SEC("kprobe/trace_preempt_off")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ int cpu = bpf_get_smp_processor_id();
+ u64 *ts = bpf_map_lookup_elem(&my_map, &cpu);
+
+ if (ts)
+ *ts = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+struct bpf_map_def SEC("maps") my_lat = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(long),
+ .max_entries = MAX_CPU * MAX_ENTRIES,
+};
+
+SEC("kprobe/trace_preempt_on")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ u64 *ts, cur_ts, delta;
+ int key, cpu;
+ long *val;
+
+ cpu = bpf_get_smp_processor_id();
+ ts = bpf_map_lookup_elem(&my_map, &cpu);
+ if (!ts)
+ return 0;
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = log2l(cur_ts - *ts);
+
+ if (delta > MAX_ENTRIES - 1)
+ delta = MAX_ENTRIES - 1;
+
+ key = cpu * MAX_ENTRIES + delta;
+ val = bpf_map_lookup_elem(&my_lat, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, 1);
+
+ return 0;
+
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/lathist_user.c b/kernel/samples/bpf/lathist_user.c
new file mode 100644
index 000000000..65da8c157
--- /dev/null
+++ b/kernel/samples/bpf/lathist_user.c
@@ -0,0 +1,103 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+#define MAX_STARS 40
+
+struct cpu_hist {
+ long data[MAX_ENTRIES];
+ long max;
+};
+
+static struct cpu_hist cpu_hist[MAX_CPU];
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+static void print_hist(void)
+{
+ char starstr[MAX_STARS];
+ struct cpu_hist *hist;
+ int i, j;
+
+ /* clear screen */
+ printf("\033[2J");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ hist = &cpu_hist[j];
+
+ /* ignore CPUs without data (maybe offline?) */
+ if (hist->max == 0)
+ continue;
+
+ printf("CPU %d\n", j);
+ printf(" latency : count distribution\n");
+ for (i = 1; i <= MAX_ENTRIES; i++) {
+ stars(starstr, hist->data[i - 1], hist->max, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1,
+ hist->data[i - 1], MAX_STARS, starstr);
+ }
+ }
+}
+
+static void get_data(int fd)
+{
+ long key, value;
+ int c, i;
+
+ for (i = 0; i < MAX_CPU; i++)
+ cpu_hist[i].max = 0;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_ENTRIES; i++) {
+ key = c * MAX_ENTRIES + i;
+ bpf_lookup_elem(fd, &key, &value);
+
+ cpu_hist[c].data[i] = value;
+ if (value > cpu_hist[c].max)
+ cpu_hist[c].max = value;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ while (1) {
+ get_data(map_fd[1]);
+ print_hist();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/kernel/samples/bpf/libbpf.c b/kernel/samples/bpf/libbpf.c
index 7e1efa7e2..65a8d48d2 100644
--- a/kernel/samples/bpf/libbpf.c
+++ b/kernel/samples/bpf/libbpf.c
@@ -103,6 +103,25 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ .bpf_fd = fd,
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
int open_raw_sock(const char *name)
{
struct sockaddr_ll sll;
diff --git a/kernel/samples/bpf/libbpf.h b/kernel/samples/bpf/libbpf.h
index 7235e292a..014aacf91 100644
--- a/kernel/samples/bpf/libbpf.h
+++ b/kernel/samples/bpf/libbpf.h
@@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
#define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE];
@@ -64,6 +67,14 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
.off = 0, \
.imm = 0 })
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
/* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM) \
diff --git a/kernel/samples/bpf/sockex3_kern.c b/kernel/samples/bpf/sockex3_kern.c
new file mode 100644
index 000000000..41ae2fd21
--- /dev/null
+++ b/kernel/samples/bpf/sockex3_kern.c
@@ -0,0 +1,290 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#include <uapi/linux/mpls.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 8,
+};
+
+#define PARSE_VLAN 1
+#define PARSE_MPLS 2
+#define PARSE_IP 3
+#define PARSE_IPV6 4
+
+/* protocol dispatch routine.
+ * It tail-calls next BPF program depending on eth proto
+ * Note, we could have used:
+ * bpf_tail_call(skb, &jmp_table, proto);
+ * but it would need large prog_array
+ */
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
+{
+ switch (proto) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
+ break;
+ case ETH_P_MPLS_UC:
+ case ETH_P_MPLS_MC:
+ bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
+ break;
+ case ETH_P_IP:
+ bpf_tail_call(skb, &jmp_table, PARSE_IP);
+ break;
+ case ETH_P_IPV6:
+ bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
+ break;
+ }
+}
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
+{
+ return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
+ & (IP_MF | IP_OFFSET);
+}
+
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
+{
+ __u64 w0 = load_word(ctx, off);
+ __u64 w1 = load_word(ctx, off + 4);
+ __u64 w2 = load_word(ctx, off + 8);
+ __u64 w3 = load_word(ctx, off + 12);
+
+ return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+}
+
+struct globals {
+ struct flow_keys flow;
+};
+
+struct bpf_map_def SEC("maps") percpu_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct globals),
+ .max_entries = 32,
+};
+
+/* user poor man's per_cpu until native support is ready */
+static struct globals *this_cpu_globals(void)
+{
+ u32 key = bpf_get_smp_processor_id();
+
+ return bpf_map_lookup_elem(&percpu_map, &key);
+}
+
+/* some simple stats for user space consumption */
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct flow_keys),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1024,
+};
+
+static void update_stats(struct __sk_buff *skb, struct globals *g)
+{
+ struct flow_keys key = g->flow;
+ struct pair *value;
+
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
+ } else {
+ struct pair val = {1, skb->len};
+
+ bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
+ }
+}
+
+static __always_inline void parse_ip_proto(struct __sk_buff *skb,
+ struct globals *g, __u32 ip_proto)
+{
+ __u32 nhoff = skb->cb[0];
+ int poff;
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u32 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u32 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, gre_proto);
+ break;
+ }
+ case IPPROTO_IPIP:
+ parse_eth_proto(skb, ETH_P_IP);
+ break;
+ case IPPROTO_IPV6:
+ parse_eth_proto(skb, ETH_P_IPV6);
+ break;
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ g->flow.ports = load_word(skb, nhoff);
+ case IPPROTO_ICMP:
+ g->flow.ip_proto = ip_proto;
+ update_stats(skb, g);
+ break;
+ default:
+ break;
+ }
+}
+
+PROG(PARSE_IP)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, verlen, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ if (unlikely(ip_is_fragment(skb, nhoff)))
+ return 0;
+
+ ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+
+ if (ip_proto != IPPROTO_GRE) {
+ g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+ g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+ }
+
+ verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+ nhoff += (verlen & 0xF) << 2;
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_IPV6)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ ip_proto = load_byte(skb,
+ nhoff + offsetof(struct ipv6hdr, nexthdr));
+ g->flow.src = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, saddr));
+ g->flow.dst = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, daddr));
+ nhoff += sizeof(struct ipv6hdr);
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_VLAN)(struct __sk_buff *skb)
+{
+ __u32 nhoff, proto;
+
+ nhoff = skb->cb[0];
+
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ skb->cb[0] = nhoff;
+
+ parse_eth_proto(skb, proto);
+
+ return 0;
+}
+
+PROG(PARSE_MPLS)(struct __sk_buff *skb)
+{
+ __u32 nhoff, label;
+
+ nhoff = skb->cb[0];
+
+ label = load_word(skb, nhoff);
+ nhoff += sizeof(struct mpls_label);
+ skb->cb[0] = nhoff;
+
+ if (label & MPLS_LS_S_MASK) {
+ __u8 verlen = load_byte(skb, nhoff);
+ if ((verlen & 0xF0) == 4)
+ parse_eth_proto(skb, ETH_P_IP);
+ else
+ parse_eth_proto(skb, ETH_P_IPV6);
+ } else {
+ parse_eth_proto(skb, ETH_P_MPLS_UC);
+ }
+
+ return 0;
+}
+
+SEC("socket/0")
+int main_prog(struct __sk_buff *skb)
+{
+ __u32 nhoff = ETH_HLEN;
+ __u32 proto = load_half(skb, 12);
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, proto);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/sockex3_user.c b/kernel/samples/bpf/sockex3_user.c
new file mode 100644
index 000000000..2617772d0
--- /dev/null
+++ b/kernel/samples/bpf/sockex3_user.c
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ sizeof(__u32)) == 0);
+
+ if (argc > 1)
+ f = popen("ping -c5 localhost", "r");
+ else
+ f = popen("netperf -l 4 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ struct flow_keys key = {}, next_key;
+ struct pair value;
+
+ sleep(1);
+ printf("IP src.port -> dst.port bytes packets\n");
+ while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[2], &next_key, &value);
+ printf("%s.%05d -> %s.%05d %12lld %12lld\n",
+ inet_ntoa((struct in_addr){htonl(next_key.src)}),
+ next_key.port16[0],
+ inet_ntoa((struct in_addr){htonl(next_key.dst)}),
+ next_key.port16[1],
+ value.bytes, value.packets);
+ key = next_key;
+ }
+ }
+ return 0;
+}
diff --git a/kernel/samples/bpf/tcbpf1_kern.c b/kernel/samples/bpf/tcbpf1_kern.c
index 7c27710f8..fa051b3d5 100644
--- a/kernel/samples/bpf/tcbpf1_kern.c
+++ b/kernel/samples/bpf/tcbpf1_kern.c
@@ -5,7 +5,7 @@
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
-
+#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"
/* compiler workaround */
@@ -21,7 +21,7 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
{
- __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
+ __u8 old_tos = load_byte(skb, TOS_OFF);
bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -34,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
{
- __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
+ __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -44,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
{
- __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
+ __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -53,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
SEC("classifier")
int bpf_prog1(struct __sk_buff *skb)
{
- __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
+ __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
long *value;
if (proto == IPPROTO_TCP) {
@@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb)
return 0;
}
+SEC("redirect_xmit")
+int _redirect_xmit(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 0);
+}
+SEC("redirect_recv")
+int _redirect_recv(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 1);
+}
+SEC("clone_redirect_xmit")
+int _clone_redirect_xmit(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 0);
+ return TC_ACT_SHOT;
+}
+SEC("clone_redirect_recv")
+int _clone_redirect_recv(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 1);
+ return TC_ACT_SHOT;
+}
char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/test_verifier.c b/kernel/samples/bpf/test_verifier.c
index 12f3780af..563c507c0 100644
--- a/kernel/samples/bpf/test_verifier.c
+++ b/kernel/samples/bpf/test_verifier.c
@@ -15,20 +15,28 @@
#include <string.h>
#include <linux/filter.h>
#include <stddef.h>
+#include <stdbool.h>
+#include <sys/resource.h>
#include "libbpf.h"
#define MAX_INSNS 512
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define MAX_FIXUPS 8
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
- int fixup[32];
+ int fixup[MAX_FIXUPS];
+ int prog_array_fixup[MAX_FIXUPS];
const char *errstr;
+ const char *errstr_unpriv;
enum {
+ UNDEF,
ACCEPT,
REJECT
- } result;
+ } result, result_unpriv;
+ enum bpf_prog_type prog_type;
};
static struct bpf_test tests[] = {
@@ -95,6 +103,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -108,6 +117,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -218,6 +228,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.errstr = "R0 !read_ok",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -293,7 +304,9 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
+ .result_unpriv = REJECT,
},
{
"check corrupted spill/fill",
@@ -309,6 +322,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "attempt to corrupt spilled",
.errstr = "corrupted spill",
.result = REJECT,
},
@@ -472,6 +486,7 @@ static struct bpf_test tests[] = {
},
.fixup = {3},
.errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R0 leaks addr",
.result = REJECT,
},
{
@@ -494,6 +509,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -520,6 +537,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -554,6 +573,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup = {24},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -602,6 +623,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -641,6 +664,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -698,6 +723,7 @@ static struct bpf_test tests[] = {
},
.fixup = {4},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -719,6 +745,7 @@ static struct bpf_test tests[] = {
},
.fixup = {6},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
.result = REJECT,
},
{
@@ -741,7 +768,417 @@ static struct bpf_test tests[] = {
},
.fixup = {7},
.errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "check skb->mark is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check skb->tc_index is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check non-u32 access to cb",
+ .insns = {
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check out of range skb->cb access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 256),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ },
+ {
+ "write skb fields from socket prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ },
+ {
+ "write skb fields from tc_cls_act prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "PTR_TO_STACK store/load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned access off -6 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on reg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned access off -2 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds low",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
.result = REJECT,
+ .errstr = "invalid stack off=-79992 size=8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds high",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=0 size=8",
+ },
+ {
+ "unpriv: return pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ },
+ {
+ "unpriv: add const to pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: add pointer to pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: neg pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: cmp pointer with const",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer comparison",
+ },
+ {
+ "unpriv: cmp pointer with pointer",
+ .insns = {
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 pointer comparison",
+ },
+ {
+ "unpriv: check that printk is disallowed",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "unknown func 6",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to helper function",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R4 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: indirectly pass pointer on stack to helper function",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ },
+ {
+ "unpriv: mangle pointer on stack 1",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: mangle pointer on stack 2",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: read pointer from stack in small chunks",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid size",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into ctx",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into map elem value",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: partial copy of pointer",
+ .insns = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 partial copy",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to tail_call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_array_fixup = {1},
+ .errstr_unpriv = "R3 leaks addr into helper",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp map pointer with zero",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {1},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: write into frame pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: cmp of frame pointer",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp of stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: obfuscate stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer arithmetic",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
},
};
@@ -758,13 +1195,24 @@ static int probe_filter_length(struct bpf_insn *fp)
static int create_map(void)
{
- long long key, value = 0;
int map_fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 1024);
- if (map_fd < 0) {
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(long long), sizeof(long long), 1024);
+ if (map_fd < 0)
printf("failed to create map '%s'\n", strerror(errno));
- }
+
+ return map_fd;
+}
+
+static int create_prog_array(void)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
+ sizeof(int), sizeof(int), 4);
+ if (map_fd < 0)
+ printf("failed to create prog_array '%s'\n", strerror(errno));
return map_fd;
}
@@ -772,12 +1220,17 @@ static int create_map(void)
static int test(void)
{
int prog_fd, i, pass_cnt = 0, err_cnt = 0;
+ bool unpriv = geteuid() != 0;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_insn *prog = tests[i].insns;
+ int prog_type = tests[i].prog_type;
int prog_len = probe_filter_length(prog);
int *fixup = tests[i].fixup;
- int map_fd = -1;
+ int *prog_array_fixup = tests[i].prog_array_fixup;
+ int expected_result;
+ const char *expected_errstr;
+ int map_fd = -1, prog_array_fd = -1;
if (*fixup) {
map_fd = create_map();
@@ -787,13 +1240,31 @@ static int test(void)
fixup++;
} while (*fixup);
}
+ if (*prog_array_fixup) {
+ prog_array_fd = create_prog_array();
+
+ do {
+ prog[*prog_array_fixup].imm = prog_array_fd;
+ prog_array_fixup++;
+ } while (*prog_array_fixup);
+ }
printf("#%d %s ", i, tests[i].descr);
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
- prog_len * sizeof(struct bpf_insn),
+ prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len * sizeof(struct bpf_insn),
"GPL", 0);
- if (tests[i].result == ACCEPT) {
+ if (unpriv && tests[i].result_unpriv != UNDEF)
+ expected_result = tests[i].result_unpriv;
+ else
+ expected_result = tests[i].result;
+
+ if (unpriv && tests[i].errstr_unpriv)
+ expected_errstr = tests[i].errstr_unpriv;
+ else
+ expected_errstr = tests[i].errstr;
+
+ if (expected_result == ACCEPT) {
if (prog_fd < 0) {
printf("FAIL\nfailed to load prog '%s'\n",
strerror(errno));
@@ -808,7 +1279,7 @@ static int test(void)
err_cnt++;
goto fail;
}
- if (strstr(bpf_log_buf, tests[i].errstr) == 0) {
+ if (strstr(bpf_log_buf, expected_errstr) == 0) {
printf("FAIL\nunexpected error message: %s",
bpf_log_buf);
err_cnt++;
@@ -821,6 +1292,8 @@ static int test(void)
fail:
if (map_fd >= 0)
close(map_fd);
+ if (prog_array_fd >= 0)
+ close(prog_array_fd);
close(prog_fd);
}
@@ -831,5 +1304,8 @@ fail:
int main(void)
{
+ struct rlimit r = {1 << 20, 1 << 20};
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
return test();
}
diff --git a/kernel/samples/bpf/trace_output_kern.c b/kernel/samples/bpf/trace_output_kern.c
new file mode 100644
index 000000000..8d8d1ec42
--- /dev/null
+++ b/kernel/samples/bpf/trace_output_kern.c
@@ -0,0 +1,31 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 2,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct S {
+ u64 pid;
+ u64 cookie;
+ } data;
+
+ memset(&data, 0, sizeof(data));
+ data.pid = bpf_get_current_pid_tgid();
+ data.cookie = 0x12345678;
+
+ bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/trace_output_user.c b/kernel/samples/bpf/trace_output_user.c
new file mode 100644
index 000000000..661a7d052
--- /dev/null
+++ b/kernel/samples/bpf/trace_output_user.c
@@ -0,0 +1,196 @@
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int pmu_fd;
+
+int page_size;
+int page_cnt = 8;
+volatile struct perf_event_mmap_page *header;
+
+typedef void (*print_fn)(void *data, int size);
+
+static int perf_event_mmap(int fd)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ header = base;
+ return 0;
+}
+
+static int perf_event_poll(int fd)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+ return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+void perf_event_read(print_fn fn)
+{
+ __u64 data_tail = header->data_tail;
+ __u64 data_head = header->data_head;
+ __u64 buffer_size = page_cnt * page_size;
+ void *base, *begin, *end;
+ char buf[256];
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return;
+
+ base = ((char *)header) + page_size;
+
+ begin = base + data_tail % buffer_size;
+ end = base + data_head % buffer_size;
+
+ while (begin != end) {
+ struct perf_event_sample *e;
+
+ e = begin;
+ if (begin + e->header.size > base + buffer_size) {
+ long len = base + buffer_size - begin;
+
+ assert(len < e->header.size);
+ memcpy(buf, begin, len);
+ memcpy(buf + len, base, e->header.size - len);
+ e = (void *) buf;
+ begin = base + e->header.size - len;
+ } else if (begin + e->header.size == base + buffer_size) {
+ begin = base;
+ } else {
+ begin += e->header.size;
+ }
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ fn(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ header->data_tail = data_head;
+}
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static __u64 start_time;
+
+#define MAX_CNT 100000ll
+
+static void print_bpf_output(void *data, int size)
+{
+ static __u64 cnt;
+ struct {
+ __u64 pid;
+ __u64 cookie;
+ } *e = data;
+
+ if (e->cookie != 0x12345678) {
+ printf("BUG pid %llx cookie %llx sized %d\n",
+ e->pid, e->cookie, size);
+ kill(0, SIGINT);
+ }
+
+ cnt++;
+
+ if (cnt == MAX_CNT) {
+ printf("recv %lld events per sec\n",
+ MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ kill(0, SIGINT);
+ }
+}
+
+static void test_bpf_perf_event(void)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ };
+ int key = 0;
+
+ pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+
+ assert(pmu_fd >= 0);
+ assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
+ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+
+ if (perf_event_mmap(pmu_fd) < 0)
+ return 1;
+
+ f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
+ (void) f;
+
+ start_time = time_get_ns();
+ for (;;) {
+ perf_event_poll(pmu_fd);
+ perf_event_read(print_bpf_output);
+ }
+
+ return 0;
+}
diff --git a/kernel/samples/bpf/tracex1_kern.c b/kernel/samples/bpf/tracex1_kern.c
index 316204637..3f450a8fa 100644
--- a/kernel/samples/bpf/tracex1_kern.c
+++ b/kernel/samples/bpf/tracex1_kern.c
@@ -29,7 +29,7 @@ int bpf_prog1(struct pt_regs *ctx)
int len;
/* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) ctx->di;
+ skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
dev = _(skb->dev);
diff --git a/kernel/samples/bpf/tracex2_kern.c b/kernel/samples/bpf/tracex2_kern.c
index 19ec1cfc4..b32367cfb 100644
--- a/kernel/samples/bpf/tracex2_kern.c
+++ b/kernel/samples/bpf/tracex2_kern.c
@@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx)
long init_val = 1;
long *value;
- /* x64 specific: read ip of kfree_skb caller.
+ /* x64/s390x specific: read ip of kfree_skb caller.
* non-portable version of __builtin_return_address(0)
*/
- bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp);
+ bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx));
value = bpf_map_lookup_elem(&my_map, &loc);
if (value)
@@ -62,24 +62,38 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
+struct hist_key {
+ char comm[16];
+ u64 pid_tgid;
+ u64 uid_gid;
+ u32 index;
+};
+
struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct hist_key),
.value_size = sizeof(long),
- .max_entries = 64,
+ .max_entries = 1024,
};
SEC("kprobe/sys_write")
int bpf_prog3(struct pt_regs *ctx)
{
- long write_size = ctx->dx; /* arg3 */
+ long write_size = PT_REGS_PARM3(ctx);
long init_val = 1;
long *value;
- u32 index = log2l(write_size);
+ struct hist_key key = {};
+
+ key.index = log2l(write_size);
+ key.pid_tgid = bpf_get_current_pid_tgid();
+ key.uid_gid = bpf_get_current_uid_gid();
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
- value = bpf_map_lookup_elem(&my_hist_map, &index);
+ value = bpf_map_lookup_elem(&my_hist_map, &key);
if (value)
__sync_fetch_and_add(value, 1);
+ else
+ bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
return 0;
}
char _license[] SEC("license") = "GPL";
diff --git a/kernel/samples/bpf/tracex2_user.c b/kernel/samples/bpf/tracex2_user.c
index 91b8d0896..cd0241c14 100644
--- a/kernel/samples/bpf/tracex2_user.c
+++ b/kernel/samples/bpf/tracex2_user.c
@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <signal.h>
#include <linux/bpf.h>
+#include <string.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width)
str[i] = '\0';
}
-static void print_hist(int fd)
+struct task {
+ char comm[16];
+ __u64 pid_tgid;
+ __u64 uid_gid;
+};
+
+struct hist_key {
+ struct task t;
+ __u32 index;
+};
+
+#define SIZE sizeof(struct task)
+
+static void print_hist_for_pid(int fd, void *task)
{
- int key;
+ struct hist_key key = {}, next_key;
+ char starstr[MAX_STARS];
long value;
long data[MAX_INDEX] = {};
- char starstr[MAX_STARS];
- int i;
int max_ind = -1;
long max_value = 0;
+ int i, ind;
- for (key = 0; key < MAX_INDEX; key++) {
- bpf_lookup_elem(fd, &key, &value);
- data[key] = value;
- if (value && key > max_ind)
- max_ind = key;
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ if (memcmp(&next_key, task, SIZE)) {
+ key = next_key;
+ continue;
+ }
+ bpf_lookup_elem(fd, &next_key, &value);
+ ind = next_key.index;
+ data[ind] = value;
+ if (value && ind > max_ind)
+ max_ind = ind;
if (value > max_value)
max_value = value;
+ key = next_key;
}
printf(" syscall write() stats\n");
@@ -48,6 +68,35 @@ static void print_hist(int fd)
MAX_STARS, starstr);
}
}
+
+static void print_hist(int fd)
+{
+ struct hist_key key = {}, next_key;
+ static struct task tasks[1024];
+ int task_cnt = 0;
+ int i;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ int found = 0;
+
+ for (i = 0; i < task_cnt; i++)
+ if (memcmp(&tasks[i], &next_key, SIZE) == 0)
+ found = 1;
+ if (!found)
+ memcpy(&tasks[task_cnt++], &next_key, SIZE);
+ key = next_key;
+ }
+
+ for (i = 0; i < task_cnt; i++) {
+ printf("\npid %d cmd %s uid %d\n",
+ (__u32) tasks[i].pid_tgid,
+ tasks[i].comm,
+ (__u32) tasks[i].uid_gid);
+ print_hist_for_pid(fd, &tasks[i]);
+ }
+
+}
+
static void int_exit(int sig)
{
print_hist(map_fd[1]);
diff --git a/kernel/samples/bpf/tracex3_kern.c b/kernel/samples/bpf/tracex3_kern.c
index 255ff2792..bf337fbb0 100644
--- a/kernel/samples/bpf/tracex3_kern.c
+++ b/kernel/samples/bpf/tracex3_kern.c
@@ -23,7 +23,7 @@ struct bpf_map_def SEC("maps") my_map = {
SEC("kprobe/blk_mq_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
- long rq = ctx->di;
+ long rq = PT_REGS_PARM1(ctx);
u64 val = bpf_ktime_get_ns();
bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
@@ -51,7 +51,7 @@ struct bpf_map_def SEC("maps") lat_map = {
SEC("kprobe/blk_update_request")
int bpf_prog2(struct pt_regs *ctx)
{
- long rq = ctx->di;
+ long rq = PT_REGS_PARM1(ctx);
u64 *value, l, base;
u32 index;
diff --git a/kernel/samples/bpf/tracex4_kern.c b/kernel/samples/bpf/tracex4_kern.c
index 126b80512..ac4671420 100644
--- a/kernel/samples/bpf/tracex4_kern.c
+++ b/kernel/samples/bpf/tracex4_kern.c
@@ -27,7 +27,7 @@ struct bpf_map_def SEC("maps") my_map = {
SEC("kprobe/kmem_cache_free")
int bpf_prog1(struct pt_regs *ctx)
{
- long ptr = ctx->si;
+ long ptr = PT_REGS_PARM2(ctx);
bpf_map_delete_elem(&my_map, &ptr);
return 0;
@@ -36,11 +36,11 @@ int bpf_prog1(struct pt_regs *ctx)
SEC("kretprobe/kmem_cache_alloc_node")
int bpf_prog2(struct pt_regs *ctx)
{
- long ptr = ctx->ax;
+ long ptr = PT_REGS_RC(ctx);
long ip = 0;
/* get ip address of kmem_cache_alloc_node() caller */
- bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip)));
+ bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip)));
struct pair v = {
.val = bpf_ktime_get_ns(),
diff --git a/kernel/samples/bpf/tracex5_kern.c b/kernel/samples/bpf/tracex5_kern.c
new file mode 100644
index 000000000..b3f4295bf
--- /dev/null
+++ b/kernel/samples/bpf/tracex5_kern.c
@@ -0,0 +1,75 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/seccomp.h>
+#include "bpf_helpers.h"
+
+#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") progs = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1024,
+};
+
+SEC("kprobe/seccomp_phase1")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+
+ /* dispatch into next BPF program depending on syscall number */
+ bpf_tail_call(ctx, &progs, sd.nr);
+
+ /* fall through -> unknown syscall */
+ if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
+ char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
+ bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
+ }
+ return 0;
+}
+
+/* we jump here when syscall number == __NR_write */
+PROG(__NR_write)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+ if (sd.args[2] == 512) {
+ char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_read)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+ if (sd.args[2] > 128 && sd.args[2] <= 1024) {
+ char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_mmap)(struct pt_regs *ctx)
+{
+ char fmt[] = "mmap\n";
+ bpf_trace_printk(fmt, sizeof(fmt));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/tracex5_user.c b/kernel/samples/bpf/tracex5_user.c
new file mode 100644
index 000000000..a04dd3cd4
--- /dev/null
+++ b/kernel/samples/bpf/tracex5_user.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+/* install fake seccomp program to enable seccomp code path inside the kernel,
+ * so that our kprobe attached to seccomp_phase1() can be triggered
+ */
+static void install_accept_all_seccomp(void)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .filter = filter,
+ };
+ if (prctl(PR_SET_SECCOMP, 2, &prog))
+ perror("prctl");
+}
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ install_accept_all_seccomp();
+
+ f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
+ (void) f;
+
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/kernel/samples/bpf/tracex6_kern.c b/kernel/samples/bpf/tracex6_kern.c
new file mode 100644
index 000000000..be479c4af
--- /dev/null
+++ b/kernel/samples/bpf/tracex6_kern.c
@@ -0,0 +1,27 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 32,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ u64 count;
+ u32 key = bpf_get_smp_processor_id();
+ char fmt[] = "CPU-%d %llu\n";
+
+ count = bpf_perf_event_read(&my_map, key);
+ bpf_trace_printk(fmt, sizeof(fmt), key, count);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/kernel/samples/bpf/tracex6_user.c b/kernel/samples/bpf/tracex6_user.c
new file mode 100644
index 000000000..8ea4976cf
--- /dev/null
+++ b/kernel/samples/bpf/tracex6_user.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+
+static void test_bpf_perf_event(void)
+{
+ int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ int *pmu_fd = malloc(nr_cpus * sizeof(int));
+ int status, i;
+
+ struct perf_event_attr attr_insn_pmu = {
+ .freq = 0,
+ .sample_period = SAMPLE_PERIOD,
+ .inherit = 0,
+ .type = PERF_TYPE_HARDWARE,
+ .read_format = 0,
+ .sample_type = 0,
+ .config = 0,/* PMU: cycles */
+ };
+
+ for (i = 0; i < nr_cpus; i++) {
+ pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
+ if (pmu_fd[i] < 0) {
+ printf("event syscall failed\n");
+ goto exit;
+ }
+
+ bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
+ ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ }
+
+ status = system("ls > /dev/null");
+ if (status)
+ goto exit;
+ status = system("sleep 2");
+ if (status)
+ goto exit;
+
+exit:
+ for (i = 0; i < nr_cpus; i++)
+ close(pmu_fd[i]);
+ close(map_fd[0]);
+ free(pmu_fd);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/kernel/samples/configfs/Makefile b/kernel/samples/configfs/Makefile
new file mode 100644
index 000000000..a9afd9963
--- /dev/null
+++ b/kernel/samples/configfs/Makefile
@@ -0,0 +1,2 @@
+
+obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs_sample.o
diff --git a/kernel/samples/configfs/configfs_sample.c b/kernel/samples/configfs/configfs_sample.c
new file mode 100644
index 000000000..1ea33119e
--- /dev/null
+++ b/kernel/samples/configfs/configfs_sample.c
@@ -0,0 +1,404 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example_macros.c - This file is a demonstration module
+ * containing a number of configfs subsystems. It uses the helper
+ * macros defined by configfs.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem. It cannot create
+ * any config_items. It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem. See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+ struct configfs_subsystem subsys;
+ int showme;
+ int storeme;
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+ return item ? container_of(to_configfs_subsystem(to_config_group(item)),
+ struct childless, subsys) : NULL;
+}
+
+static ssize_t childless_showme_show(struct config_item *item, char *page)
+{
+ struct childless *childless = to_childless(item);
+ ssize_t pos;
+
+ pos = sprintf(page, "%d\n", childless->showme);
+ childless->showme++;
+
+ return pos;
+}
+
+static ssize_t childless_storeme_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", to_childless(item)->storeme);
+}
+
+static ssize_t childless_storeme_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct childless *childless = to_childless(item);
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ if (tmp > INT_MAX)
+ return -ERANGE;
+
+ childless->storeme = tmp;
+
+ return count;
+}
+
+static ssize_t childless_description_show(struct config_item *item, char *page)
+{
+ return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs. It does not support the creation of child config_items.\n"
+"It only has a few attributes. In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+CONFIGFS_ATTR_RO(childless_, showme);
+CONFIGFS_ATTR(childless_, storeme);
+CONFIGFS_ATTR_RO(childless_, description);
+
+static struct configfs_attribute *childless_attrs[] = {
+ &childless_attr_showme,
+ &childless_attr_storeme,
+ &childless_attr_description,
+ NULL,
+};
+
+static struct config_item_type childless_type = {
+ .ct_attrs = childless_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+ .subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "01-childless",
+ .ci_type = &childless_type,
+ },
+ },
+ },
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child. Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go. Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+ struct config_item item;
+ int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+ return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static ssize_t simple_child_storeme_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", to_simple_child(item)->storeme);
+}
+
+static ssize_t simple_child_storeme_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct simple_child *simple_child = to_simple_child(item);
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ if (tmp > INT_MAX)
+ return -ERANGE;
+
+ simple_child->storeme = tmp;
+
+ return count;
+}
+
+CONFIGFS_ATTR(simple_child_, storeme);
+
+static struct configfs_attribute *simple_child_attrs[] = {
+ &simple_child_attr_storeme,
+ NULL,
+};
+
+static void simple_child_release(struct config_item *item)
+{
+ kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+ .release = simple_child_release,
+};
+
+static struct config_item_type simple_child_type = {
+ .ct_item_ops = &simple_child_item_ops,
+ .ct_attrs = simple_child_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+
+struct simple_children {
+ struct config_group group;
+};
+
+static inline struct simple_children *to_simple_children(struct config_item *item)
+{
+ return item ? container_of(to_config_group(item),
+ struct simple_children, group) : NULL;
+}
+
+static struct config_item *simple_children_make_item(struct config_group *group,
+ const char *name)
+{
+ struct simple_child *simple_child;
+
+ simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
+ if (!simple_child)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&simple_child->item, name,
+ &simple_child_type);
+
+ simple_child->storeme = 0;
+
+ return &simple_child->item;
+}
+
+static ssize_t simple_children_description_show(struct config_item *item,
+ char *page)
+{
+ return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items. These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+CONFIGFS_ATTR_RO(simple_children_, description);
+
+static struct configfs_attribute *simple_children_attrs[] = {
+ &simple_children_attr_description,
+ NULL,
+};
+
+static void simple_children_release(struct config_item *item)
+{
+ kfree(to_simple_children(item));
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+ .release = simple_children_release,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+ .make_item = simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+ .ct_item_ops = &simple_children_item_ops,
+ .ct_group_ops = &simple_children_group_ops,
+ .ct_attrs = simple_children_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "02-simple-children",
+ .ci_type = &simple_children_type,
+ },
+ },
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above. However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem. Creation of a group in the subsystem creates
+ * a new simple_children group. That group can then have simple_child
+ * children of its own.
+ */
+
+static struct config_group *group_children_make_group(
+ struct config_group *group, const char *name)
+{
+ struct simple_children *simple_children;
+
+ simple_children = kzalloc(sizeof(struct simple_children),
+ GFP_KERNEL);
+ if (!simple_children)
+ return ERR_PTR(-ENOMEM);
+
+ config_group_init_type_name(&simple_children->group, name,
+ &simple_children_type);
+
+ return &simple_children->group;
+}
+
+static ssize_t group_children_description_show(struct config_item *item,
+ char *page)
+{
+ return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups. These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+CONFIGFS_ATTR_RO(group_children_, description);
+
+static struct configfs_attribute *group_children_attrs[] = {
+ &group_children_attr_description,
+ NULL,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+ .make_group = group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+ .ct_group_ops = &group_children_group_ops,
+ .ct_attrs = group_children_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "03-group-children",
+ .ci_type = &group_children_type,
+ },
+ },
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all. It
+ * allows the init function to easily register them. Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+ &childless_subsys.subsys,
+ &simple_children_subsys,
+ &group_children_subsys,
+ NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+ int ret;
+ int i;
+ struct configfs_subsystem *subsys;
+
+ for (i = 0; example_subsys[i]; i++) {
+ subsys = example_subsys[i];
+
+ config_group_init(&subsys->su_group);
+ mutex_init(&subsys->su_mutex);
+ ret = configfs_register_subsystem(subsys);
+ if (ret) {
+ printk(KERN_ERR "Error %d while registering subsystem %s\n",
+ ret,
+ subsys->su_group.cg_item.ci_namebuf);
+ goto out_unregister;
+ }
+ }
+
+ return 0;
+
+out_unregister:
+ for (i--; i >= 0; i--)
+ configfs_unregister_subsystem(example_subsys[i]);
+
+ return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+ int i;
+
+ for (i = 0; example_subsys[i]; i++)
+ configfs_unregister_subsystem(example_subsys[i]);
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
diff --git a/kernel/samples/kprobes/jprobe_example.c b/kernel/samples/kprobes/jprobe_example.c
index 9119ac6a8..c285a3b8a 100644
--- a/kernel/samples/kprobes/jprobe_example.c
+++ b/kernel/samples/kprobes/jprobe_example.c
@@ -1,13 +1,13 @@
/*
* Here's a sample kernel module showing the use of jprobes to dump
- * the arguments of do_fork().
+ * the arguments of _do_fork().
*
* For more information on theory of operation of jprobes, see
* Documentation/kprobes.txt
*
* Build and insert the kernel module as done in the kprobe example.
* You will see the trace data in /var/log/messages and on the
- * console whenever do_fork() is invoked to create a new process.
+ * console whenever _do_fork() is invoked to create a new process.
* (Some messages may be suppressed if syslogd is configured to
* eliminate duplicate messages.)
*/
@@ -17,13 +17,13 @@
#include <linux/kprobes.h>
/*
- * Jumper probe for do_fork.
+ * Jumper probe for _do_fork.
* Mirror principle enables access to arguments of the probed routine
* from the probe handler.
*/
-/* Proxy routine having the same arguments as actual do_fork() routine */
-static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
+/* Proxy routine having the same arguments as actual _do_fork() routine */
+static long j_do_fork(unsigned long clone_flags, unsigned long stack_start,
unsigned long stack_size, int __user *parent_tidptr,
int __user *child_tidptr)
{
@@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
}
static struct jprobe my_jprobe = {
- .entry = jdo_fork,
+ .entry = j_do_fork,
.kp = {
- .symbol_name = "do_fork",
+ .symbol_name = "_do_fork",
},
};
diff --git a/kernel/samples/kprobes/kprobe_example.c b/kernel/samples/kprobes/kprobe_example.c
index 366db1a9f..727eb21c9 100644
--- a/kernel/samples/kprobes/kprobe_example.c
+++ b/kernel/samples/kprobes/kprobe_example.c
@@ -1,13 +1,13 @@
/*
* NOTE: This example is works on x86 and powerpc.
* Here's a sample kernel module showing the use of kprobes to dump a
- * stack trace and selected registers when do_fork() is called.
+ * stack trace and selected registers when _do_fork() is called.
*
* For more information on theory of operation of kprobes, see
* Documentation/kprobes.txt
*
* You will see the trace data in /var/log/messages and on the console
- * whenever do_fork() is invoked to create a new process.
+ * whenever _do_fork() is invoked to create a new process.
*/
#include <linux/kernel.h>
@@ -16,7 +16,7 @@
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
- .symbol_name = "do_fork",
+ .symbol_name = "_do_fork",
};
/* kprobe pre_handler: called just before the probed instruction is executed */
diff --git a/kernel/samples/kprobes/kretprobe_example.c b/kernel/samples/kprobes/kretprobe_example.c
index 1041b6731..ebb1d1aed 100644
--- a/kernel/samples/kprobes/kretprobe_example.c
+++ b/kernel/samples/kprobes/kretprobe_example.c
@@ -7,7 +7,7 @@
*
* usage: insmod kretprobe_example.ko func=<func_name>
*
- * If no func_name is specified, do_fork is instrumented
+ * If no func_name is specified, _do_fork is instrumented
*
* For more information on theory of operation of kretprobes, see
* Documentation/kprobes.txt
@@ -25,7 +25,7 @@
#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "do_fork";
+static char func_name[NAME_MAX] = "_do_fork";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
diff --git a/kernel/samples/pktgen/README.rst b/kernel/samples/pktgen/README.rst
new file mode 100644
index 000000000..8365c4e5c
--- /dev/null
+++ b/kernel/samples/pktgen/README.rst
@@ -0,0 +1,43 @@
+Sample and benchmark scripts for pktgen (packet generator)
+==========================================================
+This directory contains some pktgen sample and benchmark scripts, that
+can easily be copied and adjusted for your own use-case.
+
+General doc is located in kernel: Documentation/networking/pktgen.txt
+
+Helper include files
+====================
+This directory contains two helper shell files, that can be "included"
+by shell source'ing. Namely "functions.sh" and "parameters.sh".
+
+Common parameters
+-----------------
+The parameters.sh file support easy and consistant parameter parsing
+across the sample scripts. Usage example is printed on errors::
+
+ Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX
+ -i : ($DEV) output interface/device (required)
+ -s : ($PKT_SIZE) packet size
+ -d : ($DEST_IP) destination IP
+ -m : ($DST_MAC) destination MAC-addr
+ -t : ($THREADS) threads to start
+ -c : ($SKB_CLONE) SKB clones send before alloc new SKB
+ -b : ($BURST) HW level bursting of SKBs
+ -v : ($VERBOSE) verbose
+ -x : ($DEBUG) debug
+
+The global variable being set is also listed. E.g. the required
+interface/device parameter "-i" sets variable $DEV.
+
+Common functions
+----------------
+The functions.sh file provides; Three different shell functions for
+configuring the different components of pktgen: pg_ctrl(), pg_thread()
+and pg_set().
+
+These functions correspond to pktgens different components.
+ * pg_ctrl() control "pgctrl" (/proc/net/pktgen/pgctrl)
+ * pg_thread() control the kernel threads and binding to devices
+ * pg_set() control setup of individual devices
+
+See sample scripts for usage examples.
diff --git a/kernel/samples/pktgen/functions.sh b/kernel/samples/pktgen/functions.sh
new file mode 100644
index 000000000..205e4cde4
--- /dev/null
+++ b/kernel/samples/pktgen/functions.sh
@@ -0,0 +1,121 @@
+#
+# Common functions used by pktgen scripts
+# - Depending on bash 3 (or higher) syntax
+#
+# Author: Jesper Dangaaard Brouer
+# License: GPL
+
+## -- General shell logging cmds --
+function err() {
+ local exitcode=$1
+ shift
+ echo "ERROR: $@" >&2
+ exit $exitcode
+}
+
+function warn() {
+ echo "WARN : $@" >&2
+}
+
+function info() {
+ if [[ -n "$VERBOSE" ]]; then
+ echo "INFO : $@" >&2
+ fi
+}
+
+## -- Pktgen proc config commands -- ##
+export PROC_DIR=/proc/net/pktgen
+#
+# Three different shell functions for configuring the different
+# components of pktgen:
+# pg_ctrl(), pg_thread() and pg_set().
+#
+# These functions correspond to pktgens different components.
+# * pg_ctrl() control "pgctrl" (/proc/net/pktgen/pgctrl)
+# * pg_thread() control the kernel threads and binding to devices
+# * pg_set() control setup of individual devices
+function pg_ctrl() {
+ local proc_file="pgctrl"
+ proc_cmd ${proc_file} "$@"
+}
+
+function pg_thread() {
+ local thread=$1
+ local proc_file="kpktgend_${thread}"
+ shift
+ proc_cmd ${proc_file} "$@"
+}
+
+function pg_set() {
+ local dev=$1
+ local proc_file="$dev"
+ shift
+ proc_cmd ${proc_file} "$@"
+}
+
+# More generic replacement for pgset(), that does not depend on global
+# variable for proc file.
+function proc_cmd() {
+ local result
+ local proc_file=$1
+ # after shift, the remaining args are contained in $@
+ shift
+ local proc_ctrl=${PROC_DIR}/$proc_file
+ if [[ ! -e "$proc_ctrl" ]]; then
+ err 3 "proc file:$proc_ctrl does not exists (dev added to thread?)"
+ else
+ if [[ ! -w "$proc_ctrl" ]]; then
+ err 4 "proc file:$proc_ctrl not writable, not root?!"
+ fi
+ fi
+
+ if [[ "$DEBUG" == "yes" ]]; then
+ echo "cmd: $@ > $proc_ctrl"
+ fi
+ # Quoting of "$@" is important for space expansion
+ echo "$@" > "$proc_ctrl"
+ local status=$?
+
+ result=$(grep "Result: OK:" $proc_ctrl)
+ # Due to pgctrl, cannot use exit code $? from grep
+ if [[ "$result" == "" ]]; then
+ grep "Result:" $proc_ctrl >&2
+ fi
+ if (( $status != 0 )); then
+ err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\""
+ fi
+}
+
+# Old obsolete "pgset" function, with slightly improved err handling
+function pgset() {
+ local result
+
+ if [[ "$DEBUG" == "yes" ]]; then
+ echo "cmd: $1 > $PGDEV"
+ fi
+ echo $1 > $PGDEV
+ local status=$?
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [[ "$result" == "" ]]; then
+ cat $PGDEV | fgrep Result:
+ fi
+ if (( $status != 0 )); then
+ err 5 "Write error($status) occurred cmd: \"$1 > $PGDEV\""
+ fi
+}
+
+## -- General shell tricks --
+
+function root_check_run_with_sudo() {
+ # Trick so, program can be run as normal user, will just use "sudo"
+ # call as root_check_run_as_sudo "$@"
+ if [ "$EUID" -ne 0 ]; then
+ if [ -x $0 ]; then # Directly executable use sudo
+ info "Not root, running with sudo"
+ sudo "$0" "$@"
+ exit $?
+ fi
+ err 4 "cannot perform sudo run of $0"
+ fi
+}
diff --git a/kernel/samples/pktgen/parameters.sh b/kernel/samples/pktgen/parameters.sh
new file mode 100644
index 000000000..33b70fdd5
--- /dev/null
+++ b/kernel/samples/pktgen/parameters.sh
@@ -0,0 +1,97 @@
+#
+# Common parameter parsing for pktgen scripts
+#
+
+function usage() {
+ echo ""
+ echo "Usage: $0 [-vx] -i ethX"
+ echo " -i : (\$DEV) output interface/device (required)"
+ echo " -s : (\$PKT_SIZE) packet size"
+ echo " -d : (\$DEST_IP) destination IP"
+ echo " -m : (\$DST_MAC) destination MAC-addr"
+ echo " -t : (\$THREADS) threads to start"
+ echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
+ echo " -b : (\$BURST) HW level bursting of SKBs"
+ echo " -v : (\$VERBOSE) verbose"
+ echo " -x : (\$DEBUG) debug"
+ echo ""
+}
+
+## --- Parse command line arguments / parameters ---
+## echo "Commandline options:"
+while getopts "s:i:d:m:t:c:b:vxh" option; do
+ case $option in
+ i) # interface
+ export DEV=$OPTARG
+ info "Output device set to: DEV=$DEV"
+ ;;
+ s)
+ export PKT_SIZE=$OPTARG
+ info "Packet size set to: PKT_SIZE=$PKT_SIZE bytes"
+ ;;
+ d) # destination IP
+ export DEST_IP=$OPTARG
+ info "Destination IP set to: DEST_IP=$DEST_IP"
+ ;;
+ m) # MAC
+ export DST_MAC=$OPTARG
+ info "Destination MAC set to: DST_MAC=$DST_MAC"
+ ;;
+ t)
+ export THREADS=$OPTARG
+ export CPU_THREADS=$OPTARG
+ let "CPU_THREADS -= 1"
+ info "Number of threads to start: $THREADS (0 to $CPU_THREADS)"
+ ;;
+ c)
+ export CLONE_SKB=$OPTARG
+ info "CLONE_SKB=$CLONE_SKB"
+ ;;
+ b)
+ export BURST=$OPTARG
+ info "SKB bursting: BURST=$BURST"
+ ;;
+ v)
+ export VERBOSE=yes
+ info "Verbose mode: VERBOSE=$VERBOSE"
+ ;;
+ x)
+ export DEBUG=yes
+ info "Debug mode: DEBUG=$DEBUG"
+ ;;
+ h|?|*)
+ usage;
+ err 2 "[ERROR] Unknown parameters!!!"
+ esac
+done
+shift $(( $OPTIND - 1 ))
+
+if [ -z "$PKT_SIZE" ]; then
+ # NIC adds 4 bytes CRC
+ export PKT_SIZE=60
+ info "Default packet size set to: set to: $PKT_SIZE bytes"
+fi
+
+if [ -z "$THREADS" ]; then
+ # Zero CPU threads means one thread, because CPU numbers are zero indexed
+ export CPU_THREADS=0
+ export THREADS=1
+fi
+
+if [ -z "$DEV" ]; then
+ usage
+ err 2 "Please specify output device"
+fi
+
+if [ -z "$DST_MAC" ]; then
+ warn "Missing destination MAC address"
+fi
+
+if [ -z "$DEST_IP" ]; then
+ warn "Missing destination IP address"
+fi
+
+if [ ! -d /proc/net/pktgen ]; then
+ info "Loading kernel module: pktgen"
+ modprobe pktgen
+fi
diff --git a/kernel/samples/pktgen/pktgen.conf-1-1 b/kernel/samples/pktgen/pktgen.conf-1-1
deleted file mode 100755
index f91daad9e..000000000
--- a/kernel/samples/pktgen/pktgen.conf-1-1
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
- local result
-
- echo $1 > $PGDEV
-
- result=`cat $PGDEV | fgrep "Result: OK:"`
- if [ "$result" = "" ]; then
- cat $PGDEV | fgrep Result:
- fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. One CPU example. We add eth1.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth1"
- pgset "add_device eth1"
-
-
-# device config
-# delay 0 means maximum speed.
-
-CLONE_SKB="clone_skb 1000000"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 10.10.11.2"
- pgset "dst_mac 00:04:23:08:91:dc"
-
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1
diff --git a/kernel/samples/pktgen/pktgen.conf-2-1 b/kernel/samples/pktgen/pktgen.conf-2-1
deleted file mode 100755
index e108e97d6..000000000
--- a/kernel/samples/pktgen/pktgen.conf-2-1
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
- local result
-
- echo $1 > $PGDEV
-
- result=`cat $PGDEV | fgrep "Result: OK:"`
- if [ "$result" = "" ]; then
- cat $PGDEV | fgrep Result:
- fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. Two CPU example. We add eth1 to the first
-# and leave the second idle.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth1"
- pgset "add_device eth1"
-
-# We need to remove old config since we dont use this thread. We can only
-# one NIC on one CPU due to affinity reasons.
-
-PGDEV=/proc/net/pktgen/kpktgend_1
- echo "Removing all devices"
- pgset "rem_device_all"
-
-# device config
-# delay 0 means maximum speed.
-
-CLONE_SKB="clone_skb 1000000"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 10.10.11.2"
- pgset "dst_mac 00:04:23:08:91:dc"
-
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1
diff --git a/kernel/samples/pktgen/pktgen.conf-2-2 b/kernel/samples/pktgen/pktgen.conf-2-2
deleted file mode 100755
index acea15503..000000000
--- a/kernel/samples/pktgen/pktgen.conf-2-2
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
- local result
-
- echo $1 > $PGDEV
-
- result=`cat $PGDEV | fgrep "Result: OK:"`
- if [ "$result" = "" ]; then
- cat $PGDEV | fgrep Result:
- fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. Two CPU example. We add eth1, eth2 respectively.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth1"
- pgset "add_device eth1"
-
-PGDEV=/proc/net/pktgen/kpktgend_1
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth2"
- pgset "add_device eth2"
-
-
-# device config
-# delay 0 means maximum speed.
-
-CLONE_SKB="clone_skb 1000000"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 10.10.11.2"
- pgset "dst_mac 00:04:23:08:91:dc"
-
-PGDEV=/proc/net/pktgen/eth2
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 192.168.2.2"
- pgset "dst_mac 00:04:23:08:91:de"
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2
diff --git a/kernel/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/kernel/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
new file mode 100755
index 000000000..cb1590331
--- /dev/null
+++ b/kernel/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Benchmark script:
+# - developed for benchmarking ingress qdisc path
+#
+# Script for injecting packets into RX path of the stack with pktgen
+# "xmit_mode netif_receive". With an invalid dst_mac this will only
+# measure the ingress code path as packets gets dropped in ip_rcv().
+#
+# This script don't really need any hardware. It benchmarks software
+# RX path just after NIC driver level. With bursting is also
+# "removes" the SKB alloc/free overhead.
+#
+# Setup scenarios for measuring ingress qdisc (with invalid dst_mac):
+# ------------------------------------------------------------------
+# (1) no ingress (uses static_key_false(&ingress_needed))
+#
+# (2) ingress on other dev (change ingress_needed and calls
+# handle_ing() but exit early)
+#
+# config: tc qdisc add dev $SOMEDEV handle ffff: ingress
+#
+# (3) ingress on this dev, handle_ing() -> tc_classify()
+#
+# config: tc qdisc add dev $DEV handle ffff: ingress
+#
+# (4) ingress on this dev + drop at u32 classifier/action.
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+# Using invalid DST_MAC will cause the packets to get dropped in
+# ip_rcv() which is part of the test
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+[ -z "$BURST" ] && BURST=1024
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="10000000" # Zero means indefinitely
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+ # The device name is extended with @name, using thread number to
+ # make then unique, but any name will do.
+ dev=${DEV}@${thread}
+
+ # Add remove all other devices and add_device $dev to thread
+ pg_thread $thread "rem_device_all"
+ pg_thread $thread "add_device" $dev
+
+ # Base config of dev
+ pg_set $dev "flag QUEUE_MAP_CPU"
+ pg_set $dev "count $COUNT"
+ pg_set $dev "pkt_size $PKT_SIZE"
+ pg_set $dev "delay $DELAY"
+ pg_set $dev "flag NO_TIMESTAMP"
+
+ # Destination
+ pg_set $dev "dst_mac $DST_MAC"
+ pg_set $dev "dst $DEST_IP"
+
+ # Inject packet into RX path of stack
+ pg_set $dev "xmit_mode netif_receive"
+
+ # Burst allow us to avoid measuring SKB alloc/free overhead
+ pg_set $dev "burst $BURST"
+done
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+done
diff --git a/kernel/samples/pktgen/pktgen_sample01_simple.sh b/kernel/samples/pktgen/pktgen_sample01_simple.sh
new file mode 100755
index 000000000..8c9d318c2
--- /dev/null
+++ b/kernel/samples/pktgen/pktgen_sample01_simple.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Simple example:
+# * pktgen sending with single thread and single interface
+# * flow variation via random UDP source port
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+# - go look in parameters.sh to see which setting are avail
+# - required param is the interface "-i" stored in $DEV
+source ${basedir}/parameters.sh
+#
+# Set some default params, if they didn't get set
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+# Example enforce param "-m" for dst_mac
+[ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac"
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="100000" # Zero means indefinitely
+
+# Flow variation random source port between min and max
+UDP_MIN=9
+UDP_MAX=109
+
+# General cleanup everything since last run
+# (especially important if other threads were configured by other scripts)
+pg_ctrl "reset"
+
+# Add remove all other devices and add_device $DEV to thread 0
+thread=0
+pg_thread $thread "rem_device_all"
+pg_thread $thread "add_device" $DEV
+
+# How many packets to send (zero means indefinitely)
+pg_set $DEV "count $COUNT"
+
+# Reduce alloc cost by sending same SKB many times
+# - this obviously affects the randomness within the packet
+pg_set $DEV "clone_skb $CLONE_SKB"
+
+# Set packet size
+pg_set $DEV "pkt_size $PKT_SIZE"
+
+# Delay between packets (zero means max speed)
+pg_set $DEV "delay $DELAY"
+
+# Flag example disabling timestamping
+pg_set $DEV "flag NO_TIMESTAMP"
+
+# Destination
+pg_set $DEV "dst_mac $DST_MAC"
+pg_set $DEV "dst $DEST_IP"
+
+# Setup random UDP port src range
+pg_set $DEV "flag UDPSRC_RND"
+pg_set $DEV "udp_src_min $UDP_MIN"
+pg_set $DEV "udp_src_max $UDP_MAX"
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+echo "Result device: $DEV"
+cat /proc/net/pktgen/$DEV
diff --git a/kernel/samples/pktgen/pktgen_sample02_multiqueue.sh b/kernel/samples/pktgen/pktgen_sample02_multiqueue.sh
new file mode 100755
index 000000000..32467aea8
--- /dev/null
+++ b/kernel/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Multiqueue: Using pktgen threads for sending on multiple CPUs
+# * adding devices to kernel threads
+# * notice the naming scheme for keeping device names unique
+# * nameing scheme: dev@thread_number
+# * flow variation via random UDP source port
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+#
+# Required param: -i dev in $DEV
+source ${basedir}/parameters.sh
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="100000" # Zero means indefinitely
+[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+
+# Flow variation random source port between min and max
+UDP_MIN=9
+UDP_MAX=109
+
+# (example of setting default params in your script)
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+ # The device name is extended with @name, using thread number to
+ # make then unique, but any name will do.
+ dev=${DEV}@${thread}
+
+ # Add remove all other devices and add_device $dev to thread
+ pg_thread $thread "rem_device_all"
+ pg_thread $thread "add_device" $dev
+
+ # Notice config queue to map to cpu (mirrors smp_processor_id())
+ # It is beneficial to map IRQ /proc/irq/*/smp_affinity 1:1 to CPU number
+ pg_set $dev "flag QUEUE_MAP_CPU"
+
+ # Base config of dev
+ pg_set $dev "count $COUNT"
+ pg_set $dev "clone_skb $CLONE_SKB"
+ pg_set $dev "pkt_size $PKT_SIZE"
+ pg_set $dev "delay $DELAY"
+
+ # Flag example disabling timestamping
+ pg_set $dev "flag NO_TIMESTAMP"
+
+ # Destination
+ pg_set $dev "dst_mac $DST_MAC"
+ pg_set $dev "dst $DEST_IP"
+
+ # Setup random UDP port src range
+ pg_set $dev "flag UDPSRC_RND"
+ pg_set $dev "udp_src_min $UDP_MIN"
+ pg_set $dev "udp_src_max $UDP_MAX"
+done
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+done
diff --git a/kernel/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/kernel/samples/pktgen/pktgen_sample03_burst_single_flow.sh
new file mode 100755
index 000000000..775f5d0a1
--- /dev/null
+++ b/kernel/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+#
+# Script for max single flow performance
+# - If correctly tuned[1], single CPU 10G wirespeed small pkts is possible[2]
+#
+# Using pktgen "burst" option (use -b $N)
+# - To boost max performance
+# - Avail since: kernel v3.18
+# * commit 38b2cf2982dc73 ("net: pktgen: packet bursting via skb->xmit_more")
+# - This avoids writing the HW tailptr on every driver xmit
+# - The performance boost is impressive, see commit and blog [2]
+#
+# Notice: On purpose generates a single (UDP) flow towards target,
+# reason behind this is to only overload/activate a single CPU on
+# target host. And no randomness for pktgen also makes it faster.
+#
+# Tuning see:
+# [1] http://netoptimizer.blogspot.dk/2014/06/pktgen-for-network-overload-testing.html
+# [2] http://netoptimizer.blogspot.dk/2014/10/unlocked-10gbps-tx-wirespeed-smallest.html
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+# Set some default params, if they didn't get set
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+[ -z "$BURST" ] && BURST=32
+[ -z "$CLONE_SKB" ] && CLONE_SKB="100000"
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="0" # Zero means indefinitely
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+
+ # Add remove all other devices and add_device $dev to thread
+ pg_thread $thread "rem_device_all"
+ pg_thread $thread "add_device" $dev
+
+ # Base config
+ pg_set $dev "flag QUEUE_MAP_CPU"
+ pg_set $dev "count $COUNT"
+ pg_set $dev "clone_skb $CLONE_SKB"
+ pg_set $dev "pkt_size $PKT_SIZE"
+ pg_set $dev "delay $DELAY"
+ pg_set $dev "flag NO_TIMESTAMP"
+
+ # Destination
+ pg_set $dev "dst_mac $DST_MAC"
+ pg_set $dev "dst $DEST_IP"
+
+ # Setup burst, for easy testing -b 0 disable bursting
+ # (internally in pktgen default and minimum burst=1)
+ if [[ ${BURST} -ne 0 ]]; then
+ pg_set $dev "burst $BURST"
+ else
+ info "$dev: Not using burst"
+ fi
+done
+
+# Run if user hits control-c
+function control_c() {
+ # Print results
+ for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap control_c SIGINT
+
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
diff --git a/kernel/samples/trace_events/trace-events-sample.h b/kernel/samples/trace_events/trace-events-sample.h
index 125d6402f..d6b75bb49 100644
--- a/kernel/samples/trace_events/trace-events-sample.h
+++ b/kernel/samples/trace_events/trace-events-sample.h
@@ -4,14 +4,14 @@
*
* The define_trace.h below will also look for a file name of
* TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
- * In this case, it would look for sample.h
+ * In this case, it would look for sample-trace.h
*
* If the header name will be different than the system name
* (as in this case), then you can override the header name that
* define_trace.h will look up by defining TRACE_INCLUDE_FILE
*
* This file is called trace-events-sample.h but we want the system
- * to be called "sample". Therefore we must define the name of this
+ * to be called "sample-trace". Therefore we must define the name of this
* file:
*
* #define TRACE_INCLUDE_FILE trace-events-sample
@@ -106,7 +106,7 @@
*
* memcpy(__entry->foo, bar, 10);
*
- * __dynamic_array: This is similar to array, but can vary is size from
+ * __dynamic_array: This is similar to array, but can vary its size from
* instance to instance of the tracepoint being called.
* Like __array, this too has three elements (type, name, size);
* type is the type of the element, name is the name of the array.