diff options
Diffstat (limited to 'kernel/samples')
67 files changed, 7501 insertions, 0 deletions
diff --git a/kernel/samples/Kconfig b/kernel/samples/Kconfig new file mode 100644 index 000000000..224ebb46b --- /dev/null +++ b/kernel/samples/Kconfig @@ -0,0 +1,73 @@ +menuconfig SAMPLES + bool "Sample kernel code" + help + You can build and test sample kernel code here. + +if SAMPLES + +config SAMPLE_TRACE_EVENTS + tristate "Build trace_events examples -- loadable modules only" + depends on EVENT_TRACING && m + help + This build trace event example modules. + +config SAMPLE_KOBJECT + tristate "Build kobject examples -- loadable modules only" + depends on m + help + This config option will allow you to build a number of + different kobject sample modules showing how to use kobjects, + ksets, and ktypes properly. + + If in doubt, say "N" here. + +config SAMPLE_KPROBES + tristate "Build kprobes examples -- loadable modules only" + depends on KPROBES && m + help + This build several kprobes example modules. + +config SAMPLE_KRETPROBES + tristate "Build kretprobes example -- loadable modules only" + default m + depends on SAMPLE_KPROBES && KRETPROBES + +config SAMPLE_HW_BREAKPOINT + tristate "Build kernel hardware breakpoint examples -- loadable module only" + depends on HAVE_HW_BREAKPOINT && m + help + This builds kernel hardware breakpoint example modules. + +config SAMPLE_KFIFO + tristate "Build kfifo examples -- loadable modules only" + depends on m + help + This config option will allow you to build a number of + different kfifo sample modules showing how to use the + generic kfifo API. + + If in doubt, say "N" here. + +config SAMPLE_KDB + tristate "Build kdb command example -- loadable modules only" + depends on KGDB_KDB && m + help + Build an example of how to dynamically add the hello + command to the kdb shell. + +config SAMPLE_RPMSG_CLIENT + tristate "Build rpmsg client sample -- loadable modules only" + depends on RPMSG && m + help + Build an rpmsg client sample driver, which demonstrates how + to communicate with an AMP-configured remote processor over + the rpmsg bus. + +config SAMPLE_LIVEPATCH + tristate "Build live patching sample -- loadable modules only" + depends on LIVEPATCH && m + help + Builds a sample live patch that replaces the procfs handler + for /proc/cmdline to print "this has been live patched". + +endif # SAMPLES diff --git a/kernel/samples/Makefile b/kernel/samples/Makefile new file mode 100644 index 000000000..f00257bcc --- /dev/null +++ b/kernel/samples/Makefile @@ -0,0 +1,4 @@ +# Makefile for Linux samples code + +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ + hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ diff --git a/kernel/samples/bpf/Makefile b/kernel/samples/bpf/Makefile new file mode 100644 index 000000000..76e3458a5 --- /dev/null +++ b/kernel/samples/bpf/Makefile @@ -0,0 +1,50 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +# List of programs to build +hostprogs-y := test_verifier test_maps +hostprogs-y += sock_example +hostprogs-y += sockex1 +hostprogs-y += sockex2 +hostprogs-y += tracex1 +hostprogs-y += tracex2 +hostprogs-y += tracex3 +hostprogs-y += tracex4 + +test_verifier-objs := test_verifier.o libbpf.o +test_maps-objs := test_maps.o libbpf.o +sock_example-objs := sock_example.o libbpf.o +sockex1-objs := bpf_load.o libbpf.o sockex1_user.o +sockex2-objs := bpf_load.o libbpf.o sockex2_user.o +tracex1-objs := bpf_load.o libbpf.o tracex1_user.o +tracex2-objs := bpf_load.o libbpf.o tracex2_user.o +tracex3-objs := bpf_load.o libbpf.o tracex3_user.o +tracex4-objs := bpf_load.o libbpf.o tracex4_user.o + +# Tell kbuild to always build the programs +always := $(hostprogs-y) +always += sockex1_kern.o +always += sockex2_kern.o +always += tracex1_kern.o +always += tracex2_kern.o +always += tracex3_kern.o +always += tracex4_kern.o +always += tcbpf1_kern.o + +HOSTCFLAGS += -I$(objtree)/usr/include + +HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +HOSTLOADLIBES_sockex1 += -lelf +HOSTLOADLIBES_sockex2 += -lelf +HOSTLOADLIBES_tracex1 += -lelf +HOSTLOADLIBES_tracex2 += -lelf +HOSTLOADLIBES_tracex3 += -lelf +HOSTLOADLIBES_tracex4 += -lelf -lrt + +# point this to your LLVM backend with bpf support +LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc + +%.o: %.c + clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ + -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/kernel/samples/bpf/bpf_helpers.h b/kernel/samples/bpf/bpf_helpers.h new file mode 100644 index 000000000..f960b5fb3 --- /dev/null +++ b/kernel/samples/bpf/bpf_helpers.h @@ -0,0 +1,53 @@ +#ifndef __BPF_HELPERS_H +#define __BPF_HELPERS_H + +/* helper macro to place programs, maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +/* helper functions called from eBPF programs written in C */ +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static int (*bpf_map_update_elem)(void *map, void *key, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_update_elem; +static int (*bpf_map_delete_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_delete_elem; +static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = + (void *) BPF_FUNC_probe_read; +static unsigned long long (*bpf_ktime_get_ns)(void) = + (void *) BPF_FUNC_ktime_get_ns; +static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = + (void *) BPF_FUNC_trace_printk; + +/* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions + */ +struct sk_buff; +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +/* a helper structure used by eBPF C program + * to describe map attributes to elf_bpf loader + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = + (void *) BPF_FUNC_skb_store_bytes; +static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = + (void *) BPF_FUNC_l3_csum_replace; +static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = + (void *) BPF_FUNC_l4_csum_replace; + +#endif diff --git a/kernel/samples/bpf/bpf_load.c b/kernel/samples/bpf/bpf_load.c new file mode 100644 index 000000000..38dac5a53 --- /dev/null +++ b/kernel/samples/bpf/bpf_load.c @@ -0,0 +1,312 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <libelf.h> +#include <gelf.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdbool.h> +#include <stdlib.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <poll.h> +#include "libbpf.h" +#include "bpf_helpers.h" +#include "bpf_load.h" + +#define DEBUGFS "/sys/kernel/debug/tracing/" + +static char license[128]; +static int kern_version; +static bool processed_sec[128]; +int map_fd[MAX_MAPS]; +int prog_fd[MAX_PROGS]; +int event_fd[MAX_PROGS]; +int prog_cnt; + +static int load_and_attach(const char *event, struct bpf_insn *prog, int size) +{ + bool is_socket = strncmp(event, "socket", 6) == 0; + bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; + bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; + enum bpf_prog_type prog_type; + char buf[256]; + int fd, efd, err, id; + struct perf_event_attr attr = {}; + + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + if (is_socket) { + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + } else if (is_kprobe || is_kretprobe) { + prog_type = BPF_PROG_TYPE_KPROBE; + } else { + printf("Unknown event '%s'\n", event); + return -1; + } + + if (is_kprobe || is_kretprobe) { + if (is_kprobe) + event += 7; + else + event += 10; + + snprintf(buf, sizeof(buf), + "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", + is_kprobe ? 'p' : 'r', event, event); + err = system(buf); + if (err < 0) { + printf("failed to create kprobe '%s' error '%s'\n", + event, strerror(errno)); + return -1; + } + } + + fd = bpf_prog_load(prog_type, prog, size, license, kern_version); + + if (fd < 0) { + printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + if (is_socket) + return 0; + + strcpy(buf, DEBUGFS); + strcat(buf, "events/kprobes/"); + strcat(buf, event); + strcat(buf, "/id"); + + efd = open(buf, O_RDONLY, 0); + if (efd < 0) { + printf("failed to open event %s\n", event); + return -1; + } + + err = read(efd, buf, sizeof(buf)); + if (err < 0 || err >= sizeof(buf)) { + printf("read from '%s' failed '%s'\n", event, strerror(errno)); + return -1; + } + + close(efd); + + buf[err] = 0; + id = atoi(buf); + attr.config = id; + + efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + if (efd < 0) { + printf("event %d fd %d err %s\n", id, efd, strerror(errno)); + return -1; + } + event_fd[prog_cnt - 1] = efd; + ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + + return 0; +} + +static int load_maps(struct bpf_map_def *maps, int len) +{ + int i; + + for (i = 0; i < len / sizeof(struct bpf_map_def); i++) { + + map_fd[i] = bpf_create_map(maps[i].type, + maps[i].key_size, + maps[i].value_size, + maps[i].max_entries); + if (map_fd[i] < 0) + return 1; + } + return 0; +} + +static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, + GElf_Shdr *shdr, Elf_Data **data) +{ + Elf_Scn *scn; + + scn = elf_getscn(elf, i); + if (!scn) + return 1; + + if (gelf_getshdr(scn, shdr) != shdr) + return 2; + + *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); + if (!*shname || !shdr->sh_size) + return 3; + + *data = elf_getdata(scn, 0); + if (!*data || elf_getdata(scn, *data) != NULL) + return 4; + + return 0; +} + +static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, + GElf_Shdr *shdr, struct bpf_insn *insn) +{ + int i, nrels; + + nrels = shdr->sh_size / shdr->sh_entsize; + + for (i = 0; i < nrels; i++) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + + gelf_getrel(data, i, &rel); + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + + gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); + + if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { + printf("invalid relo for insn[%d].code 0x%x\n", + insn_idx, insn[insn_idx].code); + return 1; + } + insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; + } + + return 0; +} + +int load_bpf_file(char *path) +{ + int fd, i; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr, shdr_prog; + Elf_Data *data, *data_prog, *symbols = NULL; + char *shname, *shname_prog; + + if (elf_version(EV_CURRENT) == EV_NONE) + return 1; + + fd = open(path, O_RDONLY, 0); + if (fd < 0) + return 1; + + elf = elf_begin(fd, ELF_C_READ, NULL); + + if (!elf) + return 1; + + if (gelf_getehdr(elf, &ehdr) != &ehdr) + return 1; + + /* clear all kprobes */ + i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events"); + + /* scan over all elf sections to get license and map info */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (0) /* helpful for llvm debugging */ + printf("section %d:%s data %p size %zd link %d flags %d\n", + i, shname, data->d_buf, data->d_size, + shdr.sh_link, (int) shdr.sh_flags); + + if (strcmp(shname, "license") == 0) { + processed_sec[i] = true; + memcpy(license, data->d_buf, data->d_size); + } else if (strcmp(shname, "version") == 0) { + processed_sec[i] = true; + if (data->d_size != sizeof(int)) { + printf("invalid size of version section %zd\n", + data->d_size); + return 1; + } + memcpy(&kern_version, data->d_buf, sizeof(int)); + } else if (strcmp(shname, "maps") == 0) { + processed_sec[i] = true; + if (load_maps(data->d_buf, data->d_size)) + return 1; + } else if (shdr.sh_type == SHT_SYMTAB) { + symbols = data; + } + } + + /* load programs that need map fixup (relocations) */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + if (shdr.sh_type == SHT_REL) { + struct bpf_insn *insns; + + if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, + &shdr_prog, &data_prog)) + continue; + + insns = (struct bpf_insn *) data_prog->d_buf; + + processed_sec[shdr.sh_info] = true; + processed_sec[i] = true; + + if (parse_relo_and_apply(data, symbols, &shdr, insns)) + continue; + + if (memcmp(shname_prog, "kprobe/", 7) == 0 || + memcmp(shname_prog, "kretprobe/", 10) == 0 || + memcmp(shname_prog, "socket", 6) == 0) + load_and_attach(shname_prog, insns, data_prog->d_size); + } + } + + /* load programs that don't use maps */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (processed_sec[i]) + continue; + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (memcmp(shname, "kprobe/", 7) == 0 || + memcmp(shname, "kretprobe/", 10) == 0 || + memcmp(shname, "socket", 6) == 0) + load_and_attach(shname, data->d_buf, data->d_size); + } + + close(fd); + return 0; +} + +void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[4096]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf)); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} diff --git a/kernel/samples/bpf/bpf_load.h b/kernel/samples/bpf/bpf_load.h new file mode 100644 index 000000000..cbd7c2b53 --- /dev/null +++ b/kernel/samples/bpf/bpf_load.h @@ -0,0 +1,27 @@ +#ifndef __BPF_LOAD_H +#define __BPF_LOAD_H + +#define MAX_MAPS 32 +#define MAX_PROGS 32 + +extern int map_fd[MAX_MAPS]; +extern int prog_fd[MAX_PROGS]; +extern int event_fd[MAX_PROGS]; + +/* parses elf file compiled by llvm .c->.o + * . parses 'maps' section and creates maps via BPF syscall + * . parses 'license' section and passes it to syscall + * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by + * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD + * . loads eBPF programs via BPF syscall + * + * One ELF file can contain multiple BPF programs which will be loaded + * and their FDs stored stored in prog_fd array + * + * returns zero on success + */ +int load_bpf_file(char *path); + +void read_trace_pipe(void); + +#endif diff --git a/kernel/samples/bpf/libbpf.c b/kernel/samples/bpf/libbpf.c new file mode 100644 index 000000000..7e1efa7e2 --- /dev/null +++ b/kernel/samples/bpf/libbpf.c @@ -0,0 +1,135 @@ +/* eBPF mini library */ +#include <stdlib.h> +#include <stdio.h> +#include <linux/unistd.h> +#include <unistd.h> +#include <string.h> +#include <linux/netlink.h> +#include <linux/bpf.h> +#include <errno.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <linux/if_packet.h> +#include <arpa/inet.h> +#include "libbpf.h" + +static __u64 ptr_to_u64(void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, + int max_entries) +{ + union bpf_attr attr = { + .map_type = map_type, + .key_size = key_size, + .value_size = value_size, + .max_entries = max_entries + }; + + return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags) +{ + union bpf_attr attr = { + .map_fd = fd, + .key = ptr_to_u64(key), + .value = ptr_to_u64(value), + .flags = flags, + }; + + return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +int bpf_lookup_elem(int fd, void *key, void *value) +{ + union bpf_attr attr = { + .map_fd = fd, + .key = ptr_to_u64(key), + .value = ptr_to_u64(value), + }; + + return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +int bpf_delete_elem(int fd, void *key) +{ + union bpf_attr attr = { + .map_fd = fd, + .key = ptr_to_u64(key), + }; + + return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); +} + +int bpf_get_next_key(int fd, void *key, void *next_key) +{ + union bpf_attr attr = { + .map_fd = fd, + .key = ptr_to_u64(key), + .next_key = ptr_to_u64(next_key), + }; + + return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); +} + +#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u)) + +char bpf_log_buf[LOG_BUF_SIZE]; + +int bpf_prog_load(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, int prog_len, + const char *license, int kern_version) +{ + union bpf_attr attr = { + .prog_type = prog_type, + .insns = ptr_to_u64((void *) insns), + .insn_cnt = prog_len / sizeof(struct bpf_insn), + .license = ptr_to_u64((void *) license), + .log_buf = ptr_to_u64(bpf_log_buf), + .log_size = LOG_BUF_SIZE, + .log_level = 1, + }; + + /* assign one field outside of struct init to make sure any + * padding is zero initialized + */ + attr.kern_version = kern_version; + + bpf_log_buf[0] = 0; + + return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +int open_raw_sock(const char *name) +{ + struct sockaddr_ll sll; + int sock; + + sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL)); + if (sock < 0) { + printf("cannot create raw socket\n"); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = if_nametoindex(name); + sll.sll_protocol = htons(ETH_P_ALL); + if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + printf("bind to %s: %s\n", name, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} + +int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, + int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, attr, pid, cpu, + group_fd, flags); +} diff --git a/kernel/samples/bpf/libbpf.h b/kernel/samples/bpf/libbpf.h new file mode 100644 index 000000000..7235e292a --- /dev/null +++ b/kernel/samples/bpf/libbpf.h @@ -0,0 +1,190 @@ +/* eBPF mini library */ +#ifndef __LIBBPF_H +#define __LIBBPF_H + +struct bpf_insn; + +int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, + int max_entries); +int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags); +int bpf_lookup_elem(int fd, void *key, void *value); +int bpf_delete_elem(int fd, void *key); +int bpf_get_next_key(int fd, void *key, void *next_key); + +int bpf_prog_load(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, int insn_len, + const char *license, int kern_version); + +#define LOG_BUF_SIZE 65536 +extern char bpf_log_buf[LOG_BUF_SIZE]; + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +/* create RAW socket and bind to interface 'name' */ +int open_raw_sock(const char *name); + +struct perf_event_attr; +int perf_event_open(struct perf_event_attr *attr, int pid, int cpu, + int group_fd, unsigned long flags); +#endif diff --git a/kernel/samples/bpf/sock_example.c b/kernel/samples/bpf/sock_example.c new file mode 100644 index 000000000..a0ce251c5 --- /dev/null +++ b/kernel/samples/bpf/sock_example.c @@ -0,0 +1,101 @@ +/* eBPF example program: + * - creates arraymap in kernel with key 4 bytes and value 8 bytes + * + * - loads eBPF program: + * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)]; + * *(u32*)(fp - 4) = r0; + * // assuming packet is IPv4, lookup ip->proto in a map + * value = bpf_map_lookup_elem(map_fd, fp - 4); + * if (value) + * (*(u64*)value) += 1; + * + * - attaches this program to eth0 raw socket + * + * - every second user space reads map[tcp], map[udp], map[icmp] to see + * how many packets of given protocol were seen on eth0 + */ +#include <stdio.h> +#include <unistd.h> +#include <assert.h> +#include <linux/bpf.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <stddef.h> +#include "libbpf.h" + +static int test_sock(void) +{ + int sock = -1, map_fd, prog_fd, i, key; + long long value = 0, tcp_cnt, udp_cnt, icmp_cnt; + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), + 256); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + goto cleanup; + } + + struct bpf_insn prog[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ + BPF_EXIT_INSN(), + }; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), + "GPL", 0); + if (prog_fd < 0) { + printf("failed to load prog '%s'\n", strerror(errno)); + goto cleanup; + } + + sock = open_raw_sock("lo"); + + if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, + sizeof(prog_fd)) < 0) { + printf("setsockopt %s\n", strerror(errno)); + goto cleanup; + } + + for (i = 0; i < 10; i++) { + key = IPPROTO_TCP; + assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0); + + key = IPPROTO_UDP; + assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0); + + key = IPPROTO_ICMP; + assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0); + + printf("TCP %lld UDP %lld ICMP %lld packets\n", + tcp_cnt, udp_cnt, icmp_cnt); + sleep(1); + } + +cleanup: + /* maps, programs, raw sockets will auto cleanup on process exit */ + return 0; +} + +int main(void) +{ + FILE *f; + + f = popen("ping -c5 localhost", "r"); + (void)f; + + return test_sock(); +} diff --git a/kernel/samples/bpf/sockex1_kern.c b/kernel/samples/bpf/sockex1_kern.c new file mode 100644 index 000000000..ed18e9a49 --- /dev/null +++ b/kernel/samples/bpf/sockex1_kern.c @@ -0,0 +1,29 @@ +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +SEC("socket1") +int bpf_prog1(struct __sk_buff *skb) +{ + int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); + long *value; + + if (skb->pkt_type != PACKET_OUTGOING) + return 0; + + value = bpf_map_lookup_elem(&my_map, &index); + if (value) + __sync_fetch_and_add(value, skb->len); + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/kernel/samples/bpf/sockex1_user.c b/kernel/samples/bpf/sockex1_user.c new file mode 100644 index 000000000..678ce4693 --- /dev/null +++ b/kernel/samples/bpf/sockex1_user.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <arpa/inet.h> + +int main(int ac, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, + sizeof(prog_fd[0])) == 0); + + f = popen("ping -c5 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + long long tcp_cnt, udp_cnt, icmp_cnt; + int key; + + key = IPPROTO_TCP; + assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); + + key = IPPROTO_UDP; + assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); + + key = IPPROTO_ICMP; + assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); + + printf("TCP %lld UDP %lld ICMP %lld bytes\n", + tcp_cnt, udp_cnt, icmp_cnt); + sleep(1); + } + + return 0; +} diff --git a/kernel/samples/bpf/sockex2_kern.c b/kernel/samples/bpf/sockex2_kern.c new file mode 100644 index 000000000..ba0e177ff --- /dev/null +++ b/kernel/samples/bpf/sockex2_kern.c @@ -0,0 +1,221 @@ +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" +#include <uapi/linux/in.h> +#include <uapi/linux/if.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/if_tunnel.h> +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct flow_keys { + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + __u16 thoff; + __u8 ip_proto; +}; + +static inline int proto_ports_offset(__u64 proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: + return 4; + default: + return 0; + } +} + +static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff) +{ + return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) + & (IP_MF | IP_OFFSET); +} + +static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) +{ + __u64 w0 = load_word(ctx, off); + __u64 w1 = load_word(ctx, off + 4); + __u64 w2 = load_word(ctx, off + 8); + __u64 w3 = load_word(ctx, off + 12); + + return (__u32)(w0 ^ w1 ^ w2 ^ w3); +} + +static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, + struct flow_keys *flow) +{ + __u64 verlen; + + if (unlikely(ip_is_fragment(skb, nhoff))) + *ip_proto = 0; + else + *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + + if (*ip_proto != IPPROTO_GRE) { + flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); + flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); + } + + verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); + if (likely(verlen == 0x45)) + nhoff += 20; + else + nhoff += (verlen & 0xF) << 2; + + return nhoff; +} + +static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, + struct flow_keys *flow) +{ + *ip_proto = load_byte(skb, + nhoff + offsetof(struct ipv6hdr, nexthdr)); + flow->src = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, saddr)); + flow->dst = ipv6_addr_hash(skb, + nhoff + offsetof(struct ipv6hdr, daddr)); + nhoff += sizeof(struct ipv6hdr); + + return nhoff; +} + +static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow) +{ + __u64 nhoff = ETH_HLEN; + __u64 ip_proto; + __u64 proto = load_half(skb, 12); + int poff; + + if (proto == ETH_P_8021AD) { + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (proto == ETH_P_8021Q) { + proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (likely(proto == ETH_P_IP)) + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + else if (proto == ETH_P_IPV6) + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + else + return false; + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + }; + + __u64 gre_flags = load_half(skb, + nhoff + offsetof(struct gre_hdr, flags)); + __u64 gre_proto = load_half(skb, + nhoff + offsetof(struct gre_hdr, proto)); + + if (gre_flags & (GRE_VERSION|GRE_ROUTING)) + break; + + proto = gre_proto; + nhoff += 4; + if (gre_flags & GRE_CSUM) + nhoff += 4; + if (gre_flags & GRE_KEY) + nhoff += 4; + if (gre_flags & GRE_SEQ) + nhoff += 4; + + if (proto == ETH_P_8021Q) { + proto = load_half(skb, + nhoff + offsetof(struct vlan_hdr, + h_vlan_encapsulated_proto)); + nhoff += sizeof(struct vlan_hdr); + } + + if (proto == ETH_P_IP) + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + else if (proto == ETH_P_IPV6) + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + else + return false; + break; + } + case IPPROTO_IPIP: + nhoff = parse_ip(skb, nhoff, &ip_proto, flow); + break; + case IPPROTO_IPV6: + nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); + break; + default: + break; + } + + flow->ip_proto = ip_proto; + poff = proto_ports_offset(ip_proto); + if (poff >= 0) { + nhoff += poff; + flow->ports = load_word(skb, nhoff); + } + + flow->thoff = (__u16) nhoff; + + return true; +} + +struct pair { + long packets; + long bytes; +}; + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__be32), + .value_size = sizeof(struct pair), + .max_entries = 1024, +}; + +SEC("socket2") +int bpf_prog2(struct __sk_buff *skb) +{ + struct flow_keys flow; + struct pair *value; + u32 key; + + if (!flow_dissector(skb, &flow)) + return 0; + + key = flow.dst; + value = bpf_map_lookup_elem(&hash_map, &key); + if (value) { + __sync_fetch_and_add(&value->packets, 1); + __sync_fetch_and_add(&value->bytes, skb->len); + } else { + struct pair val = {1, skb->len}; + + bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/kernel/samples/bpf/sockex2_user.c b/kernel/samples/bpf/sockex2_user.c new file mode 100644 index 000000000..29a276d76 --- /dev/null +++ b/kernel/samples/bpf/sockex2_user.c @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <unistd.h> +#include <arpa/inet.h> + +struct pair { + __u64 packets; + __u64 bytes; +}; + +int main(int ac, char **argv) +{ + char filename[256]; + FILE *f; + int i, sock; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + sock = open_raw_sock("lo"); + + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, + sizeof(prog_fd[0])) == 0); + + f = popen("ping -c5 localhost", "r"); + (void) f; + + for (i = 0; i < 5; i++) { + int key = 0, next_key; + struct pair value; + + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &value); + printf("ip %s bytes %lld packets %lld\n", + inet_ntoa((struct in_addr){htonl(next_key)}), + value.bytes, value.packets); + key = next_key; + } + sleep(1); + } + return 0; +} diff --git a/kernel/samples/bpf/tcbpf1_kern.c b/kernel/samples/bpf/tcbpf1_kern.c new file mode 100644 index 000000000..7c27710f8 --- /dev/null +++ b/kernel/samples/bpf/tcbpf1_kern.c @@ -0,0 +1,67 @@ +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/in.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/filter.h> + +#include "bpf_helpers.h" + +/* compiler workaround */ +#define _htonl __builtin_bswap32 + +static inline void set_dst_mac(struct __sk_buff *skb, char *mac) +{ + bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1); +} + +#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check)) +#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos)) + +static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) +{ + __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF); + + bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2); + bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0); +} + +#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check)) +#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr)) + +#define IS_PSEUDO 0x10 + +static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) +{ + __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF)); + + bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip)); + bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); + bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0); +} + +#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest)) +static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) +{ + __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF)); + + bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port)); + bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0); +} + +SEC("classifier") +int bpf_prog1(struct __sk_buff *skb) +{ + __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol)); + long *value; + + if (proto == IPPROTO_TCP) { + set_ip_tos(skb, 8); + set_tcp_ip_src(skb, 0xA010101); + set_tcp_dest_port(skb, 5001); + } + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/kernel/samples/bpf/test_maps.c b/kernel/samples/bpf/test_maps.c new file mode 100644 index 000000000..6299ee95c --- /dev/null +++ b/kernel/samples/bpf/test_maps.c @@ -0,0 +1,291 @@ +/* + * Testsuite for eBPF maps + * + * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <sys/wait.h> +#include <stdlib.h> +#include "libbpf.h" + +/* sanity tests for map API */ +static void test_hashmap_sanity(int i, void *data) +{ + long long key, next_key, value; + int map_fd; + + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2); + if (map_fd < 0) { + printf("failed to create hashmap '%s'\n", strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* insert key=1 element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + + value = 0; + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + /* key=1 already exists */ + errno == EEXIST); + + assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == EINVAL); + + /* check that key=1 can be found */ + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234); + + key = 2; + /* check that key=2 is not found */ + assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + /* insert key=2 element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); + + /* key=1 and key=2 were inserted, check that key=0 cannot be inserted + * due to max_entries limit + */ + key = 0; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* check that key = 0 doesn't exist */ + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); + + /* iterate over two elements */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && + (next_key == 1 || next_key == 2)); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && + (next_key == 1 || next_key == 2)); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* delete both elements */ + key = 1; + assert(bpf_delete_elem(map_fd, &key) == 0); + key = 2; + assert(bpf_delete_elem(map_fd, &key) == 0); + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); + + key = 0; + /* check that map is empty */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 && + errno == ENOENT); + close(map_fd); +} + +static void test_arraymap_sanity(int i, void *data) +{ + int key, next_key, map_fd; + long long value; + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2); + if (map_fd < 0) { + printf("failed to create arraymap '%s'\n", strerror(errno)); + exit(1); + } + + key = 1; + value = 1234; + /* insert key=1 element */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + + value = 0; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* check that key=1 can be found */ + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234); + + key = 0; + /* check that key=0 is also found and zero initialized */ + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0); + + + /* key=0 and key=1 were inserted, check that key=2 cannot be inserted + * due to max_entries limit + */ + key = 2; + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 && + errno == E2BIG); + + /* check that key = 2 doesn't exist */ + assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); + + /* iterate over two elements */ + assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && + next_key == 0); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && + next_key == 1); + assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 && + errno == ENOENT); + + /* delete shouldn't succeed */ + key = 1; + assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL); + + close(map_fd); +} + +#define MAP_SIZE (32 * 1024) +static void test_map_large(void) +{ + struct bigkey { + int a; + char b[116]; + long long c; + } key; + int map_fd, i, value; + + /* allocate 4Mbyte of memory */ + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE); + if (map_fd < 0) { + printf("failed to create large map '%s'\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < MAP_SIZE; i++) { + key = (struct bigkey) {.c = i}; + value = i; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); + } + key.c = -1; + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); + + /* iterate through all elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_get_next_key(map_fd, &key, &key) == 0); + assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); + + key.c = 0; + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0); + key.a = 1; + assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT); + + close(map_fd); +} + +/* fork N children and wait for them to complete */ +static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data) +{ + pid_t pid[tasks]; + int i; + + for (i = 0; i < tasks; i++) { + pid[i] = fork(); + if (pid[i] == 0) { + fn(i, data); + exit(0); + } else if (pid[i] == -1) { + printf("couldn't spawn #%d process\n", i); + exit(1); + } + } + for (i = 0; i < tasks; i++) { + int status; + + assert(waitpid(pid[i], &status, 0) == pid[i]); + assert(status == 0); + } +} + +static void test_map_stress(void) +{ + run_parallel(100, test_hashmap_sanity, NULL); + run_parallel(100, test_arraymap_sanity, NULL); +} + +#define TASKS 1024 +#define DO_UPDATE 1 +#define DO_DELETE 0 +static void do_work(int fn, void *data) +{ + int map_fd = ((int *)data)[0]; + int do_update = ((int *)data)[1]; + int i; + int key, value; + + for (i = fn; i < MAP_SIZE; i += TASKS) { + key = value = i; + if (do_update) + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); + else + assert(bpf_delete_elem(map_fd, &key) == 0); + } +} + +static void test_map_parallel(void) +{ + int i, map_fd, key = 0, value = 0; + int data[2]; + + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), + MAP_SIZE); + if (map_fd < 0) { + printf("failed to create map for parallel test '%s'\n", + strerror(errno)); + exit(1); + } + + data[0] = map_fd; + data[1] = DO_UPDATE; + /* use the same map_fd in children to add elements to this map + * child_0 adds key=0, key=1024, key=2048, ... + * child_1 adds key=1, key=1025, key=2049, ... + * child_1023 adds key=1023, ... + */ + run_parallel(TASKS, do_work, data); + + /* check that key=0 is already there */ + assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && + errno == EEXIST); + + /* check that all elements were inserted */ + key = -1; + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_get_next_key(map_fd, &key, &key) == 0); + assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); + + /* another check for all elements */ + for (i = 0; i < MAP_SIZE; i++) { + key = MAP_SIZE - i - 1; + assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && + value == key); + } + + /* now let's delete all elemenets in parallel */ + data[1] = DO_DELETE; + run_parallel(TASKS, do_work, data); + + /* nothing should be left */ + key = -1; + assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT); +} + +int main(void) +{ + test_hashmap_sanity(0, NULL); + test_arraymap_sanity(0, NULL); + test_map_large(); + test_map_parallel(); + test_map_stress(); + printf("test_maps: OK\n"); + return 0; +} diff --git a/kernel/samples/bpf/test_verifier.c b/kernel/samples/bpf/test_verifier.c new file mode 100644 index 000000000..12f3780af --- /dev/null +++ b/kernel/samples/bpf/test_verifier.c @@ -0,0 +1,835 @@ +/* + * Testsuite for eBPF verifier + * + * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <errno.h> +#include <linux/unistd.h> +#include <string.h> +#include <linux/filter.h> +#include <stddef.h> +#include "libbpf.h" + +#define MAX_INSNS 512 +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +struct bpf_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + int fixup[32]; + const char *errstr; + enum { + ACCEPT, + REJECT + } result; +}; + +static struct bpf_test tests[] = { + { + "add+sub+mul", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_2, 3), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "unreachable", + .insns = { + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .errstr = "unreachable", + .result = REJECT, + }, + { + "unreachable2", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unreachable", + .result = REJECT, + }, + { + "out of range jump", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "out of range jump2", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, -2), + BPF_EXIT_INSN(), + }, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "test1 ld_imm64", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_LD_IMM insn", + .result = REJECT, + }, + { + "test2 ld_imm64", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_LD_IMM insn", + .result = REJECT, + }, + { + "test3 ld_imm64", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test4 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test5 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "no bpf_exit", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), + }, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "loop (back-edge)", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, -1), + BPF_EXIT_INSN(), + }, + .errstr = "back-edge", + .result = REJECT, + }, + { + "loop2 (back-edge)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -4), + BPF_EXIT_INSN(), + }, + .errstr = "back-edge", + .result = REJECT, + }, + { + "conditional loop", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .errstr = "back-edge", + .result = REJECT, + }, + { + "read uninitialized register", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "read invalid register", + .insns = { + BPF_MOV64_REG(BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R15 is invalid", + .result = REJECT, + }, + { + "program doesn't init R0 before exit", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "program doesn't init R0 before exit in all branches", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "stack out of bounds", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack", + .result = REJECT, + }, + { + "invalid call insn1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_CALL uses reserved", + .result = REJECT, + }, + { + "invalid call insn2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_CALL uses reserved", + .result = REJECT, + }, + { + "invalid function call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567), + BPF_EXIT_INSN(), + }, + .errstr = "invalid func 1234567", + .result = REJECT, + }, + { + "uninitialized stack1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup = {2}, + .errstr = "invalid indirect read from stack", + .result = REJECT, + }, + { + "uninitialized stack2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .errstr = "invalid read from stack", + .result = REJECT, + }, + { + "check valid spill/fill", + .insns = { + /* spill R1(ctx) into stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + + /* fill it back into R2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + + /* should be able to access R0 = *(R2 + 8) */ + /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check corrupted spill/fill", + .insns = { + /* spill R1(ctx) into stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + + /* mess up with R1 pointer on stack */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), + + /* fill back into R0 should fail */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + + BPF_EXIT_INSN(), + }, + .errstr = "corrupted spill", + .result = REJECT, + }, + { + "invalid src register in STX", + .insns = { + BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R15 is invalid", + .result = REJECT, + }, + { + "invalid dst register in STX", + .insns = { + BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R14 is invalid", + .result = REJECT, + }, + { + "invalid dst register in ST", + .insns = { + BPF_ST_MEM(BPF_B, 14, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R14 is invalid", + .result = REJECT, + }, + { + "invalid src register in LDX", + .insns = { + BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R12 is invalid", + .result = REJECT, + }, + { + "invalid dst register in LDX", + .insns = { + BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R11 is invalid", + .result = REJECT, + }, + { + "junk insn", + .insns = { + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_LD_IMM", + .result = REJECT, + }, + { + "junk insn2", + .insns = { + BPF_RAW_INSN(1, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_LDX uses reserved fields", + .result = REJECT, + }, + { + "junk insn3", + .insns = { + BPF_RAW_INSN(-1, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_ALU opcode f0", + .result = REJECT, + }, + { + "junk insn4", + .insns = { + BPF_RAW_INSN(-1, -1, -1, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_ALU opcode f0", + .result = REJECT, + }, + { + "junk insn5", + .insns = { + BPF_RAW_INSN(0x7f, -1, -1, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_ALU uses reserved fields", + .result = REJECT, + }, + { + "misaligned read from stack", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "invalid map_fd for function call", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_EXIT_INSN(), + }, + .errstr = "fd 0 is not pointing to valid bpf_map", + .result = REJECT, + }, + { + "don't check return value before access", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {3}, + .errstr = "R0 invalid mem access 'map_value_or_null'", + .result = REJECT, + }, + { + "access memory with incorrect alignment", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), + BPF_EXIT_INSN(), + }, + .fixup = {3}, + .errstr = "misaligned access", + .result = REJECT, + }, + { + "sometimes access memory with incorrect alignment", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + }, + .fixup = {3}, + .errstr = "R0 invalid mem access", + .result = REJECT, + }, + { + "jump test 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "jump test 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 14), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 11), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "jump test 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 19), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JA, 0, 0, 15), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32), + BPF_JMP_IMM(BPF_JA, 0, 0, 11), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40), + BPF_JMP_IMM(BPF_JA, 0, 0, 7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_EXIT_INSN(), + }, + .fixup = {24}, + .result = ACCEPT, + }, + { + "jump test 4", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "jump test 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "access skb fields ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, queue_mapping)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "access skb fields bad1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "access skb fields bad2", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_EXIT_INSN(), + }, + .fixup = {4}, + .errstr = "different pointers", + .result = REJECT, + }, + { + "access skb fields bad3", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -12), + }, + .fixup = {6}, + .errstr = "different pointers", + .result = REJECT, + }, + { + "access skb fields bad4", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -13), + }, + .fixup = {7}, + .errstr = "different pointers", + .result = REJECT, + }, +}; + +static int probe_filter_length(struct bpf_insn *fp) +{ + int len = 0; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + + return len + 1; +} + +static int create_map(void) +{ + long long key, value = 0; + int map_fd; + + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 1024); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + } + + return map_fd; +} + +static int test(void) +{ + int prog_fd, i, pass_cnt = 0, err_cnt = 0; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct bpf_insn *prog = tests[i].insns; + int prog_len = probe_filter_length(prog); + int *fixup = tests[i].fixup; + int map_fd = -1; + + if (*fixup) { + map_fd = create_map(); + + do { + prog[*fixup].imm = map_fd; + fixup++; + } while (*fixup); + } + printf("#%d %s ", i, tests[i].descr); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, + prog_len * sizeof(struct bpf_insn), + "GPL", 0); + + if (tests[i].result == ACCEPT) { + if (prog_fd < 0) { + printf("FAIL\nfailed to load prog '%s'\n", + strerror(errno)); + printf("%s", bpf_log_buf); + err_cnt++; + goto fail; + } + } else { + if (prog_fd >= 0) { + printf("FAIL\nunexpected success to load\n"); + printf("%s", bpf_log_buf); + err_cnt++; + goto fail; + } + if (strstr(bpf_log_buf, tests[i].errstr) == 0) { + printf("FAIL\nunexpected error message: %s", + bpf_log_buf); + err_cnt++; + goto fail; + } + } + + pass_cnt++; + printf("OK\n"); +fail: + if (map_fd >= 0) + close(map_fd); + close(prog_fd); + + } + printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); + + return 0; +} + +int main(void) +{ + return test(); +} diff --git a/kernel/samples/bpf/tracex1_kern.c b/kernel/samples/bpf/tracex1_kern.c new file mode 100644 index 000000000..316204637 --- /dev/null +++ b/kernel/samples/bpf/tracex1_kern.c @@ -0,0 +1,50 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <uapi/linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + */ +SEC("kprobe/__netif_receive_skb_core") +int bpf_prog1(struct pt_regs *ctx) +{ + /* attaches to kprobe netif_receive_skb, + * looks for packets on loobpack device and prints them + */ + char devname[IFNAMSIZ] = {}; + struct net_device *dev; + struct sk_buff *skb; + int len; + + /* non-portable! works for the given kernel only */ + skb = (struct sk_buff *) ctx->di; + + dev = _(skb->dev); + + len = _(skb->len); + + bpf_probe_read(devname, sizeof(devname), dev->name); + + if (devname[0] == 'l' && devname[1] == 'o') { + char fmt[] = "skb %p len %d\n"; + /* using bpf_trace_printk() for DEBUG ONLY */ + bpf_trace_printk(fmt, sizeof(fmt), skb, len); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/kernel/samples/bpf/tracex1_user.c b/kernel/samples/bpf/tracex1_user.c new file mode 100644 index 000000000..31a48183b --- /dev/null +++ b/kernel/samples/bpf/tracex1_user.c @@ -0,0 +1,25 @@ +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> +#include "libbpf.h" +#include "bpf_load.h" + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + f = popen("taskset 1 ping -c5 localhost", "r"); + (void) f; + + read_trace_pipe(); + + return 0; +} diff --git a/kernel/samples/bpf/tracex2_kern.c b/kernel/samples/bpf/tracex2_kern.c new file mode 100644 index 000000000..19ec1cfc4 --- /dev/null +++ b/kernel/samples/bpf/tracex2_kern.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(long), + .max_entries = 1024, +}; + +/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe + * example will no longer be meaningful + */ +SEC("kprobe/kfree_skb") +int bpf_prog2(struct pt_regs *ctx) +{ + long loc = 0; + long init_val = 1; + long *value; + + /* x64 specific: read ip of kfree_skb caller. + * non-portable version of __builtin_return_address(0) + */ + bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp); + + value = bpf_map_lookup_elem(&my_map, &loc); + if (value) + *value += 1; + else + bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY); + return 0; +} + +static unsigned int log2(unsigned int v) +{ + unsigned int r; + unsigned int shift; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + return r; +} + +static unsigned int log2l(unsigned long v) +{ + unsigned int hi = v >> 32; + if (hi) + return log2(hi) + 32; + else + return log2(v); +} + +struct bpf_map_def SEC("maps") my_hist_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 64, +}; + +SEC("kprobe/sys_write") +int bpf_prog3(struct pt_regs *ctx) +{ + long write_size = ctx->dx; /* arg3 */ + long init_val = 1; + long *value; + u32 index = log2l(write_size); + + value = bpf_map_lookup_elem(&my_hist_map, &index); + if (value) + __sync_fetch_and_add(value, 1); + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/kernel/samples/bpf/tracex2_user.c b/kernel/samples/bpf/tracex2_user.c new file mode 100644 index 000000000..91b8d0896 --- /dev/null +++ b/kernel/samples/bpf/tracex2_user.c @@ -0,0 +1,95 @@ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <signal.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define MAX_INDEX 64 +#define MAX_STARS 38 + +static void stars(char *str, long val, long max, int width) +{ + int i; + + for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++) + str[i] = '*'; + if (val > max) + str[i - 1] = '+'; + str[i] = '\0'; +} + +static void print_hist(int fd) +{ + int key; + long value; + long data[MAX_INDEX] = {}; + char starstr[MAX_STARS]; + int i; + int max_ind = -1; + long max_value = 0; + + for (key = 0; key < MAX_INDEX; key++) { + bpf_lookup_elem(fd, &key, &value); + data[key] = value; + if (value && key > max_ind) + max_ind = key; + if (value > max_value) + max_value = value; + } + + printf(" syscall write() stats\n"); + printf(" byte_size : count distribution\n"); + for (i = 1; i <= max_ind + 1; i++) { + stars(starstr, data[i - 1], max_value, MAX_STARS); + printf("%8ld -> %-8ld : %-8ld |%-*s|\n", + (1l << i) >> 1, (1l << i) - 1, data[i - 1], + MAX_STARS, starstr); + } +} +static void int_exit(int sig) +{ + print_hist(map_fd[1]); + exit(0); +} + +int main(int ac, char **argv) +{ + char filename[256]; + long key, next_key, value; + FILE *f; + int i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + signal(SIGINT, int_exit); + + /* start 'ping' in the background to have some kfree_skb events */ + f = popen("ping -c5 localhost", "r"); + (void) f; + + /* start 'dd' in the background to have plenty of 'write' syscalls */ + f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r"); + (void) f; + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + for (i = 0; i < 5; i++) { + key = 0; + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &value); + printf("location 0x%lx count %ld\n", next_key, value); + key = next_key; + } + if (key) + printf("\n"); + sleep(1); + } + print_hist(map_fd[1]); + + return 0; +} diff --git a/kernel/samples/bpf/tracex3_kern.c b/kernel/samples/bpf/tracex3_kern.c new file mode 100644 index 000000000..255ff2792 --- /dev/null +++ b/kernel/samples/bpf/tracex3_kern.c @@ -0,0 +1,89 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(u64), + .max_entries = 4096, +}; + +/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe + * example will no longer be meaningful + */ +SEC("kprobe/blk_mq_start_request") +int bpf_prog1(struct pt_regs *ctx) +{ + long rq = ctx->di; + u64 val = bpf_ktime_get_ns(); + + bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); + return 0; +} + +static unsigned int log2l(unsigned long long n) +{ +#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; } + int i = -(n == 0); + S(32); S(16); S(8); S(4); S(2); S(1); + return i; +#undef S +} + +#define SLOTS 100 + +struct bpf_map_def SEC("maps") lat_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = SLOTS, +}; + +SEC("kprobe/blk_update_request") +int bpf_prog2(struct pt_regs *ctx) +{ + long rq = ctx->di; + u64 *value, l, base; + u32 index; + + value = bpf_map_lookup_elem(&my_map, &rq); + if (!value) + return 0; + + u64 cur_time = bpf_ktime_get_ns(); + u64 delta = cur_time - *value; + + bpf_map_delete_elem(&my_map, &rq); + + /* the lines below are computing index = log10(delta)*10 + * using integer arithmetic + * index = 29 ~ 1 usec + * index = 59 ~ 1 msec + * index = 89 ~ 1 sec + * index = 99 ~ 10sec or more + * log10(x)*10 = log2(x)*10/log2(10) = log2(x)*3 + */ + l = log2l(delta); + base = 1ll << l; + index = (l * 64 + (delta - base) * 64 / base) * 3 / 64; + + if (index >= SLOTS) + index = SLOTS - 1; + + value = bpf_map_lookup_elem(&lat_map, &index); + if (value) + __sync_fetch_and_add((long *)value, 1); + + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/kernel/samples/bpf/tracex3_user.c b/kernel/samples/bpf/tracex3_user.c new file mode 100644 index 000000000..0aaa933ab --- /dev/null +++ b/kernel/samples/bpf/tracex3_user.c @@ -0,0 +1,150 @@ +/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +#define SLOTS 100 + +static void clear_stats(int fd) +{ + __u32 key; + __u64 value = 0; + + for (key = 0; key < SLOTS; key++) + bpf_update_elem(fd, &key, &value, BPF_ANY); +} + +const char *color[] = { + "\033[48;5;255m", + "\033[48;5;252m", + "\033[48;5;250m", + "\033[48;5;248m", + "\033[48;5;246m", + "\033[48;5;244m", + "\033[48;5;242m", + "\033[48;5;240m", + "\033[48;5;238m", + "\033[48;5;236m", + "\033[48;5;234m", + "\033[48;5;232m", +}; +const int num_colors = ARRAY_SIZE(color); + +const char nocolor[] = "\033[00m"; + +const char *sym[] = { + " ", + " ", + ".", + ".", + "*", + "*", + "o", + "o", + "O", + "O", + "#", + "#", +}; + +bool full_range = false; +bool text_only = false; + +static void print_banner(void) +{ + if (full_range) + printf("|1ns |10ns |100ns |1us |10us |100us" + " |1ms |10ms |100ms |1s |10s\n"); + else + printf("|1us |10us |100us |1ms |10ms " + "|100ms |1s |10s\n"); +} + +static void print_hist(int fd) +{ + __u32 key; + __u64 value; + __u64 cnt[SLOTS]; + __u64 max_cnt = 0; + __u64 total_events = 0; + + for (key = 0; key < SLOTS; key++) { + value = 0; + bpf_lookup_elem(fd, &key, &value); + cnt[key] = value; + total_events += value; + if (value > max_cnt) + max_cnt = value; + } + clear_stats(fd); + for (key = full_range ? 0 : 29; key < SLOTS; key++) { + int c = num_colors * cnt[key] / (max_cnt + 1); + + if (text_only) + printf("%s", sym[c]); + else + printf("%s %s", color[c], nocolor); + } + printf(" # %lld\n", total_events); +} + +int main(int ac, char **argv) +{ + char filename[256]; + int i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + for (i = 1; i < ac; i++) { + if (strcmp(argv[i], "-a") == 0) { + full_range = true; + } else if (strcmp(argv[i], "-t") == 0) { + text_only = true; + } else if (strcmp(argv[i], "-h") == 0) { + printf("Usage:\n" + " -a display wider latency range\n" + " -t text only\n"); + return 1; + } + } + + printf(" heatmap of IO latency\n"); + if (text_only) + printf(" %s", sym[num_colors - 1]); + else + printf(" %s %s", color[num_colors - 1], nocolor); + printf(" - many events with this latency\n"); + + if (text_only) + printf(" %s", sym[0]); + else + printf(" %s %s", color[0], nocolor); + printf(" - few events\n"); + + for (i = 0; ; i++) { + if (i % 20 == 0) + print_banner(); + print_hist(map_fd[1]); + sleep(2); + } + + return 0; +} diff --git a/kernel/samples/bpf/tracex4_kern.c b/kernel/samples/bpf/tracex4_kern.c new file mode 100644 index 000000000..126b80512 --- /dev/null +++ b/kernel/samples/bpf/tracex4_kern.c @@ -0,0 +1,54 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +struct pair { + u64 val; + u64 ip; +}; + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(long), + .value_size = sizeof(struct pair), + .max_entries = 1000000, +}; + +/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe + * example will no longer be meaningful + */ +SEC("kprobe/kmem_cache_free") +int bpf_prog1(struct pt_regs *ctx) +{ + long ptr = ctx->si; + + bpf_map_delete_elem(&my_map, &ptr); + return 0; +} + +SEC("kretprobe/kmem_cache_alloc_node") +int bpf_prog2(struct pt_regs *ctx) +{ + long ptr = ctx->ax; + long ip = 0; + + /* get ip address of kmem_cache_alloc_node() caller */ + bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip))); + + struct pair v = { + .val = bpf_ktime_get_ns(), + .ip = ip, + }; + + bpf_map_update_elem(&my_map, &ptr, &v, BPF_ANY); + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/kernel/samples/bpf/tracex4_user.c b/kernel/samples/bpf/tracex4_user.c new file mode 100644 index 000000000..bc4a3bdea --- /dev/null +++ b/kernel/samples/bpf/tracex4_user.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <time.h> +#include <linux/bpf.h> +#include "libbpf.h" +#include "bpf_load.h" + +struct pair { + long long val; + __u64 ip; +}; + +static __u64 time_get_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000ull + ts.tv_nsec; +} + +static void print_old_objects(int fd) +{ + long long val = time_get_ns(); + __u64 key, next_key; + struct pair v; + + key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ + + key = -1; + while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { + bpf_lookup_elem(map_fd[0], &next_key, &v); + key = next_key; + if (val - v.val < 1000000000ll) + /* object was allocated more then 1 sec ago */ + continue; + printf("obj 0x%llx is %2lldsec old was allocated at ip %llx\n", + next_key, (val - v.val) / 1000000000ll, v.ip); + } +} + +int main(int ac, char **argv) +{ + char filename[256]; + int i; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + for (i = 0; ; i++) { + print_old_objects(map_fd[1]); + sleep(1); + } + + return 0; +} diff --git a/kernel/samples/hidraw/.gitignore b/kernel/samples/hidraw/.gitignore new file mode 100644 index 000000000..05e51a685 --- /dev/null +++ b/kernel/samples/hidraw/.gitignore @@ -0,0 +1 @@ +hid-example diff --git a/kernel/samples/hidraw/Makefile b/kernel/samples/hidraw/Makefile new file mode 100644 index 000000000..a9ab96188 --- /dev/null +++ b/kernel/samples/hidraw/Makefile @@ -0,0 +1,12 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +# List of programs to build +hostprogs-y := hid-example + +# Tell kbuild to always build the programs +always := $(hostprogs-y) + +HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include + +all: hid-example diff --git a/kernel/samples/hidraw/hid-example.c b/kernel/samples/hidraw/hid-example.c new file mode 100644 index 000000000..92e6c1511 --- /dev/null +++ b/kernel/samples/hidraw/hid-example.c @@ -0,0 +1,181 @@ +/* + * Hidraw Userspace Example + * + * Copyright (c) 2010 Alan Ott <alan@signal11.us> + * Copyright (c) 2010 Signal 11 Software + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using hidraw. + */ + +/* Linux */ +#include <linux/types.h> +#include <linux/input.h> +#include <linux/hidraw.h> + +/* + * Ugly hack to work around failing compilation on systems that don't + * yet populate new version of hidraw.h to userspace. + */ +#ifndef HIDIOCSFEATURE +#warning Please have your distro update the userspace kernel headers +#define HIDIOCSFEATURE(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x06, len) +#define HIDIOCGFEATURE(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x07, len) +#endif + +/* Unix */ +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +/* C */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> + +const char *bus_str(int bus); + +int main(int argc, char **argv) +{ + int fd; + int i, res, desc_size = 0; + char buf[256]; + struct hidraw_report_descriptor rpt_desc; + struct hidraw_devinfo info; + char *device = "/dev/hidraw0"; + + if (argc > 1) + device = argv[1]; + + /* Open the Device with non-blocking reads. In real life, + don't use a hard coded path; use libudev instead. */ + fd = open(device, O_RDWR|O_NONBLOCK); + + if (fd < 0) { + perror("Unable to open device"); + return 1; + } + + memset(&rpt_desc, 0x0, sizeof(rpt_desc)); + memset(&info, 0x0, sizeof(info)); + memset(buf, 0x0, sizeof(buf)); + + /* Get Report Descriptor Size */ + res = ioctl(fd, HIDIOCGRDESCSIZE, &desc_size); + if (res < 0) + perror("HIDIOCGRDESCSIZE"); + else + printf("Report Descriptor Size: %d\n", desc_size); + + /* Get Report Descriptor */ + rpt_desc.size = desc_size; + res = ioctl(fd, HIDIOCGRDESC, &rpt_desc); + if (res < 0) { + perror("HIDIOCGRDESC"); + } else { + printf("Report Descriptor:\n"); + for (i = 0; i < rpt_desc.size; i++) + printf("%hhx ", rpt_desc.value[i]); + puts("\n"); + } + + /* Get Raw Name */ + res = ioctl(fd, HIDIOCGRAWNAME(256), buf); + if (res < 0) + perror("HIDIOCGRAWNAME"); + else + printf("Raw Name: %s\n", buf); + + /* Get Physical Location */ + res = ioctl(fd, HIDIOCGRAWPHYS(256), buf); + if (res < 0) + perror("HIDIOCGRAWPHYS"); + else + printf("Raw Phys: %s\n", buf); + + /* Get Raw Info */ + res = ioctl(fd, HIDIOCGRAWINFO, &info); + if (res < 0) { + perror("HIDIOCGRAWINFO"); + } else { + printf("Raw Info:\n"); + printf("\tbustype: %d (%s)\n", + info.bustype, bus_str(info.bustype)); + printf("\tvendor: 0x%04hx\n", info.vendor); + printf("\tproduct: 0x%04hx\n", info.product); + } + + /* Set Feature */ + buf[0] = 0x9; /* Report Number */ + buf[1] = 0xff; + buf[2] = 0xff; + buf[3] = 0xff; + res = ioctl(fd, HIDIOCSFEATURE(4), buf); + if (res < 0) + perror("HIDIOCSFEATURE"); + else + printf("ioctl HIDIOCGFEATURE returned: %d\n", res); + + /* Get Feature */ + buf[0] = 0x9; /* Report Number */ + res = ioctl(fd, HIDIOCGFEATURE(256), buf); + if (res < 0) { + perror("HIDIOCGFEATURE"); + } else { + printf("ioctl HIDIOCGFEATURE returned: %d\n", res); + printf("Report data (not containing the report number):\n\t"); + for (i = 0; i < res; i++) + printf("%hhx ", buf[i]); + puts("\n"); + } + + /* Send a Report to the Device */ + buf[0] = 0x1; /* Report Number */ + buf[1] = 0x77; + res = write(fd, buf, 2); + if (res < 0) { + printf("Error: %d\n", errno); + perror("write"); + } else { + printf("write() wrote %d bytes\n", res); + } + + /* Get a report from the device */ + res = read(fd, buf, 16); + if (res < 0) { + perror("read"); + } else { + printf("read() read %d bytes:\n\t", res); + for (i = 0; i < res; i++) + printf("%hhx ", buf[i]); + puts("\n"); + } + close(fd); + return 0; +} + +const char * +bus_str(int bus) +{ + switch (bus) { + case BUS_USB: + return "USB"; + break; + case BUS_HIL: + return "HIL"; + break; + case BUS_BLUETOOTH: + return "Bluetooth"; + break; + case BUS_VIRTUAL: + return "Virtual"; + break; + default: + return "Other"; + break; + } +} diff --git a/kernel/samples/hw_breakpoint/Makefile b/kernel/samples/hw_breakpoint/Makefile new file mode 100644 index 000000000..0f5c31c2f --- /dev/null +++ b/kernel/samples/hw_breakpoint/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o diff --git a/kernel/samples/hw_breakpoint/data_breakpoint.c b/kernel/samples/hw_breakpoint/data_breakpoint.c new file mode 100644 index 000000000..ef7f32291 --- /dev/null +++ b/kernel/samples/hw_breakpoint/data_breakpoint.c @@ -0,0 +1,90 @@ +/* + * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * usage: insmod data_breakpoint.ko ksym=<ksym_name> + * + * This file is a kernel module that places a breakpoint over ksym_name kernel + * variable using Hardware Breakpoint register. The corresponding handler which + * prints a backtrace is invoked every time a write operation is performed on + * that variable. + * + * Copyright (C) IBM Corporation, 2009 + * + * Author: K.Prasad <prasad@linux.vnet.ibm.com> + */ +#include <linux/module.h> /* Needed by all modules */ +#include <linux/kernel.h> /* Needed for KERN_INFO */ +#include <linux/init.h> /* Needed for the macros */ +#include <linux/kallsyms.h> + +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> + +struct perf_event * __percpu *sample_hbp; + +static char ksym_name[KSYM_NAME_LEN] = "pid_max"; +module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); +MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" + " write operations on the kernel symbol"); + +static void sample_hbp_handler(struct perf_event *bp, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + printk(KERN_INFO "%s value is changed\n", ksym_name); + dump_stack(); + printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); +} + +static int __init hw_break_module_init(void) +{ + int ret; + struct perf_event_attr attr; + + hw_breakpoint_init(&attr); + attr.bp_addr = kallsyms_lookup_name(ksym_name); + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + + sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL); + if (IS_ERR((void __force *)sample_hbp)) { + ret = PTR_ERR((void __force *)sample_hbp); + goto fail; + } + + printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); + + return 0; + +fail: + printk(KERN_INFO "Breakpoint registration failed\n"); + + return ret; +} + +static void __exit hw_break_module_exit(void) +{ + unregister_wide_hw_breakpoint(sample_hbp); + printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); +} + +module_init(hw_break_module_init); +module_exit(hw_break_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("K.Prasad"); +MODULE_DESCRIPTION("ksym breakpoint"); diff --git a/kernel/samples/kdb/Makefile b/kernel/samples/kdb/Makefile new file mode 100644 index 000000000..fbedf39d9 --- /dev/null +++ b/kernel/samples/kdb/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_KDB) += kdb_hello.o diff --git a/kernel/samples/kdb/kdb_hello.c b/kernel/samples/kdb/kdb_hello.c new file mode 100644 index 000000000..c1c2fa0f6 --- /dev/null +++ b/kernel/samples/kdb/kdb_hello.c @@ -0,0 +1,60 @@ +/* + * Created by: Jason Wessel <jason.wessel@windriver.com> + * + * Copyright (c) 2010 Wind River Systems, Inc. All Rights Reserved. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/kdb.h> + +/* + * All kdb shell command call backs receive argc and argv, where + * argv[0] is the command the end user typed + */ +static int kdb_hello_cmd(int argc, const char **argv) +{ + if (argc > 1) + return KDB_ARGCOUNT; + + if (argc) + kdb_printf("Hello %s.\n", argv[1]); + else + kdb_printf("Hello world!\n"); + + return 0; +} + + +static int __init kdb_hello_cmd_init(void) +{ + /* + * Registration of a dynamically added kdb command is done with + * kdb_register() with the arguments being: + * 1: The name of the shell command + * 2: The function that processes the command + * 3: Description of the usage of any arguments + * 4: Descriptive text when you run help + * 5: Number of characters to complete the command + * 0 == type the whole command + * 1 == match both "g" and "go" for example + */ + kdb_register("hello", kdb_hello_cmd, "[string]", + "Say Hello World or Hello [string]", 0); + return 0; +} + +static void __exit kdb_hello_cmd_exit(void) +{ + kdb_unregister("hello"); +} + +module_init(kdb_hello_cmd_init); +module_exit(kdb_hello_cmd_exit); + +MODULE_AUTHOR("WindRiver"); +MODULE_DESCRIPTION("KDB example to add a hello command"); +MODULE_LICENSE("GPL"); diff --git a/kernel/samples/kfifo/Makefile b/kernel/samples/kfifo/Makefile new file mode 100644 index 000000000..bcc9484a1 --- /dev/null +++ b/kernel/samples/kfifo/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_KFIFO) += bytestream-example.o dma-example.o inttype-example.o record-example.o diff --git a/kernel/samples/kfifo/bytestream-example.c b/kernel/samples/kfifo/bytestream-example.c new file mode 100644 index 000000000..2fca916d9 --- /dev/null +++ b/kernel/samples/kfifo/bytestream-example.c @@ -0,0 +1,194 @@ +/* + * Sample kfifo byte stream implementation + * + * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net> + * + * Released under the GPL version 2 only. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/mutex.h> +#include <linux/kfifo.h> + +/* + * This module shows how to create a byte stream fifo. + */ + +/* fifo size in elements (bytes) */ +#define FIFO_SIZE 32 + +/* name of the proc entry */ +#define PROC_FIFO "bytestream-fifo" + +/* lock for procfs read access */ +static DEFINE_MUTEX(read_lock); + +/* lock for procfs write access */ +static DEFINE_MUTEX(write_lock); + +/* + * define DYNAMIC in this example for a dynamically allocated fifo. + * + * Otherwise the fifo storage will be a part of the fifo structure. + */ +#if 0 +#define DYNAMIC +#endif + +#ifdef DYNAMIC +static struct kfifo test; +#else +static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE); +#endif + +static const unsigned char expected_result[FIFO_SIZE] = { + 3, 4, 5, 6, 7, 8, 9, 0, + 1, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, +}; + +static int __init testfunc(void) +{ + unsigned char buf[6]; + unsigned char i, j; + unsigned int ret; + + printk(KERN_INFO "byte stream fifo test start\n"); + + /* put string into the fifo */ + kfifo_in(&test, "hello", 5); + + /* put values into the fifo */ + for (i = 0; i != 10; i++) + kfifo_put(&test, i); + + /* show the number of used elements */ + printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); + + /* get max of 5 bytes from the fifo */ + i = kfifo_out(&test, buf, 5); + printk(KERN_INFO "buf: %.*s\n", i, buf); + + /* get max of 2 elements from the fifo */ + ret = kfifo_out(&test, buf, 2); + printk(KERN_INFO "ret: %d\n", ret); + /* and put it back to the end of the fifo */ + ret = kfifo_in(&test, buf, ret); + printk(KERN_INFO "ret: %d\n", ret); + + /* skip first element of the fifo */ + printk(KERN_INFO "skip 1st element\n"); + kfifo_skip(&test); + + /* put values into the fifo until is full */ + for (i = 20; kfifo_put(&test, i); i++) + ; + + printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); + + /* show the first value without removing from the fifo */ + if (kfifo_peek(&test, &i)) + printk(KERN_INFO "%d\n", i); + + /* check the correctness of all values in the fifo */ + j = 0; + while (kfifo_get(&test, &i)) { + printk(KERN_INFO "item = %d\n", i); + if (i != expected_result[j++]) { + printk(KERN_WARNING "value mismatch: test failed\n"); + return -EIO; + } + } + if (j != ARRAY_SIZE(expected_result)) { + printk(KERN_WARNING "size mismatch: test failed\n"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); + + return 0; +} + +static ssize_t fifo_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + unsigned int copied; + + if (mutex_lock_interruptible(&write_lock)) + return -ERESTARTSYS; + + ret = kfifo_from_user(&test, buf, count, &copied); + + mutex_unlock(&write_lock); + + return ret ? ret : copied; +} + +static ssize_t fifo_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + unsigned int copied; + + if (mutex_lock_interruptible(&read_lock)) + return -ERESTARTSYS; + + ret = kfifo_to_user(&test, buf, count, &copied); + + mutex_unlock(&read_lock); + + return ret ? ret : copied; +} + +static const struct file_operations fifo_fops = { + .owner = THIS_MODULE, + .read = fifo_read, + .write = fifo_write, + .llseek = noop_llseek, +}; + +static int __init example_init(void) +{ +#ifdef DYNAMIC + int ret; + + ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL); + if (ret) { + printk(KERN_ERR "error kfifo_alloc\n"); + return ret; + } +#else + INIT_KFIFO(test); +#endif + if (testfunc() < 0) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -EIO; + } + + if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -ENOMEM; + } + return 0; +} + +static void __exit example_exit(void) +{ + remove_proc_entry(PROC_FIFO, NULL); +#ifdef DYNAMIC + kfifo_free(&test); +#endif +} + +module_init(example_init); +module_exit(example_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/kernel/samples/kfifo/dma-example.c b/kernel/samples/kfifo/dma-example.c new file mode 100644 index 000000000..aa243db93 --- /dev/null +++ b/kernel/samples/kfifo/dma-example.c @@ -0,0 +1,143 @@ +/* + * Sample fifo dma implementation + * + * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net> + * + * Released under the GPL version 2 only. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kfifo.h> + +/* + * This module shows how to handle fifo dma operations. + */ + +/* fifo size in elements (bytes) */ +#define FIFO_SIZE 32 + +static struct kfifo fifo; + +static int __init example_init(void) +{ + int i; + unsigned int ret; + unsigned int nents; + struct scatterlist sg[10]; + + printk(KERN_INFO "DMA fifo test start\n"); + + if (kfifo_alloc(&fifo, FIFO_SIZE, GFP_KERNEL)) { + printk(KERN_WARNING "error kfifo_alloc\n"); + return -ENOMEM; + } + + printk(KERN_INFO "queue size: %u\n", kfifo_size(&fifo)); + + kfifo_in(&fifo, "test", 4); + + for (i = 0; i != 9; i++) + kfifo_put(&fifo, i); + + /* kick away first byte */ + kfifo_skip(&fifo); + + printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo)); + + /* + * Configure the kfifo buffer to receive data from DMA input. + * + * .--------------------------------------. + * | 0 | 1 | 2 | ... | 12 | 13 | ... | 31 | + * |---|------------------|---------------| + * \_/ \________________/ \_____________/ + * \ \ \ + * \ \_allocated data \ + * \_*free space* \_*free space* + * + * We need two different SG entries: one for the free space area at the + * end of the kfifo buffer (19 bytes) and another for the first free + * byte at the beginning, after the kfifo_skip(). + */ + sg_init_table(sg, ARRAY_SIZE(sg)); + nents = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE); + printk(KERN_INFO "DMA sgl entries: %d\n", nents); + if (!nents) { + /* fifo is full and no sgl was created */ + printk(KERN_WARNING "error kfifo_dma_in_prepare\n"); + return -EIO; + } + + /* receive data */ + printk(KERN_INFO "scatterlist for receive:\n"); + for (i = 0; i < nents; i++) { + printk(KERN_INFO + "sg[%d] -> " + "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", + i, sg[i].page_link, sg[i].offset, sg[i].length); + + if (sg_is_last(&sg[i])) + break; + } + + /* put here your code to setup and exectute the dma operation */ + /* ... */ + + /* example: zero bytes received */ + ret = 0; + + /* finish the dma operation and update the received data */ + kfifo_dma_in_finish(&fifo, ret); + + /* Prepare to transmit data, example: 8 bytes */ + nents = kfifo_dma_out_prepare(&fifo, sg, ARRAY_SIZE(sg), 8); + printk(KERN_INFO "DMA sgl entries: %d\n", nents); + if (!nents) { + /* no data was available and no sgl was created */ + printk(KERN_WARNING "error kfifo_dma_out_prepare\n"); + return -EIO; + } + + printk(KERN_INFO "scatterlist for transmit:\n"); + for (i = 0; i < nents; i++) { + printk(KERN_INFO + "sg[%d] -> " + "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", + i, sg[i].page_link, sg[i].offset, sg[i].length); + + if (sg_is_last(&sg[i])) + break; + } + + /* put here your code to setup and exectute the dma operation */ + /* ... */ + + /* example: 5 bytes transmitted */ + ret = 5; + + /* finish the dma operation and update the transmitted data */ + kfifo_dma_out_finish(&fifo, ret); + + ret = kfifo_len(&fifo); + printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo)); + + if (ret != 7) { + printk(KERN_WARNING "size mismatch: test failed"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); + + return 0; +} + +static void __exit example_exit(void) +{ + kfifo_free(&fifo); +} + +module_init(example_init); +module_exit(example_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/kernel/samples/kfifo/inttype-example.c b/kernel/samples/kfifo/inttype-example.c new file mode 100644 index 000000000..8dc3c2e71 --- /dev/null +++ b/kernel/samples/kfifo/inttype-example.c @@ -0,0 +1,185 @@ +/* + * Sample kfifo int type implementation + * + * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net> + * + * Released under the GPL version 2 only. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/mutex.h> +#include <linux/kfifo.h> + +/* + * This module shows how to create a int type fifo. + */ + +/* fifo size in elements (ints) */ +#define FIFO_SIZE 32 + +/* name of the proc entry */ +#define PROC_FIFO "int-fifo" + +/* lock for procfs read access */ +static DEFINE_MUTEX(read_lock); + +/* lock for procfs write access */ +static DEFINE_MUTEX(write_lock); + +/* + * define DYNAMIC in this example for a dynamically allocated fifo. + * + * Otherwise the fifo storage will be a part of the fifo structure. + */ +#if 0 +#define DYNAMIC +#endif + +#ifdef DYNAMIC +static DECLARE_KFIFO_PTR(test, int); +#else +static DEFINE_KFIFO(test, int, FIFO_SIZE); +#endif + +static const int expected_result[FIFO_SIZE] = { + 3, 4, 5, 6, 7, 8, 9, 0, + 1, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, +}; + +static int __init testfunc(void) +{ + int buf[6]; + int i, j; + unsigned int ret; + + printk(KERN_INFO "int fifo test start\n"); + + /* put values into the fifo */ + for (i = 0; i != 10; i++) + kfifo_put(&test, i); + + /* show the number of used elements */ + printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); + + /* get max of 2 elements from the fifo */ + ret = kfifo_out(&test, buf, 2); + printk(KERN_INFO "ret: %d\n", ret); + /* and put it back to the end of the fifo */ + ret = kfifo_in(&test, buf, ret); + printk(KERN_INFO "ret: %d\n", ret); + + /* skip first element of the fifo */ + printk(KERN_INFO "skip 1st element\n"); + kfifo_skip(&test); + + /* put values into the fifo until is full */ + for (i = 20; kfifo_put(&test, i); i++) + ; + + printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); + + /* show the first value without removing from the fifo */ + if (kfifo_peek(&test, &i)) + printk(KERN_INFO "%d\n", i); + + /* check the correctness of all values in the fifo */ + j = 0; + while (kfifo_get(&test, &i)) { + printk(KERN_INFO "item = %d\n", i); + if (i != expected_result[j++]) { + printk(KERN_WARNING "value mismatch: test failed\n"); + return -EIO; + } + } + if (j != ARRAY_SIZE(expected_result)) { + printk(KERN_WARNING "size mismatch: test failed\n"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); + + return 0; +} + +static ssize_t fifo_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + unsigned int copied; + + if (mutex_lock_interruptible(&write_lock)) + return -ERESTARTSYS; + + ret = kfifo_from_user(&test, buf, count, &copied); + + mutex_unlock(&write_lock); + + return ret ? ret : copied; +} + +static ssize_t fifo_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + unsigned int copied; + + if (mutex_lock_interruptible(&read_lock)) + return -ERESTARTSYS; + + ret = kfifo_to_user(&test, buf, count, &copied); + + mutex_unlock(&read_lock); + + return ret ? ret : copied; +} + +static const struct file_operations fifo_fops = { + .owner = THIS_MODULE, + .read = fifo_read, + .write = fifo_write, + .llseek = noop_llseek, +}; + +static int __init example_init(void) +{ +#ifdef DYNAMIC + int ret; + + ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL); + if (ret) { + printk(KERN_ERR "error kfifo_alloc\n"); + return ret; + } +#endif + if (testfunc() < 0) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -EIO; + } + + if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -ENOMEM; + } + return 0; +} + +static void __exit example_exit(void) +{ + remove_proc_entry(PROC_FIFO, NULL); +#ifdef DYNAMIC + kfifo_free(&test); +#endif +} + +module_init(example_init); +module_exit(example_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/kernel/samples/kfifo/record-example.c b/kernel/samples/kfifo/record-example.c new file mode 100644 index 000000000..2d7529eeb --- /dev/null +++ b/kernel/samples/kfifo/record-example.c @@ -0,0 +1,201 @@ +/* + * Sample dynamic sized record fifo implementation + * + * Copyright (C) 2010 Stefani Seibold <stefani@seibold.net> + * + * Released under the GPL version 2 only. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/mutex.h> +#include <linux/kfifo.h> + +/* + * This module shows how to create a variable sized record fifo. + */ + +/* fifo size in elements (bytes) */ +#define FIFO_SIZE 128 + +/* name of the proc entry */ +#define PROC_FIFO "record-fifo" + +/* lock for procfs read access */ +static DEFINE_MUTEX(read_lock); + +/* lock for procfs write access */ +static DEFINE_MUTEX(write_lock); + +/* + * define DYNAMIC in this example for a dynamically allocated fifo. + * + * Otherwise the fifo storage will be a part of the fifo structure. + */ +#if 0 +#define DYNAMIC +#endif + +/* + * struct kfifo_rec_ptr_1 and STRUCT_KFIFO_REC_1 can handle records of a + * length between 0 and 255 bytes. + * + * struct kfifo_rec_ptr_2 and STRUCT_KFIFO_REC_2 can handle records of a + * length between 0 and 65535 bytes. + */ + +#ifdef DYNAMIC +struct kfifo_rec_ptr_1 test; + +#else +typedef STRUCT_KFIFO_REC_1(FIFO_SIZE) mytest; + +static mytest test; +#endif + +static const char *expected_result[] = { + "a", + "bb", + "ccc", + "dddd", + "eeeee", + "ffffff", + "ggggggg", + "hhhhhhhh", + "iiiiiiiii", + "jjjjjjjjjj", +}; + +static int __init testfunc(void) +{ + char buf[100]; + unsigned int i; + unsigned int ret; + struct { unsigned char buf[6]; } hello = { "hello" }; + + printk(KERN_INFO "record fifo test start\n"); + + kfifo_in(&test, &hello, sizeof(hello)); + + /* show the size of the next record in the fifo */ + printk(KERN_INFO "fifo peek len: %u\n", kfifo_peek_len(&test)); + + /* put in variable length data */ + for (i = 0; i < 10; i++) { + memset(buf, 'a' + i, i + 1); + kfifo_in(&test, buf, i + 1); + } + + /* skip first element of the fifo */ + printk(KERN_INFO "skip 1st element\n"); + kfifo_skip(&test); + + printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); + + /* show the first record without removing from the fifo */ + ret = kfifo_out_peek(&test, buf, sizeof(buf)); + if (ret) + printk(KERN_INFO "%.*s\n", ret, buf); + + /* check the correctness of all values in the fifo */ + i = 0; + while (!kfifo_is_empty(&test)) { + ret = kfifo_out(&test, buf, sizeof(buf)); + buf[ret] = '\0'; + printk(KERN_INFO "item = %.*s\n", ret, buf); + if (strcmp(buf, expected_result[i++])) { + printk(KERN_WARNING "value mismatch: test failed\n"); + return -EIO; + } + } + if (i != ARRAY_SIZE(expected_result)) { + printk(KERN_WARNING "size mismatch: test failed\n"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); + + return 0; +} + +static ssize_t fifo_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + unsigned int copied; + + if (mutex_lock_interruptible(&write_lock)) + return -ERESTARTSYS; + + ret = kfifo_from_user(&test, buf, count, &copied); + + mutex_unlock(&write_lock); + + return ret ? ret : copied; +} + +static ssize_t fifo_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + unsigned int copied; + + if (mutex_lock_interruptible(&read_lock)) + return -ERESTARTSYS; + + ret = kfifo_to_user(&test, buf, count, &copied); + + mutex_unlock(&read_lock); + + return ret ? ret : copied; +} + +static const struct file_operations fifo_fops = { + .owner = THIS_MODULE, + .read = fifo_read, + .write = fifo_write, + .llseek = noop_llseek, +}; + +static int __init example_init(void) +{ +#ifdef DYNAMIC + int ret; + + ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL); + if (ret) { + printk(KERN_ERR "error kfifo_alloc\n"); + return ret; + } +#else + INIT_KFIFO(test); +#endif + if (testfunc() < 0) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -EIO; + } + + if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -ENOMEM; + } + return 0; +} + +static void __exit example_exit(void) +{ + remove_proc_entry(PROC_FIFO, NULL); +#ifdef DYNAMIC + kfifo_free(&test); +#endif +} + +module_init(example_init); +module_exit(example_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Stefani Seibold <stefani@seibold.net>"); diff --git a/kernel/samples/kobject/Makefile b/kernel/samples/kobject/Makefile new file mode 100644 index 000000000..4a194203c --- /dev/null +++ b/kernel/samples/kobject/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_KOBJECT) += kobject-example.o kset-example.o diff --git a/kernel/samples/kobject/kobject-example.c b/kernel/samples/kobject/kobject-example.c new file mode 100644 index 000000000..2e0740f06 --- /dev/null +++ b/kernel/samples/kobject/kobject-example.c @@ -0,0 +1,146 @@ +/* + * Sample kobject implementation + * + * Copyright (C) 2004-2007 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2007 Novell Inc. + * + * Released under the GPL version 2 only. + * + */ +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/module.h> +#include <linux/init.h> + +/* + * This module shows how to create a simple subdirectory in sysfs called + * /sys/kernel/kobject-example In that directory, 3 files are created: + * "foo", "baz", and "bar". If an integer is written to these files, it can be + * later read out of it. + */ + +static int foo; +static int baz; +static int bar; + +/* + * The "foo" file where a static variable is read from and written to. + */ +static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", foo); +} + +static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret; + + ret = kstrtoint(buf, 10, &foo); + if (ret < 0) + return ret; + + return count; +} + +/* Sysfs attributes cannot be world-writable. */ +static struct kobj_attribute foo_attribute = + __ATTR(foo, 0664, foo_show, foo_store); + +/* + * More complex function where we determine which variable is being accessed by + * looking at the attribute for the "baz" and "bar" files. + */ +static ssize_t b_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int var; + + if (strcmp(attr->attr.name, "baz") == 0) + var = baz; + else + var = bar; + return sprintf(buf, "%d\n", var); +} + +static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int var, ret; + + ret = kstrtoint(buf, 10, &var); + if (ret < 0) + return ret; + + if (strcmp(attr->attr.name, "baz") == 0) + baz = var; + else + bar = var; + return count; +} + +static struct kobj_attribute baz_attribute = + __ATTR(baz, 0664, b_show, b_store); +static struct kobj_attribute bar_attribute = + __ATTR(bar, 0664, b_show, b_store); + + +/* + * Create a group of attributes so that we can create and destroy them all + * at once. + */ +static struct attribute *attrs[] = { + &foo_attribute.attr, + &baz_attribute.attr, + &bar_attribute.attr, + NULL, /* need to NULL terminate the list of attributes */ +}; + +/* + * An unnamed attribute group will put all of the attributes directly in + * the kobject directory. If we specify a name, a subdirectory will be + * created for the attributes with the directory being the name of the + * attribute group. + */ +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +static struct kobject *example_kobj; + +static int __init example_init(void) +{ + int retval; + + /* + * Create a simple kobject with the name of "kobject_example", + * located under /sys/kernel/ + * + * As this is a simple directory, no uevent will be sent to + * userspace. That is why this function should not be used for + * any type of dynamic kobjects, where the name and number are + * not known ahead of time. + */ + example_kobj = kobject_create_and_add("kobject_example", kernel_kobj); + if (!example_kobj) + return -ENOMEM; + + /* Create the files associated with this kobject */ + retval = sysfs_create_group(example_kobj, &attr_group); + if (retval) + kobject_put(example_kobj); + + return retval; +} + +static void __exit example_exit(void) +{ + kobject_put(example_kobj); +} + +module_init(example_init); +module_exit(example_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>"); diff --git a/kernel/samples/kobject/kset-example.c b/kernel/samples/kobject/kset-example.c new file mode 100644 index 000000000..a55bff52b --- /dev/null +++ b/kernel/samples/kobject/kset-example.c @@ -0,0 +1,289 @@ +/* + * Sample kset and ktype implementation + * + * Copyright (C) 2004-2007 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2007 Novell Inc. + * + * Released under the GPL version 2 only. + * + */ +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> + +/* + * This module shows how to create a kset in sysfs called + * /sys/kernel/kset-example + * Then tree kobjects are created and assigned to this kset, "foo", "baz", + * and "bar". In those kobjects, attributes of the same name are also + * created and if an integer is written to these files, it can be later + * read out of it. + */ + + +/* + * This is our "object" that we will create a few of and register them with + * sysfs. + */ +struct foo_obj { + struct kobject kobj; + int foo; + int baz; + int bar; +}; +#define to_foo_obj(x) container_of(x, struct foo_obj, kobj) + +/* a custom attribute that works just for a struct foo_obj. */ +struct foo_attribute { + struct attribute attr; + ssize_t (*show)(struct foo_obj *foo, struct foo_attribute *attr, char *buf); + ssize_t (*store)(struct foo_obj *foo, struct foo_attribute *attr, const char *buf, size_t count); +}; +#define to_foo_attr(x) container_of(x, struct foo_attribute, attr) + +/* + * The default show function that must be passed to sysfs. This will be + * called by sysfs for whenever a show function is called by the user on a + * sysfs file associated with the kobjects we have registered. We need to + * transpose back from a "default" kobject to our custom struct foo_obj and + * then call the show function for that specific object. + */ +static ssize_t foo_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct foo_attribute *attribute; + struct foo_obj *foo; + + attribute = to_foo_attr(attr); + foo = to_foo_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(foo, attribute, buf); +} + +/* + * Just like the default show function above, but this one is for when the + * sysfs "store" is requested (when a value is written to a file.) + */ +static ssize_t foo_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct foo_attribute *attribute; + struct foo_obj *foo; + + attribute = to_foo_attr(attr); + foo = to_foo_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(foo, attribute, buf, len); +} + +/* Our custom sysfs_ops that we will associate with our ktype later on */ +static const struct sysfs_ops foo_sysfs_ops = { + .show = foo_attr_show, + .store = foo_attr_store, +}; + +/* + * The release function for our object. This is REQUIRED by the kernel to + * have. We free the memory held in our object here. + * + * NEVER try to get away with just a "blank" release function to try to be + * smarter than the kernel. Turns out, no one ever is... + */ +static void foo_release(struct kobject *kobj) +{ + struct foo_obj *foo; + + foo = to_foo_obj(kobj); + kfree(foo); +} + +/* + * The "foo" file where the .foo variable is read from and written to. + */ +static ssize_t foo_show(struct foo_obj *foo_obj, struct foo_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", foo_obj->foo); +} + +static ssize_t foo_store(struct foo_obj *foo_obj, struct foo_attribute *attr, + const char *buf, size_t count) +{ + int ret; + + ret = kstrtoint(buf, 10, &foo_obj->foo); + if (ret < 0) + return ret; + + return count; +} + +/* Sysfs attributes cannot be world-writable. */ +static struct foo_attribute foo_attribute = + __ATTR(foo, 0664, foo_show, foo_store); + +/* + * More complex function where we determine which variable is being accessed by + * looking at the attribute for the "baz" and "bar" files. + */ +static ssize_t b_show(struct foo_obj *foo_obj, struct foo_attribute *attr, + char *buf) +{ + int var; + + if (strcmp(attr->attr.name, "baz") == 0) + var = foo_obj->baz; + else + var = foo_obj->bar; + return sprintf(buf, "%d\n", var); +} + +static ssize_t b_store(struct foo_obj *foo_obj, struct foo_attribute *attr, + const char *buf, size_t count) +{ + int var, ret; + + ret = kstrtoint(buf, 10, &var); + if (ret < 0) + return ret; + + if (strcmp(attr->attr.name, "baz") == 0) + foo_obj->baz = var; + else + foo_obj->bar = var; + return count; +} + +static struct foo_attribute baz_attribute = + __ATTR(baz, 0664, b_show, b_store); +static struct foo_attribute bar_attribute = + __ATTR(bar, 0664, b_show, b_store); + +/* + * Create a group of attributes so that we can create and destroy them all + * at once. + */ +static struct attribute *foo_default_attrs[] = { + &foo_attribute.attr, + &baz_attribute.attr, + &bar_attribute.attr, + NULL, /* need to NULL terminate the list of attributes */ +}; + +/* + * Our own ktype for our kobjects. Here we specify our sysfs ops, the + * release function, and the set of default attributes we want created + * whenever a kobject of this type is registered with the kernel. + */ +static struct kobj_type foo_ktype = { + .sysfs_ops = &foo_sysfs_ops, + .release = foo_release, + .default_attrs = foo_default_attrs, +}; + +static struct kset *example_kset; +static struct foo_obj *foo_obj; +static struct foo_obj *bar_obj; +static struct foo_obj *baz_obj; + +static struct foo_obj *create_foo_obj(const char *name) +{ + struct foo_obj *foo; + int retval; + + /* allocate the memory for the whole object */ + foo = kzalloc(sizeof(*foo), GFP_KERNEL); + if (!foo) + return NULL; + + /* + * As we have a kset for this kobject, we need to set it before calling + * the kobject core. + */ + foo->kobj.kset = example_kset; + + /* + * Initialize and add the kobject to the kernel. All the default files + * will be created here. As we have already specified a kset for this + * kobject, we don't have to set a parent for the kobject, the kobject + * will be placed beneath that kset automatically. + */ + retval = kobject_init_and_add(&foo->kobj, &foo_ktype, NULL, "%s", name); + if (retval) { + kobject_put(&foo->kobj); + return NULL; + } + + /* + * We are always responsible for sending the uevent that the kobject + * was added to the system. + */ + kobject_uevent(&foo->kobj, KOBJ_ADD); + + return foo; +} + +static void destroy_foo_obj(struct foo_obj *foo) +{ + kobject_put(&foo->kobj); +} + +static int __init example_init(void) +{ + /* + * Create a kset with the name of "kset_example", + * located under /sys/kernel/ + */ + example_kset = kset_create_and_add("kset_example", NULL, kernel_kobj); + if (!example_kset) + return -ENOMEM; + + /* + * Create three objects and register them with our kset + */ + foo_obj = create_foo_obj("foo"); + if (!foo_obj) + goto foo_error; + + bar_obj = create_foo_obj("bar"); + if (!bar_obj) + goto bar_error; + + baz_obj = create_foo_obj("baz"); + if (!baz_obj) + goto baz_error; + + return 0; + +baz_error: + destroy_foo_obj(bar_obj); +bar_error: + destroy_foo_obj(foo_obj); +foo_error: + kset_unregister(example_kset); + return -EINVAL; +} + +static void __exit example_exit(void) +{ + destroy_foo_obj(baz_obj); + destroy_foo_obj(bar_obj); + destroy_foo_obj(foo_obj); + kset_unregister(example_kset); +} + +module_init(example_init); +module_exit(example_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Greg Kroah-Hartman <greg@kroah.com>"); diff --git a/kernel/samples/kprobes/Makefile b/kernel/samples/kprobes/Makefile new file mode 100644 index 000000000..68739bc4f --- /dev/null +++ b/kernel/samples/kprobes/Makefile @@ -0,0 +1,5 @@ +# builds the kprobes example kernel modules; +# then to use one (as root): insmod <module_name.ko> + +obj-$(CONFIG_SAMPLE_KPROBES) += kprobe_example.o jprobe_example.o +obj-$(CONFIG_SAMPLE_KRETPROBES) += kretprobe_example.o diff --git a/kernel/samples/kprobes/jprobe_example.c b/kernel/samples/kprobes/jprobe_example.c new file mode 100644 index 000000000..9119ac6a8 --- /dev/null +++ b/kernel/samples/kprobes/jprobe_example.c @@ -0,0 +1,67 @@ +/* + * Here's a sample kernel module showing the use of jprobes to dump + * the arguments of do_fork(). + * + * For more information on theory of operation of jprobes, see + * Documentation/kprobes.txt + * + * Build and insert the kernel module as done in the kprobe example. + * You will see the trace data in /var/log/messages and on the + * console whenever do_fork() is invoked to create a new process. + * (Some messages may be suppressed if syslogd is configured to + * eliminate duplicate messages.) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +/* + * Jumper probe for do_fork. + * Mirror principle enables access to arguments of the probed routine + * from the probe handler. + */ + +/* Proxy routine having the same arguments as actual do_fork() routine */ +static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, + unsigned long stack_size, int __user *parent_tidptr, + int __user *child_tidptr) +{ + pr_info("jprobe: clone_flags = 0x%lx, stack_start = 0x%lx " + "stack_size = 0x%lx\n", clone_flags, stack_start, stack_size); + + /* Always end with a call to jprobe_return(). */ + jprobe_return(); + return 0; +} + +static struct jprobe my_jprobe = { + .entry = jdo_fork, + .kp = { + .symbol_name = "do_fork", + }, +}; + +static int __init jprobe_init(void) +{ + int ret; + + ret = register_jprobe(&my_jprobe); + if (ret < 0) { + printk(KERN_INFO "register_jprobe failed, returned %d\n", ret); + return -1; + } + printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n", + my_jprobe.kp.addr, my_jprobe.entry); + return 0; +} + +static void __exit jprobe_exit(void) +{ + unregister_jprobe(&my_jprobe); + printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr); +} + +module_init(jprobe_init) +module_exit(jprobe_exit) +MODULE_LICENSE("GPL"); diff --git a/kernel/samples/kprobes/kprobe_example.c b/kernel/samples/kprobes/kprobe_example.c new file mode 100644 index 000000000..366db1a9f --- /dev/null +++ b/kernel/samples/kprobes/kprobe_example.c @@ -0,0 +1,109 @@ +/* + * NOTE: This example is works on x86 and powerpc. + * Here's a sample kernel module showing the use of kprobes to dump a + * stack trace and selected registers when do_fork() is called. + * + * For more information on theory of operation of kprobes, see + * Documentation/kprobes.txt + * + * You will see the trace data in /var/log/messages and on the console + * whenever do_fork() is invoked to create a new process. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +/* For each probe you need to allocate a kprobe structure */ +static struct kprobe kp = { + .symbol_name = "do_fork", +}; + +/* kprobe pre_handler: called just before the probed instruction is executed */ +static int handler_pre(struct kprobe *p, struct pt_regs *regs) +{ +#ifdef CONFIG_X86 + printk(KERN_INFO "pre_handler: p->addr = 0x%p, ip = %lx," + " flags = 0x%lx\n", + p->addr, regs->ip, regs->flags); +#endif +#ifdef CONFIG_PPC + printk(KERN_INFO "pre_handler: p->addr = 0x%p, nip = 0x%lx," + " msr = 0x%lx\n", + p->addr, regs->nip, regs->msr); +#endif +#ifdef CONFIG_MIPS + printk(KERN_INFO "pre_handler: p->addr = 0x%p, epc = 0x%lx," + " status = 0x%lx\n", + p->addr, regs->cp0_epc, regs->cp0_status); +#endif +#ifdef CONFIG_TILEGX + printk(KERN_INFO "pre_handler: p->addr = 0x%p, pc = 0x%lx," + " ex1 = 0x%lx\n", + p->addr, regs->pc, regs->ex1); +#endif + + /* A dump_stack() here will give a stack backtrace */ + return 0; +} + +/* kprobe post_handler: called after the probed instruction is executed */ +static void handler_post(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ +#ifdef CONFIG_X86 + printk(KERN_INFO "post_handler: p->addr = 0x%p, flags = 0x%lx\n", + p->addr, regs->flags); +#endif +#ifdef CONFIG_PPC + printk(KERN_INFO "post_handler: p->addr = 0x%p, msr = 0x%lx\n", + p->addr, regs->msr); +#endif +#ifdef CONFIG_MIPS + printk(KERN_INFO "post_handler: p->addr = 0x%p, status = 0x%lx\n", + p->addr, regs->cp0_status); +#endif +#ifdef CONFIG_TILEGX + printk(KERN_INFO "post_handler: p->addr = 0x%p, ex1 = 0x%lx\n", + p->addr, regs->ex1); +#endif +} + +/* + * fault_handler: this is called if an exception is generated for any + * instruction within the pre- or post-handler, or when Kprobes + * single-steps the probed instruction. + */ +static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) +{ + printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%dn", + p->addr, trapnr); + /* Return 0 because we don't handle the fault. */ + return 0; +} + +static int __init kprobe_init(void) +{ + int ret; + kp.pre_handler = handler_pre; + kp.post_handler = handler_post; + kp.fault_handler = handler_fault; + + ret = register_kprobe(&kp); + if (ret < 0) { + printk(KERN_INFO "register_kprobe failed, returned %d\n", ret); + return ret; + } + printk(KERN_INFO "Planted kprobe at %p\n", kp.addr); + return 0; +} + +static void __exit kprobe_exit(void) +{ + unregister_kprobe(&kp); + printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr); +} + +module_init(kprobe_init) +module_exit(kprobe_exit) +MODULE_LICENSE("GPL"); diff --git a/kernel/samples/kprobes/kretprobe_example.c b/kernel/samples/kprobes/kretprobe_example.c new file mode 100644 index 000000000..1041b6731 --- /dev/null +++ b/kernel/samples/kprobes/kretprobe_example.c @@ -0,0 +1,107 @@ +/* + * kretprobe_example.c + * + * Here's a sample kernel module showing the use of return probes to + * report the return value and total time taken for probed function + * to run. + * + * usage: insmod kretprobe_example.ko func=<func_name> + * + * If no func_name is specified, do_fork is instrumented + * + * For more information on theory of operation of kretprobes, see + * Documentation/kprobes.txt + * + * Build and insert the kernel module as done in the kprobe example. + * You will see the trace data in /var/log/messages and on the console + * whenever the probed function returns. (Some messages may be suppressed + * if syslogd is configured to eliminate duplicate messages.) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> +#include <linux/ktime.h> +#include <linux/limits.h> +#include <linux/sched.h> + +static char func_name[NAME_MAX] = "do_fork"; +module_param_string(func, func_name, NAME_MAX, S_IRUGO); +MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" + " function's execution time"); + +/* per-instance private data */ +struct my_data { + ktime_t entry_stamp; +}; + +/* Here we use the entry_hanlder to timestamp function entry */ +static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct my_data *data; + + if (!current->mm) + return 1; /* Skip kernel threads */ + + data = (struct my_data *)ri->data; + data->entry_stamp = ktime_get(); + return 0; +} + +/* + * Return-probe handler: Log the return value and duration. Duration may turn + * out to be zero consistently, depending upon the granularity of time + * accounting on the platform. + */ +static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + int retval = regs_return_value(regs); + struct my_data *data = (struct my_data *)ri->data; + s64 delta; + ktime_t now; + + now = ktime_get(); + delta = ktime_to_ns(ktime_sub(now, data->entry_stamp)); + printk(KERN_INFO "%s returned %d and took %lld ns to execute\n", + func_name, retval, (long long)delta); + return 0; +} + +static struct kretprobe my_kretprobe = { + .handler = ret_handler, + .entry_handler = entry_handler, + .data_size = sizeof(struct my_data), + /* Probe up to 20 instances concurrently. */ + .maxactive = 20, +}; + +static int __init kretprobe_init(void) +{ + int ret; + + my_kretprobe.kp.symbol_name = func_name; + ret = register_kretprobe(&my_kretprobe); + if (ret < 0) { + printk(KERN_INFO "register_kretprobe failed, returned %d\n", + ret); + return -1; + } + printk(KERN_INFO "Planted return probe at %s: %p\n", + my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr); + return 0; +} + +static void __exit kretprobe_exit(void) +{ + unregister_kretprobe(&my_kretprobe); + printk(KERN_INFO "kretprobe at %p unregistered\n", + my_kretprobe.kp.addr); + + /* nmissed > 0 suggests that maxactive was set too low. */ + printk(KERN_INFO "Missed probing %d instances of %s\n", + my_kretprobe.nmissed, my_kretprobe.kp.symbol_name); +} + +module_init(kretprobe_init) +module_exit(kretprobe_exit) +MODULE_LICENSE("GPL"); diff --git a/kernel/samples/livepatch/Makefile b/kernel/samples/livepatch/Makefile new file mode 100644 index 000000000..10319d7ea --- /dev/null +++ b/kernel/samples/livepatch/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_LIVEPATCH) += livepatch-sample.o diff --git a/kernel/samples/livepatch/livepatch-sample.c b/kernel/samples/livepatch/livepatch-sample.c new file mode 100644 index 000000000..fb8c8614e --- /dev/null +++ b/kernel/samples/livepatch/livepatch-sample.c @@ -0,0 +1,91 @@ +/* + * livepatch-sample.c - Kernel Live Patching Sample Module + * + * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +/* + * This (dumb) live patch overrides the function that prints the + * kernel boot cmdline when /proc/cmdline is read. + * + * Example: + * + * $ cat /proc/cmdline + * <your cmdline> + * + * $ insmod livepatch-sample.ko + * $ cat /proc/cmdline + * this has been live patched + * + * $ echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled + * $ cat /proc/cmdline + * <your cmdline> + */ + +#include <linux/seq_file.h> +static int livepatch_cmdline_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s\n", "this has been live patched"); + return 0; +} + +static struct klp_func funcs[] = { + { + .old_name = "cmdline_proc_show", + .new_func = livepatch_cmdline_proc_show, + }, { } +}; + +static struct klp_object objs[] = { + { + /* name being NULL means vmlinux */ + .funcs = funcs, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, +}; + +static int livepatch_init(void) +{ + int ret; + + ret = klp_register_patch(&patch); + if (ret) + return ret; + ret = klp_enable_patch(&patch); + if (ret) { + WARN_ON(klp_unregister_patch(&patch)); + return ret; + } + return 0; +} + +static void livepatch_exit(void) +{ + WARN_ON(klp_disable_patch(&patch)); + WARN_ON(klp_unregister_patch(&patch)); +} + +module_init(livepatch_init); +module_exit(livepatch_exit); +MODULE_LICENSE("GPL"); diff --git a/kernel/samples/pktgen/pktgen.conf-1-1 b/kernel/samples/pktgen/pktgen.conf-1-1 new file mode 100755 index 000000000..f91daad9e --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-1-1 @@ -0,0 +1,59 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. One CPU example. We add eth1. + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + + +# device config +# delay 0 means maximum speed. + +CLONE_SKB="clone_skb 1000000" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 60" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst 10.10.11.2" + pgset "dst_mac 00:04:23:08:91:dc" + + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 diff --git a/kernel/samples/pktgen/pktgen.conf-1-1-flows b/kernel/samples/pktgen/pktgen.conf-1-1-flows new file mode 100755 index 000000000..081749c97 --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-1-1-flows @@ -0,0 +1,67 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. One CPU example. We add eth1. + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + + +# device config +# delay 0 +# We need to do alloc for every skb since we cannot clone here. + +CLONE_SKB="clone_skb 0" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 60" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + # Random address with in the min-max range + pgset "flag IPDST_RND" + pgset "dst_min 10.0.0.0" + pgset "dst_max 10.255.255.255" + + # 8k Concurrent flows at 4 pkts + pgset "flows 8192" + pgset "flowlen 4" + + pgset "dst_mac 00:04:23:08:91:dc" + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 diff --git a/kernel/samples/pktgen/pktgen.conf-1-1-ip6 b/kernel/samples/pktgen/pktgen.conf-1-1-ip6 new file mode 100755 index 000000000..0b9ffd47f --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-1-1-ip6 @@ -0,0 +1,60 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. One CPU example. We add eth1. +# IPv6. Note increase in minimal packet length + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + + +# device config +# delay 0 + +CLONE_SKB="clone_skb 1000000" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 66" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst6 fec0::1" + pgset "src6 fec0::2" + pgset "dst_mac 00:04:23:08:91:dc" + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 diff --git a/kernel/samples/pktgen/pktgen.conf-1-1-ip6-rdos b/kernel/samples/pktgen/pktgen.conf-1-1-ip6-rdos new file mode 100755 index 000000000..ad98e5f40 --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-1-1-ip6-rdos @@ -0,0 +1,63 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. One CPU example. We add eth1. +# IPv6. Note increase in minimal packet length + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + + +# device config +# delay 0 means maximum speed. + +# We need to do alloc for every skb since we cannot clone here. +CLONE_SKB="clone_skb 0" + +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 66" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst6_min fec0::1" + pgset "dst6_max fec0::FFFF:FFFF" + + pgset "dst_mac 00:04:23:08:91:dc" + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 diff --git a/kernel/samples/pktgen/pktgen.conf-1-1-rdos b/kernel/samples/pktgen/pktgen.conf-1-1-rdos new file mode 100755 index 000000000..c7553be49 --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-1-1-rdos @@ -0,0 +1,64 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. One CPU example. We add eth1. + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + + +# device config +# delay 0 + +# We need to do alloc for every skb since we cannot clone here. + +CLONE_SKB="clone_skb 0" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 60" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + # Random address with in the min-max range + pgset "flag IPDST_RND" + pgset "dst_min 10.0.0.0" + pgset "dst_max 10.255.255.255" + + pgset "dst_mac 00:04:23:08:91:dc" + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 diff --git a/kernel/samples/pktgen/pktgen.conf-1-2 b/kernel/samples/pktgen/pktgen.conf-1-2 new file mode 100755 index 000000000..ba4eb26e1 --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-1-2 @@ -0,0 +1,69 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# One CPU means one thread. One CPU example. We add eth1, eth2 respectivly. + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + echo "Adding eth2" + pgset "add_device eth2" + + +# device config +# delay 0 means maximum speed. + +CLONE_SKB="clone_skb 1000000" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 60" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst 10.10.11.2" + pgset "dst_mac 00:04:23:08:91:dc" + +PGDEV=/proc/net/pktgen/eth2 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst 192.168.2.2" + pgset "dst_mac 00:04:23:08:91:de" + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2 diff --git a/kernel/samples/pktgen/pktgen.conf-2-1 b/kernel/samples/pktgen/pktgen.conf-2-1 new file mode 100755 index 000000000..e108e97d6 --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-2-1 @@ -0,0 +1,66 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. Two CPU example. We add eth1 to the first +# and leave the second idle. + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + +# We need to remove old config since we dont use this thread. We can only +# one NIC on one CPU due to affinity reasons. + +PGDEV=/proc/net/pktgen/kpktgend_1 + echo "Removing all devices" + pgset "rem_device_all" + +# device config +# delay 0 means maximum speed. + +CLONE_SKB="clone_skb 1000000" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 60" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst 10.10.11.2" + pgset "dst_mac 00:04:23:08:91:dc" + + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 diff --git a/kernel/samples/pktgen/pktgen.conf-2-2 b/kernel/samples/pktgen/pktgen.conf-2-2 new file mode 100755 index 000000000..acea15503 --- /dev/null +++ b/kernel/samples/pktgen/pktgen.conf-2-2 @@ -0,0 +1,73 @@ +#!/bin/bash + +#modprobe pktgen + + +function pgset() { + local result + + echo $1 > $PGDEV + + result=`cat $PGDEV | fgrep "Result: OK:"` + if [ "$result" = "" ]; then + cat $PGDEV | fgrep Result: + fi +} + +# Config Start Here ----------------------------------------------------------- + + +# thread config +# Each CPU has its own thread. Two CPU example. We add eth1, eth2 respectively. + +PGDEV=/proc/net/pktgen/kpktgend_0 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth1" + pgset "add_device eth1" + +PGDEV=/proc/net/pktgen/kpktgend_1 + echo "Removing all devices" + pgset "rem_device_all" + echo "Adding eth2" + pgset "add_device eth2" + + +# device config +# delay 0 means maximum speed. + +CLONE_SKB="clone_skb 1000000" +# NIC adds 4 bytes CRC +PKT_SIZE="pkt_size 60" + +# COUNT 0 means forever +#COUNT="count 0" +COUNT="count 10000000" +DELAY="delay 0" + +PGDEV=/proc/net/pktgen/eth1 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst 10.10.11.2" + pgset "dst_mac 00:04:23:08:91:dc" + +PGDEV=/proc/net/pktgen/eth2 + echo "Configuring $PGDEV" + pgset "$COUNT" + pgset "$CLONE_SKB" + pgset "$PKT_SIZE" + pgset "$DELAY" + pgset "dst 192.168.2.2" + pgset "dst_mac 00:04:23:08:91:de" + +# Time to run +PGDEV=/proc/net/pktgen/pgctrl + + echo "Running... ctrl^C to stop" + trap true INT + pgset "start" + echo "Done" + cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2 diff --git a/kernel/samples/rpmsg/Makefile b/kernel/samples/rpmsg/Makefile new file mode 100644 index 000000000..2d4973c69 --- /dev/null +++ b/kernel/samples/rpmsg/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_RPMSG_CLIENT) += rpmsg_client_sample.o diff --git a/kernel/samples/rpmsg/rpmsg_client_sample.c b/kernel/samples/rpmsg/rpmsg_client_sample.c new file mode 100644 index 000000000..59b134408 --- /dev/null +++ b/kernel/samples/rpmsg/rpmsg_client_sample.c @@ -0,0 +1,100 @@ +/* + * Remote processor messaging - sample client driver + * + * Copyright (C) 2011 Texas Instruments, Inc. + * Copyright (C) 2011 Google, Inc. + * + * Ohad Ben-Cohen <ohad@wizery.com> + * Brian Swetland <swetland@google.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rpmsg.h> + +#define MSG "hello world!" +#define MSG_LIMIT 100 + +static void rpmsg_sample_cb(struct rpmsg_channel *rpdev, void *data, int len, + void *priv, u32 src) +{ + int ret; + static int rx_count; + + dev_info(&rpdev->dev, "incoming msg %d (src: 0x%x)\n", ++rx_count, src); + + print_hex_dump(KERN_DEBUG, __func__, DUMP_PREFIX_NONE, 16, 1, + data, len, true); + + /* samples should not live forever */ + if (rx_count >= MSG_LIMIT) { + dev_info(&rpdev->dev, "goodbye!\n"); + return; + } + + /* send a new message now */ + ret = rpmsg_send(rpdev, MSG, strlen(MSG)); + if (ret) + dev_err(&rpdev->dev, "rpmsg_send failed: %d\n", ret); +} + +static int rpmsg_sample_probe(struct rpmsg_channel *rpdev) +{ + int ret; + + dev_info(&rpdev->dev, "new channel: 0x%x -> 0x%x!\n", + rpdev->src, rpdev->dst); + + /* send a message to our remote processor */ + ret = rpmsg_send(rpdev, MSG, strlen(MSG)); + if (ret) { + dev_err(&rpdev->dev, "rpmsg_send failed: %d\n", ret); + return ret; + } + + return 0; +} + +static void rpmsg_sample_remove(struct rpmsg_channel *rpdev) +{ + dev_info(&rpdev->dev, "rpmsg sample client driver is removed\n"); +} + +static struct rpmsg_device_id rpmsg_driver_sample_id_table[] = { + { .name = "rpmsg-client-sample" }, + { }, +}; +MODULE_DEVICE_TABLE(rpmsg, rpmsg_driver_sample_id_table); + +static struct rpmsg_driver rpmsg_sample_client = { + .drv.name = KBUILD_MODNAME, + .drv.owner = THIS_MODULE, + .id_table = rpmsg_driver_sample_id_table, + .probe = rpmsg_sample_probe, + .callback = rpmsg_sample_cb, + .remove = rpmsg_sample_remove, +}; + +static int __init rpmsg_client_sample_init(void) +{ + return register_rpmsg_driver(&rpmsg_sample_client); +} +module_init(rpmsg_client_sample_init); + +static void __exit rpmsg_client_sample_fini(void) +{ + unregister_rpmsg_driver(&rpmsg_sample_client); +} +module_exit(rpmsg_client_sample_fini); + +MODULE_DESCRIPTION("Remote processor messaging sample client driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/samples/seccomp/.gitignore b/kernel/samples/seccomp/.gitignore new file mode 100644 index 000000000..78fb78184 --- /dev/null +++ b/kernel/samples/seccomp/.gitignore @@ -0,0 +1,3 @@ +bpf-direct +bpf-fancy +dropper diff --git a/kernel/samples/seccomp/Makefile b/kernel/samples/seccomp/Makefile new file mode 100644 index 000000000..1b4e4b8f5 --- /dev/null +++ b/kernel/samples/seccomp/Makefile @@ -0,0 +1,48 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +hostprogs-$(CONFIG_SECCOMP_FILTER) := bpf-fancy dropper bpf-direct + +HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include +HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include +HOSTCFLAGS_bpf-helper.o += -I$(objtree)/usr/include +HOSTCFLAGS_bpf-helper.o += -idirafter $(objtree)/include +bpf-fancy-objs := bpf-fancy.o bpf-helper.o + +HOSTCFLAGS_dropper.o += -I$(objtree)/usr/include +HOSTCFLAGS_dropper.o += -idirafter $(objtree)/include +dropper-objs := dropper.o + +HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include +HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include +bpf-direct-objs := bpf-direct.o + +# Try to match the kernel target. +ifndef CROSS_COMPILE +ifndef CONFIG_64BIT + +# s390 has -m31 flag to build 31 bit binaries +ifndef CONFIG_S390 +MFLAG = -m32 +else +MFLAG = -m31 +endif + +HOSTCFLAGS_bpf-direct.o += $(MFLAG) +HOSTCFLAGS_dropper.o += $(MFLAG) +HOSTCFLAGS_bpf-helper.o += $(MFLAG) +HOSTCFLAGS_bpf-fancy.o += $(MFLAG) +HOSTLOADLIBES_bpf-direct += $(MFLAG) +HOSTLOADLIBES_bpf-fancy += $(MFLAG) +HOSTLOADLIBES_dropper += $(MFLAG) +endif +always := $(hostprogs-y) +else +# MIPS system calls are defined based on the -mabi that is passed +# to the toolchain which may or may not be a valid option +# for the host toolchain. So disable tests if target architecture +# is MIPS but the host isn't. +ifndef CONFIG_MIPS +always := $(hostprogs-y) +endif +endif diff --git a/kernel/samples/seccomp/bpf-direct.c b/kernel/samples/seccomp/bpf-direct.c new file mode 100644 index 000000000..151ec3f52 --- /dev/null +++ b/kernel/samples/seccomp/bpf-direct.c @@ -0,0 +1,190 @@ +/* + * Seccomp filter example for x86 (32-bit and 64-bit) with BPF macros + * + * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> + * Author: Will Drewry <wad@chromium.org> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_SET_SECCOMP, 2, ...). + */ +#if defined(__i386__) || defined(__x86_64__) +#define SUPPORTED_ARCH 1 +#endif + +#if defined(SUPPORTED_ARCH) +#define __USE_GNU 1 +#define _GNU_SOURCE 1 + +#include <linux/types.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <linux/unistd.h> +#include <signal.h> +#include <stdio.h> +#include <stddef.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) +#define syscall_nr (offsetof(struct seccomp_data, nr)) + +#if defined(__i386__) +#define REG_RESULT REG_EAX +#define REG_SYSCALL REG_EAX +#define REG_ARG0 REG_EBX +#define REG_ARG1 REG_ECX +#define REG_ARG2 REG_EDX +#define REG_ARG3 REG_ESI +#define REG_ARG4 REG_EDI +#define REG_ARG5 REG_EBP +#elif defined(__x86_64__) +#define REG_RESULT REG_RAX +#define REG_SYSCALL REG_RAX +#define REG_ARG0 REG_RDI +#define REG_ARG1 REG_RSI +#define REG_ARG2 REG_RDX +#define REG_ARG3 REG_R10 +#define REG_ARG4 REG_R8 +#define REG_ARG5 REG_R9 +#endif + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#endif + +#ifndef SYS_SECCOMP +#define SYS_SECCOMP 1 +#endif + +static void emulator(int nr, siginfo_t *info, void *void_context) +{ + ucontext_t *ctx = (ucontext_t *)(void_context); + int syscall; + char *buf; + ssize_t bytes; + size_t len; + if (info->si_code != SYS_SECCOMP) + return; + if (!ctx) + return; + syscall = ctx->uc_mcontext.gregs[REG_SYSCALL]; + buf = (char *) ctx->uc_mcontext.gregs[REG_ARG1]; + len = (size_t) ctx->uc_mcontext.gregs[REG_ARG2]; + + if (syscall != __NR_write) + return; + if (ctx->uc_mcontext.gregs[REG_ARG0] != STDERR_FILENO) + return; + /* Redirect stderr messages to stdout. Doesn't handle EINTR, etc */ + ctx->uc_mcontext.gregs[REG_RESULT] = -1; + if (write(STDOUT_FILENO, "[ERR] ", 6) > 0) { + bytes = write(STDOUT_FILENO, buf, len); + ctx->uc_mcontext.gregs[REG_RESULT] = bytes; + } + return; +} + +static int install_emulator(void) +{ + struct sigaction act; + sigset_t mask; + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + sigaddset(&mask, SIGSYS); + + act.sa_sigaction = &emulator; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGSYS, &act, NULL) < 0) { + perror("sigaction"); + return -1; + } + if (sigprocmask(SIG_UNBLOCK, &mask, NULL)) { + perror("sigprocmask"); + return -1; + } + return 0; +} + +static int install_filter(void) +{ + struct sock_filter filter[] = { + /* Grab the system call number */ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr), + /* Jump table for the allowed syscalls */ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_rt_sigreturn, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), +#ifdef __NR_sigreturn + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_sigreturn, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), +#endif + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit_group, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_exit, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_read, 1, 0), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_write, 3, 2), + + /* Check that read is only using stdin. */ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + + /* Check that write is only using stdout */ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDOUT_FILENO, 1, 0), + /* Trap attempts to write to stderr */ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDERR_FILENO, 1, 2), + + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .filter = filter, + }; + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + perror("prctl(NO_NEW_PRIVS)"); + return 1; + } + + + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + perror("prctl"); + return 1; + } + return 0; +} + +#define payload(_c) (_c), sizeof((_c)) +int main(int argc, char **argv) +{ + char buf[4096]; + ssize_t bytes = 0; + if (install_emulator()) + return 1; + if (install_filter()) + return 1; + syscall(__NR_write, STDOUT_FILENO, + payload("OHAI! WHAT IS YOUR NAME? ")); + bytes = syscall(__NR_read, STDIN_FILENO, buf, sizeof(buf)); + syscall(__NR_write, STDOUT_FILENO, payload("HELLO, ")); + syscall(__NR_write, STDOUT_FILENO, buf, bytes); + syscall(__NR_write, STDERR_FILENO, + payload("Error message going to STDERR\n")); + return 0; +} +#else /* SUPPORTED_ARCH */ +/* + * This sample is x86-only. Since kernel samples are compiled with the + * host toolchain, a non-x86 host will result in using only the main() + * below. + */ +int main(void) +{ + return 1; +} +#endif /* SUPPORTED_ARCH */ diff --git a/kernel/samples/seccomp/bpf-fancy.c b/kernel/samples/seccomp/bpf-fancy.c new file mode 100644 index 000000000..e8b24f443 --- /dev/null +++ b/kernel/samples/seccomp/bpf-fancy.c @@ -0,0 +1,104 @@ +/* + * Seccomp BPF example using a macro-based generator. + * + * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> + * Author: Will Drewry <wad@chromium.org> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_ATTACH_SECCOMP_FILTER). + */ + +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <linux/unistd.h> +#include <stdio.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "bpf-helper.h" + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#endif + +int main(int argc, char **argv) +{ + struct bpf_labels l = { + .count = 0, + }; + static const char msg1[] = "Please type something: "; + static const char msg2[] = "You typed: "; + char buf[256]; + struct sock_filter filter[] = { + /* TODO: LOAD_SYSCALL_NR(arch) and enforce an arch */ + LOAD_SYSCALL_NR, + SYSCALL(__NR_exit, ALLOW), + SYSCALL(__NR_exit_group, ALLOW), + SYSCALL(__NR_write, JUMP(&l, write_fd)), + SYSCALL(__NR_read, JUMP(&l, read)), + DENY, /* Don't passthrough into a label */ + + LABEL(&l, read), + ARG(0), + JNE(STDIN_FILENO, DENY), + ARG(1), + JNE((unsigned long)buf, DENY), + ARG(2), + JGE(sizeof(buf), DENY), + ALLOW, + + LABEL(&l, write_fd), + ARG(0), + JEQ(STDOUT_FILENO, JUMP(&l, write_buf)), + JEQ(STDERR_FILENO, JUMP(&l, write_buf)), + DENY, + + LABEL(&l, write_buf), + ARG(1), + JEQ((unsigned long)msg1, JUMP(&l, msg1_len)), + JEQ((unsigned long)msg2, JUMP(&l, msg2_len)), + JEQ((unsigned long)buf, JUMP(&l, buf_len)), + DENY, + + LABEL(&l, msg1_len), + ARG(2), + JLT(sizeof(msg1), ALLOW), + DENY, + + LABEL(&l, msg2_len), + ARG(2), + JLT(sizeof(msg2), ALLOW), + DENY, + + LABEL(&l, buf_len), + ARG(2), + JLT(sizeof(buf), ALLOW), + DENY, + }; + struct sock_fprog prog = { + .filter = filter, + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + }; + ssize_t bytes; + bpf_resolve_jumps(&l, filter, sizeof(filter)/sizeof(*filter)); + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + perror("prctl(NO_NEW_PRIVS)"); + return 1; + } + + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + perror("prctl(SECCOMP)"); + return 1; + } + syscall(__NR_write, STDOUT_FILENO, msg1, strlen(msg1)); + bytes = syscall(__NR_read, STDIN_FILENO, buf, sizeof(buf)-1); + bytes = (bytes > 0 ? bytes : 0); + syscall(__NR_write, STDERR_FILENO, msg2, strlen(msg2)); + syscall(__NR_write, STDERR_FILENO, buf, bytes); + /* Now get killed */ + syscall(__NR_write, STDERR_FILENO, msg2, strlen(msg2)+2); + return 0; +} diff --git a/kernel/samples/seccomp/bpf-helper.c b/kernel/samples/seccomp/bpf-helper.c new file mode 100644 index 000000000..05cb4d5ff --- /dev/null +++ b/kernel/samples/seccomp/bpf-helper.c @@ -0,0 +1,95 @@ +/* + * Seccomp BPF helper functions + * + * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> + * Author: Will Drewry <wad@chromium.org> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_ATTACH_SECCOMP_FILTER). + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "bpf-helper.h" + +int bpf_resolve_jumps(struct bpf_labels *labels, + struct sock_filter *filter, size_t count) +{ + struct sock_filter *begin = filter; + __u8 insn = count - 1; + + if (count < 1) + return -1; + /* + * Walk it once, backwards, to build the label table and do fixups. + * Since backward jumps are disallowed by BPF, this is easy. + */ + filter += insn; + for (; filter >= begin; --insn, --filter) { + if (filter->code != (BPF_JMP+BPF_JA)) + continue; + switch ((filter->jt<<8)|filter->jf) { + case (JUMP_JT<<8)|JUMP_JF: + if (labels->labels[filter->k].location == 0xffffffff) { + fprintf(stderr, "Unresolved label: '%s'\n", + labels->labels[filter->k].label); + return 1; + } + filter->k = labels->labels[filter->k].location - + (insn + 1); + filter->jt = 0; + filter->jf = 0; + continue; + case (LABEL_JT<<8)|LABEL_JF: + if (labels->labels[filter->k].location != 0xffffffff) { + fprintf(stderr, "Duplicate label use: '%s'\n", + labels->labels[filter->k].label); + return 1; + } + labels->labels[filter->k].location = insn; + filter->k = 0; /* fall through */ + filter->jt = 0; + filter->jf = 0; + continue; + } + } + return 0; +} + +/* Simple lookup table for labels. */ +__u32 seccomp_bpf_label(struct bpf_labels *labels, const char *label) +{ + struct __bpf_label *begin = labels->labels, *end; + int id; + + if (labels->count == BPF_LABELS_MAX) { + fprintf(stderr, "Too many labels\n"); + exit(1); + } + if (labels->count == 0) { + begin->label = label; + begin->location = 0xffffffff; + labels->count++; + return 0; + } + end = begin + labels->count; + for (id = 0; begin < end; ++begin, ++id) { + if (!strcmp(label, begin->label)) + return id; + } + begin->label = label; + begin->location = 0xffffffff; + labels->count++; + return id; +} + +void seccomp_bpf_print(struct sock_filter *filter, size_t count) +{ + struct sock_filter *end = filter + count; + for ( ; filter < end; ++filter) + printf("{ code=%u,jt=%u,jf=%u,k=%u },\n", + filter->code, filter->jt, filter->jf, filter->k); +} diff --git a/kernel/samples/seccomp/bpf-helper.h b/kernel/samples/seccomp/bpf-helper.h new file mode 100644 index 000000000..38ee70f3c --- /dev/null +++ b/kernel/samples/seccomp/bpf-helper.h @@ -0,0 +1,243 @@ +/* + * Example wrapper around BPF macros. + * + * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> + * Author: Will Drewry <wad@chromium.org> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_SET_SECCOMP, 2, ...). + * + * No guarantees are provided with respect to the correctness + * or functionality of this code. + */ +#ifndef __BPF_HELPER_H__ +#define __BPF_HELPER_H__ + +#include <asm/bitsperlong.h> /* for __BITS_PER_LONG */ +#include <endian.h> +#include <linux/filter.h> +#include <linux/seccomp.h> /* for seccomp_data */ +#include <linux/types.h> +#include <linux/unistd.h> +#include <stddef.h> + +#define BPF_LABELS_MAX 256 +struct bpf_labels { + int count; + struct __bpf_label { + const char *label; + __u32 location; + } labels[BPF_LABELS_MAX]; +}; + +int bpf_resolve_jumps(struct bpf_labels *labels, + struct sock_filter *filter, size_t count); +__u32 seccomp_bpf_label(struct bpf_labels *labels, const char *label); +void seccomp_bpf_print(struct sock_filter *filter, size_t count); + +#define JUMP_JT 0xff +#define JUMP_JF 0xff +#define LABEL_JT 0xfe +#define LABEL_JF 0xfe + +#define ALLOW \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) +#define DENY \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) +#define JUMP(labels, label) \ + BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ + JUMP_JT, JUMP_JF) +#define LABEL(labels, label) \ + BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ + LABEL_JT, LABEL_JF) +#define SYSCALL(nr, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (nr), 0, 1), \ + jt + +/* Lame, but just an example */ +#define FIND_LABEL(labels, label) seccomp_bpf_label((labels), #label) + +#define EXPAND(...) __VA_ARGS__ + +/* Ensure that we load the logically correct offset. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) +#else +#error "Unknown endianness" +#endif + +/* Map all width-sensitive operations */ +#if __BITS_PER_LONG == 32 + +#define JEQ(x, jt) JEQ32(x, EXPAND(jt)) +#define JNE(x, jt) JNE32(x, EXPAND(jt)) +#define JGT(x, jt) JGT32(x, EXPAND(jt)) +#define JLT(x, jt) JLT32(x, EXPAND(jt)) +#define JGE(x, jt) JGE32(x, EXPAND(jt)) +#define JLE(x, jt) JLE32(x, EXPAND(jt)) +#define JA(x, jt) JA32(x, EXPAND(jt)) +#define ARG(i) ARG_32(i) + +#elif __BITS_PER_LONG == 64 + +/* Ensure that we load the logically correct offset. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ENDIAN(_lo, _hi) _lo, _hi +#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define ENDIAN(_lo, _hi) _hi, _lo +#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) +#endif + +union arg64 { + struct { + __u32 ENDIAN(lo32, hi32); + }; + __u64 u64; +}; + +#define JEQ(x, jt) \ + JEQ64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JGT(x, jt) \ + JGT64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JGE(x, jt) \ + JGE64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JNE(x, jt) \ + JNE64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JLT(x, jt) \ + JLT64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JLE(x, jt) \ + JLE64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) + +#define JA(x, jt) \ + JA64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define ARG(i) ARG_64(i) + +#else +#error __BITS_PER_LONG value unusable. +#endif + +/* Loads the arg into A */ +#define ARG_32(idx) \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)) + +/* Loads hi into A and lo in X */ +#define ARG_64(idx) \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)), \ + BPF_STMT(BPF_ST, 0), /* lo -> M[0] */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, HI_ARG(idx)), \ + BPF_STMT(BPF_ST, 1) /* hi -> M[1] */ + +#define JEQ32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 0, 1), \ + jt + +#define JNE32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 1, 0), \ + jt + +/* Checks the lo, then swaps to check the hi. A=lo,X=hi */ +#define JEQ64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JNE64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 5, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JA32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (value), 0, 1), \ + jt + +#define JA64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JGE32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 0, 1), \ + jt + +#define JLT32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \ + jt + +/* Shortcut checking if hi > arg.hi. */ +#define JGE64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JLT64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JGT32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 0, 1), \ + jt + +#define JLE32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 1, 0), \ + jt + +/* Check hi > args.hi first, then do the GE checking */ +#define JGT64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JLE64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 6, 0), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define LOAD_SYSCALL_NR \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, nr)) + +#endif /* __BPF_HELPER_H__ */ diff --git a/kernel/samples/seccomp/dropper.c b/kernel/samples/seccomp/dropper.c new file mode 100644 index 000000000..c69c347c7 --- /dev/null +++ b/kernel/samples/seccomp/dropper.c @@ -0,0 +1,68 @@ +/* + * Naive system call dropper built on seccomp_filter. + * + * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> + * Author: Will Drewry <wad@chromium.org> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_SET_SECCOMP, 2, ...). + * + * When run, returns the specified errno for the specified + * system call number against the given architecture. + * + * Run this one as root as PR_SET_NO_NEW_PRIVS is not called. + */ + +#include <errno.h> +#include <linux/audit.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <linux/unistd.h> +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <sys/prctl.h> +#include <unistd.h> + +static int install_filter(int nr, int arch, int error) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + (offsetof(struct seccomp_data, arch))), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, arch, 0, 3), + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + (offsetof(struct seccomp_data, nr))), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET+BPF_K, + SECCOMP_RET_ERRNO|(error & SECCOMP_RET_DATA)), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .filter = filter, + }; + if (prctl(PR_SET_SECCOMP, 2, &prog)) { + perror("prctl"); + return 1; + } + return 0; +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage:\n" + "dropper <syscall_nr> <arch> <errno> <prog> [<args>]\n" + "Hint: AUDIT_ARCH_I386: 0x%X\n" + " AUDIT_ARCH_X86_64: 0x%X\n" + "\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64); + return 1; + } + if (install_filter(strtol(argv[1], NULL, 0), strtol(argv[2], NULL, 0), + strtol(argv[3], NULL, 0))) + return 1; + execv(argv[4], &argv[4]); + printf("Failed to execv\n"); + return 255; +} diff --git a/kernel/samples/trace_events/Makefile b/kernel/samples/trace_events/Makefile new file mode 100644 index 000000000..0f8d92120 --- /dev/null +++ b/kernel/samples/trace_events/Makefile @@ -0,0 +1,14 @@ +# builds the trace events example kernel modules; +# then to use one (as root): insmod <module_name.ko> + +# If you include a trace header outside of include/trace/events +# then the file that does the #define CREATE_TRACE_POINTS must +# have that tracer file in its main search path. This is because +# define_trace.h will include it, and must be able to find it from +# the include/trace directory. +# +# Here trace-events-sample.c does the CREATE_TRACE_POINTS. +# +CFLAGS_trace-events-sample.o := -I$(src) + +obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o diff --git a/kernel/samples/trace_events/trace-events-sample.c b/kernel/samples/trace_events/trace-events-sample.c new file mode 100644 index 000000000..880a7d1d2 --- /dev/null +++ b/kernel/samples/trace_events/trace-events-sample.c @@ -0,0 +1,130 @@ +#include <linux/module.h> +#include <linux/kthread.h> + +/* + * Any file that uses trace points, must include the header. + * But only one file, must include the header by defining + * CREATE_TRACE_POINTS first. This will make the C code that + * creates the handles for the trace points. + */ +#define CREATE_TRACE_POINTS +#include "trace-events-sample.h" + +static const char *random_strings[] = { + "Mother Goose", + "Snoopy", + "Gandalf", + "Frodo", + "One ring to rule them all" +}; + +static void simple_thread_func(int cnt) +{ + int array[6]; + int len = cnt % 5; + int i; + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + + for (i = 0; i < len; i++) + array[i] = i + 1; + array[i] = 0; + + /* Silly tracepoints */ + trace_foo_bar("hello", cnt, array, random_strings[len], + tsk_cpus_allowed(current)); + + trace_foo_with_template_simple("HELLO", cnt); + + trace_foo_bar_with_cond("Some times print", cnt); + + trace_foo_with_template_cond("prints other times", cnt); + + trace_foo_with_template_print("I have to be different", cnt); +} + +static int simple_thread(void *arg) +{ + int cnt = 0; + + while (!kthread_should_stop()) + simple_thread_func(cnt++); + + return 0; +} + +static struct task_struct *simple_tsk; +static struct task_struct *simple_tsk_fn; + +static void simple_thread_func_fn(int cnt) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + + /* More silly tracepoints */ + trace_foo_bar_with_fn("Look at me", cnt); + trace_foo_with_template_fn("Look at me too", cnt); +} + +static int simple_thread_fn(void *arg) +{ + int cnt = 0; + + while (!kthread_should_stop()) + simple_thread_func_fn(cnt++); + + return 0; +} + +static DEFINE_MUTEX(thread_mutex); + +void foo_bar_reg(void) +{ + pr_info("Starting thread for foo_bar_fn\n"); + /* + * We shouldn't be able to start a trace when the module is + * unloading (there's other locks to prevent that). But + * for consistency sake, we still take the thread_mutex. + */ + mutex_lock(&thread_mutex); + simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); + mutex_unlock(&thread_mutex); +} + +void foo_bar_unreg(void) +{ + pr_info("Killing thread for foo_bar_fn\n"); + /* protect against module unloading */ + mutex_lock(&thread_mutex); + if (simple_tsk_fn) + kthread_stop(simple_tsk_fn); + simple_tsk_fn = NULL; + mutex_unlock(&thread_mutex); +} + +static int __init trace_event_init(void) +{ + simple_tsk = kthread_run(simple_thread, NULL, "event-sample"); + if (IS_ERR(simple_tsk)) + return -1; + + return 0; +} + +static void __exit trace_event_exit(void) +{ + kthread_stop(simple_tsk); + mutex_lock(&thread_mutex); + if (simple_tsk_fn) + kthread_stop(simple_tsk_fn); + simple_tsk_fn = NULL; + mutex_unlock(&thread_mutex); +} + +module_init(trace_event_init); +module_exit(trace_event_exit); + +MODULE_AUTHOR("Steven Rostedt"); +MODULE_DESCRIPTION("trace-events-sample"); +MODULE_LICENSE("GPL"); diff --git a/kernel/samples/trace_events/trace-events-sample.h b/kernel/samples/trace_events/trace-events-sample.h new file mode 100644 index 000000000..8965d1bb8 --- /dev/null +++ b/kernel/samples/trace_events/trace-events-sample.h @@ -0,0 +1,520 @@ +/* + * If TRACE_SYSTEM is defined, that will be the directory created + * in the ftrace directory under /sys/kernel/tracing/events/<system> + * + * The define_trace.h below will also look for a file name of + * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here. + * In this case, it would look for sample.h + * + * If the header name will be different than the system name + * (as in this case), then you can override the header name that + * define_trace.h will look up by defining TRACE_INCLUDE_FILE + * + * This file is called trace-events-sample.h but we want the system + * to be called "sample". Therefore we must define the name of this + * file: + * + * #define TRACE_INCLUDE_FILE trace-events-sample + * + * As we do an the bottom of this file. + * + * Notice that TRACE_SYSTEM should be defined outside of #if + * protection, just like TRACE_INCLUDE_FILE. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sample-trace + +/* + * TRACE_SYSTEM is expected to be a C valid variable (alpha-numeric + * and underscore), although it may start with numbers. If for some + * reason it is not, you need to add the following lines: + */ +#undef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR sample_trace +/* + * But the above is only needed if TRACE_SYSTEM is not alpha-numeric + * and underscored. By default, TRACE_SYSTEM_VAR will be equal to + * TRACE_SYSTEM. As TRACE_SYSTEM_VAR must be alpha-numeric, if + * TRACE_SYSTEM is not, then TRACE_SYSTEM_VAR must be defined with + * only alpha-numeric and underscores. + * + * The TRACE_SYSTEM_VAR is only used internally and not visible to + * user space. + */ + +/* + * Notice that this file is not protected like a normal header. + * We also must allow for rereading of this file. The + * + * || defined(TRACE_HEADER_MULTI_READ) + * + * serves this purpose. + */ +#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENT_SAMPLE_H + +/* + * All trace headers should include tracepoint.h, until we finally + * make it into a standard header. + */ +#include <linux/tracepoint.h> + +/* + * The TRACE_EVENT macro is broken up into 5 parts. + * + * name: name of the trace point. This is also how to enable the tracepoint. + * A function called trace_foo_bar() will be created. + * + * proto: the prototype of the function trace_foo_bar() + * Here it is trace_foo_bar(char *foo, int bar). + * + * args: must match the arguments in the prototype. + * Here it is simply "foo, bar". + * + * struct: This defines the way the data will be stored in the ring buffer. + * The items declared here become part of a special structure + * called "__entry", which can be used in the fast_assign part of the + * TRACE_EVENT macro. + * + * Here are the currently defined types you can use: + * + * __field : Is broken up into type and name. Where type can be any + * primitive type (integer, long or pointer). + * + * __field(int, foo) + * + * __entry->foo = 5; + * + * __field_struct : This can be any static complex data type (struct, union + * but not an array). Be careful using complex types, as each + * event is limited in size, and copying large amounts of data + * into the ring buffer can slow things down. + * + * __field_struct(struct bar, foo) + * + * __entry->bar.x = y; + + * __array: There are three fields (type, name, size). The type is the + * type of elements in teh array, the name is the name of the array. + * size is the number of items in the array (not the total size). + * + * __array( char, foo, 10) is the same as saying: char foo[10]; + * + * Assigning arrays can be done like any array: + * + * __entry->foo[0] = 'a'; + * + * memcpy(__entry->foo, bar, 10); + * + * __dynamic_array: This is similar to array, but can vary is size from + * instance to instance of the tracepoint being called. + * Like __array, this too has three elements (type, name, size); + * type is the type of the element, name is the name of the array. + * The size is different than __array. It is not a static number, + * but the algorithm to figure out the length of the array for the + * specific instance of tracepoint. Again, size is the numebr of + * items in the array, not the total length in bytes. + * + * __dynamic_array( int, foo, bar) is similar to: int foo[bar]; + * + * Note, unlike arrays, you must use the __get_dynamic_array() macro + * to access the array. + * + * memcpy(__get_dynamic_array(foo), bar, 10); + * + * Notice, that "__entry" is not needed here. + * + * __string: This is a special kind of __dynamic_array. It expects to + * have a nul terminated character array passed to it (it allows + * for NULL too, which would be converted into "(null)"). __string + * takes two paramenter (name, src), where name is the name of + * the string saved, and src is the string to copy into the + * ring buffer. + * + * __string(foo, bar) is similar to: strcpy(foo, bar) + * + * To assign a string, use the helper macro __assign_str(). + * + * __assign_str(foo, bar); + * + * In most cases, the __assign_str() macro will take the same + * parameters as the __string() macro had to declare the string. + * + * __bitmask: This is another kind of __dynamic_array, but it expects + * an array of longs, and the number of bits to parse. It takes + * two parameters (name, nr_bits), where name is the name of the + * bitmask to save, and the nr_bits is the number of bits to record. + * + * __bitmask(target_cpu, nr_cpumask_bits) + * + * To assign a bitmask, use the __assign_bitmask() helper macro. + * + * __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits); + * + * + * fast_assign: This is a C like function that is used to store the items + * into the ring buffer. A special variable called "__entry" will be the + * structure that points into the ring buffer and has the same fields as + * described by the struct part of TRACE_EVENT above. + * + * printk: This is a way to print out the data in pretty print. This is + * useful if the system crashes and you are logging via a serial line, + * the data can be printed to the console using this "printk" method. + * This is also used to print out the data from the trace files. + * Again, the __entry macro is used to access the data from the ring buffer. + * + * Note, __dynamic_array, __string, and __bitmask require special helpers + * to access the data. + * + * For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo) + * Use __get_dynamic_array_len(foo) to get the length of the array + * saved. + * + * For __string(foo, bar) use __get_str(foo) + * + * For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus) + * + * + * Note, that for both the assign and the printk, __entry is the handler + * to the data structure in the ring buffer, and is defined by the + * TP_STRUCT__entry. + */ + +/* + * It is OK to have helper functions in the file, but they need to be protected + * from being defined more than once. Remember, this file gets included more + * than once. + */ +#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS +#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS +static inline int __length_of(const int *list) +{ + int i; + + if (!list) + return 0; + + for (i = 0; list[i]; i++) + ; + return i; +} + +enum { + TRACE_SAMPLE_FOO = 2, + TRACE_SAMPLE_BAR = 4, + TRACE_SAMPLE_ZOO = 8, +}; +#endif + +/* + * If enums are used in the TP_printk(), their names will be shown in + * format files and not their values. This can cause problems with user + * space programs that parse the format files to know how to translate + * the raw binary trace output into human readable text. + * + * To help out user space programs, any enum that is used in the TP_printk() + * should be defined by TRACE_DEFINE_ENUM() macro. All that is needed to + * be done is to add this macro with the enum within it in the trace + * header file, and it will be converted in the output. + */ + +TRACE_DEFINE_ENUM(TRACE_SAMPLE_FOO); +TRACE_DEFINE_ENUM(TRACE_SAMPLE_BAR); +TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO); + +TRACE_EVENT(foo_bar, + + TP_PROTO(const char *foo, int bar, const int *lst, + const char *string, const struct cpumask *mask), + + TP_ARGS(foo, bar, lst, string, mask), + + TP_STRUCT__entry( + __array( char, foo, 10 ) + __field( int, bar ) + __dynamic_array(int, list, __length_of(lst)) + __string( str, string ) + __bitmask( cpus, num_possible_cpus() ) + ), + + TP_fast_assign( + strlcpy(__entry->foo, foo, 10); + __entry->bar = bar; + memcpy(__get_dynamic_array(list), lst, + __length_of(lst) * sizeof(int)); + __assign_str(str, string); + __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); + ), + + TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar, + +/* + * Notice here the use of some helper functions. This includes: + * + * __print_symbolic( variable, { value, "string" }, ... ), + * + * The variable is tested against each value of the { } pair. If + * the variable matches one of the values, then it will print the + * string in that pair. If non are matched, it returns a string + * version of the number (if __entry->bar == 7 then "7" is returned). + */ + __print_symbolic(__entry->bar, + { 0, "zero" }, + { TRACE_SAMPLE_FOO, "TWO" }, + { TRACE_SAMPLE_BAR, "FOUR" }, + { TRACE_SAMPLE_ZOO, "EIGHT" }, + { 10, "TEN" } + ), + +/* + * __print_flags( variable, "delim", { value, "flag" }, ... ), + * + * This is similar to __print_symbolic, except that it tests the bits + * of the value. If ((FLAG & variable) == FLAG) then the string is + * printed. If more than one flag matches, then each one that does is + * also printed with delim in between them. + * If not all bits are accounted for, then the not found bits will be + * added in hex format: 0x506 will show BIT2|BIT4|0x500 + */ + __print_flags(__entry->bar, "|", + { 1, "BIT1" }, + { 2, "BIT2" }, + { 4, "BIT3" }, + { 8, "BIT4" } + ), +/* + * __print_array( array, len, element_size ) + * + * This prints out the array that is defined by __array in a nice format. + */ + __print_array(__get_dynamic_array(list), + __get_dynamic_array_len(list), + sizeof(int)), + __get_str(str), __get_bitmask(cpus)) +); + +/* + * There may be a case where a tracepoint should only be called if + * some condition is set. Otherwise the tracepoint should not be called. + * But to do something like: + * + * if (cond) + * trace_foo(); + * + * Would cause a little overhead when tracing is not enabled, and that + * overhead, even if small, is not something we want. As tracepoints + * use static branch (aka jump_labels), where no branch is taken to + * skip the tracepoint when not enabled, and a jmp is placed to jump + * to the tracepoint code when it is enabled, having a if statement + * nullifies that optimization. It would be nice to place that + * condition within the static branch. This is where TRACE_EVENT_CONDITION + * comes in. + * + * TRACE_EVENT_CONDITION() is just like TRACE_EVENT, except it adds another + * parameter just after args. Where TRACE_EVENT has: + * + * TRACE_EVENT(name, proto, args, struct, assign, printk) + * + * the CONDITION version has: + * + * TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, printk) + * + * Everything is the same as TRACE_EVENT except for the new cond. Think + * of the cond variable as: + * + * if (cond) + * trace_foo_bar_with_cond(); + * + * Except that the logic for the if branch is placed after the static branch. + * That is, the if statement that processes the condition will not be + * executed unless that traecpoint is enabled. Otherwise it still remains + * a nop. + */ +TRACE_EVENT_CONDITION(foo_bar_with_cond, + + TP_PROTO(const char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_CONDITION(!(bar % 10)), + + TP_STRUCT__entry( + __string( foo, foo ) + __field( int, bar ) + ), + + TP_fast_assign( + __assign_str(foo, foo); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __get_str(foo), __entry->bar) +); + +void foo_bar_reg(void); +void foo_bar_unreg(void); + +/* + * Now in the case that some function needs to be called when the + * tracepoint is enabled and/or when it is disabled, the + * TRACE_EVENT_FN() serves this purpose. This is just like TRACE_EVENT() + * but adds two more parameters at the end: + * + * TRACE_EVENT_FN( name, proto, args, struct, assign, printk, reg, unreg) + * + * reg and unreg are functions with the prototype of: + * + * void reg(void) + * + * The reg function gets called before the tracepoint is enabled, and + * the unreg function gets called after the tracepoint is disabled. + * + * Note, reg and unreg are allowed to be NULL. If you only need to + * call a function before enabling, or after disabling, just set one + * function and pass in NULL for the other parameter. + */ +TRACE_EVENT_FN(foo_bar_with_fn, + + TP_PROTO(const char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_STRUCT__entry( + __string( foo, foo ) + __field( int, bar ) + ), + + TP_fast_assign( + __assign_str(foo, foo); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __get_str(foo), __entry->bar), + + foo_bar_reg, foo_bar_unreg +); + +/* + * Each TRACE_EVENT macro creates several helper functions to produce + * the code to add the tracepoint, create the files in the trace + * directory, hook it to perf, assign the values and to print out + * the raw data from the ring buffer. To prevent too much bloat, + * if there are more than one tracepoint that uses the same format + * for the proto, args, struct, assign and printk, and only the name + * is different, it is highly recommended to use the DECLARE_EVENT_CLASS + * + * DECLARE_EVENT_CLASS() macro creates most of the functions for the + * tracepoint. Then DEFINE_EVENT() is use to hook a tracepoint to those + * functions. This DEFINE_EVENT() is an instance of the class and can + * be enabled and disabled separately from other events (either TRACE_EVENT + * or other DEFINE_EVENT()s). + * + * Note, TRACE_EVENT() itself is simply defined as: + * + * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \ + * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ + * DEFINE_EVENT(name, name, proto, args) + * + * The DEFINE_EVENT() also can be declared with conditions and reg functions: + * + * DEFINE_EVENT_CONDITION(template, name, proto, args, cond); + * DEFINE_EVENT_FN(template, name, proto, args, reg, unreg); + */ +DECLARE_EVENT_CLASS(foo_template, + + TP_PROTO(const char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_STRUCT__entry( + __string( foo, foo ) + __field( int, bar ) + ), + + TP_fast_assign( + __assign_str(foo, foo); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __get_str(foo), __entry->bar) +); + +/* + * Here's a better way for the previous samples (except, the first + * exmaple had more fields and could not be used here). + */ +DEFINE_EVENT(foo_template, foo_with_template_simple, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar)); + +DEFINE_EVENT_CONDITION(foo_template, foo_with_template_cond, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar), + TP_CONDITION(!(bar % 8))); + + +DEFINE_EVENT_FN(foo_template, foo_with_template_fn, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar), + foo_bar_reg, foo_bar_unreg); + +/* + * Anytime two events share basically the same values and have + * the same output, use the DECLARE_EVENT_CLASS() and DEFINE_EVENT() + * when ever possible. + */ + +/* + * If the event is similar to the DECLARE_EVENT_CLASS, but you need + * to have a different output, then use DEFINE_EVENT_PRINT() which + * lets you override the TP_printk() of the class. + */ + +DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar), + TP_printk("bar %s %d", __get_str(foo), __entry->bar)); + +#endif + +/***** NOTICE! The #if protection ends here. *****/ + + +/* + * There are several ways I could have done this. If I left out the + * TRACE_INCLUDE_PATH, then it would default to the kernel source + * include/trace/events directory. + * + * I could specify a path from the define_trace.h file back to this + * file. + * + * #define TRACE_INCLUDE_PATH ../../samples/trace_events + * + * But the safest and easiest way to simply make it use the directory + * that the file is in is to add in the Makefile: + * + * CFLAGS_trace-events-sample.o := -I$(src) + * + * This will make sure the current path is part of the include + * structure for our file so that define_trace.h can find it. + * + * I could have made only the top level directory the include: + * + * CFLAGS_trace-events-sample.o := -I$(PWD) + * + * And then let the path to this directory be the TRACE_INCLUDE_PATH: + * + * #define TRACE_INCLUDE_PATH samples/trace_events + * + * But then if something defines "samples" or "trace_events" as a macro + * then we could risk that being converted too, and give us an unexpected + * result. + */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +/* + * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal + */ +#define TRACE_INCLUDE_FILE trace-events-sample +#include <trace/define_trace.h> diff --git a/kernel/samples/uhid/Makefile b/kernel/samples/uhid/Makefile new file mode 100644 index 000000000..c95a69656 --- /dev/null +++ b/kernel/samples/uhid/Makefile @@ -0,0 +1,10 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +# List of programs to build +hostprogs-y := uhid-example + +# Tell kbuild to always build the programs +always := $(hostprogs-y) + +HOSTCFLAGS_uhid-example.o += -I$(objtree)/usr/include diff --git a/kernel/samples/uhid/uhid-example.c b/kernel/samples/uhid/uhid-example.c new file mode 100644 index 000000000..7d58a4b8d --- /dev/null +++ b/kernel/samples/uhid/uhid-example.c @@ -0,0 +1,464 @@ +/* + * UHID Example + * + * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using uhid. + */ + +/* + * UHID Example + * This example emulates a basic 3 buttons mouse with wheel over UHID. Run this + * program as root and then use the following keys to control the mouse: + * q: Quit the application + * 1: Toggle left button (down, up, ...) + * 2: Toggle right button + * 3: Toggle middle button + * a: Move mouse left + * d: Move mouse right + * w: Move mouse up + * s: Move mouse down + * r: Move wheel up + * f: Move wheel down + * + * Additionally to 3 button mouse, 3 keyboard LEDs are also supported (LED_NUML, + * LED_CAPSL and LED_SCROLLL). The device doesn't generate any related keyboard + * events, though. You need to manually write the EV_LED/LED_XY/1 activation + * input event to the evdev device to see it being sent to this device. + * + * If uhid is not available as /dev/uhid, then you can pass a different path as + * first argument. + * If <linux/uhid.h> is not installed in /usr, then compile this with: + * gcc -o ./uhid_test -Wall -I./include ./samples/uhid/uhid-example.c + * And ignore the warning about kernel headers. However, it is recommended to + * use the installed uhid.h if available. + */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <termios.h> +#include <unistd.h> +#include <linux/uhid.h> + +/* + * HID Report Desciptor + * We emulate a basic 3 button mouse with wheel and 3 keyboard LEDs. This is + * the report-descriptor as the kernel will parse it: + * + * INPUT(1)[INPUT] + * Field(0) + * Physical(GenericDesktop.Pointer) + * Application(GenericDesktop.Mouse) + * Usage(3) + * Button.0001 + * Button.0002 + * Button.0003 + * Logical Minimum(0) + * Logical Maximum(1) + * Report Size(1) + * Report Count(3) + * Report Offset(0) + * Flags( Variable Absolute ) + * Field(1) + * Physical(GenericDesktop.Pointer) + * Application(GenericDesktop.Mouse) + * Usage(3) + * GenericDesktop.X + * GenericDesktop.Y + * GenericDesktop.Wheel + * Logical Minimum(-128) + * Logical Maximum(127) + * Report Size(8) + * Report Count(3) + * Report Offset(8) + * Flags( Variable Relative ) + * OUTPUT(2)[OUTPUT] + * Field(0) + * Application(GenericDesktop.Keyboard) + * Usage(3) + * LED.NumLock + * LED.CapsLock + * LED.ScrollLock + * Logical Minimum(0) + * Logical Maximum(1) + * Report Size(1) + * Report Count(3) + * Report Offset(0) + * Flags( Variable Absolute ) + * + * This is the mapping that we expect: + * Button.0001 ---> Key.LeftBtn + * Button.0002 ---> Key.RightBtn + * Button.0003 ---> Key.MiddleBtn + * GenericDesktop.X ---> Relative.X + * GenericDesktop.Y ---> Relative.Y + * GenericDesktop.Wheel ---> Relative.Wheel + * LED.NumLock ---> LED.NumLock + * LED.CapsLock ---> LED.CapsLock + * LED.ScrollLock ---> LED.ScrollLock + * + * This information can be verified by reading /sys/kernel/debug/hid/<dev>/rdesc + * This file should print the same information as showed above. + */ + +static unsigned char rdesc[] = { + 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ + 0x09, 0x02, /* USAGE (Mouse) */ + 0xa1, 0x01, /* COLLECTION (Application) */ + 0x09, 0x01, /* USAGE (Pointer) */ + 0xa1, 0x00, /* COLLECTION (Physical) */ + 0x85, 0x01, /* REPORT_ID (1) */ + 0x05, 0x09, /* USAGE_PAGE (Button) */ + 0x19, 0x01, /* USAGE_MINIMUM (Button 1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (Button 3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x81, 0x02, /* INPUT (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */ + 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ + 0x09, 0x30, /* USAGE (X) */ + 0x09, 0x31, /* USAGE (Y) */ + 0x09, 0x38, /* USAGE (WHEEL) */ + 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */ + 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */ + 0x75, 0x08, /* REPORT_SIZE (8) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x81, 0x06, /* INPUT (Data,Var,Rel) */ + 0xc0, /* END_COLLECTION */ + 0xc0, /* END_COLLECTION */ + 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ + 0x09, 0x06, /* USAGE (Keyboard) */ + 0xa1, 0x01, /* COLLECTION (Application) */ + 0x85, 0x02, /* REPORT_ID (2) */ + 0x05, 0x08, /* USAGE_PAGE (Led) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x91, 0x02, /* Output (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + 0xc0, /* END_COLLECTION */ +}; + +static int uhid_write(int fd, const struct uhid_event *ev) +{ + ssize_t ret; + + ret = write(fd, ev, sizeof(*ev)); + if (ret < 0) { + fprintf(stderr, "Cannot write to uhid: %m\n"); + return -errno; + } else if (ret != sizeof(*ev)) { + fprintf(stderr, "Wrong size written to uhid: %ld != %lu\n", + ret, sizeof(ev)); + return -EFAULT; + } else { + return 0; + } +} + +static int create(int fd) +{ + struct uhid_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_CREATE; + strcpy((char*)ev.u.create.name, "test-uhid-device"); + ev.u.create.rd_data = rdesc; + ev.u.create.rd_size = sizeof(rdesc); + ev.u.create.bus = BUS_USB; + ev.u.create.vendor = 0x15d9; + ev.u.create.product = 0x0a37; + ev.u.create.version = 0; + ev.u.create.country = 0; + + return uhid_write(fd, &ev); +} + +static void destroy(int fd) +{ + struct uhid_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_DESTROY; + + uhid_write(fd, &ev); +} + +/* This parses raw output reports sent by the kernel to the device. A normal + * uhid program shouldn't do this but instead just forward the raw report. + * However, for ducomentational purposes, we try to detect LED events here and + * print debug messages for it. */ +static void handle_output(struct uhid_event *ev) +{ + /* LED messages are adverised via OUTPUT reports; ignore the rest */ + if (ev->u.output.rtype != UHID_OUTPUT_REPORT) + return; + /* LED reports have length 2 bytes */ + if (ev->u.output.size != 2) + return; + /* first byte is report-id which is 0x02 for LEDs in our rdesc */ + if (ev->u.output.data[0] != 0x2) + return; + + /* print flags payload */ + fprintf(stderr, "LED output report received with flags %x\n", + ev->u.output.data[1]); +} + +static int event(int fd) +{ + struct uhid_event ev; + ssize_t ret; + + memset(&ev, 0, sizeof(ev)); + ret = read(fd, &ev, sizeof(ev)); + if (ret == 0) { + fprintf(stderr, "Read HUP on uhid-cdev\n"); + return -EFAULT; + } else if (ret < 0) { + fprintf(stderr, "Cannot read uhid-cdev: %m\n"); + return -errno; + } else if (ret != sizeof(ev)) { + fprintf(stderr, "Invalid size read from uhid-dev: %ld != %lu\n", + ret, sizeof(ev)); + return -EFAULT; + } + + switch (ev.type) { + case UHID_START: + fprintf(stderr, "UHID_START from uhid-dev\n"); + break; + case UHID_STOP: + fprintf(stderr, "UHID_STOP from uhid-dev\n"); + break; + case UHID_OPEN: + fprintf(stderr, "UHID_OPEN from uhid-dev\n"); + break; + case UHID_CLOSE: + fprintf(stderr, "UHID_CLOSE from uhid-dev\n"); + break; + case UHID_OUTPUT: + fprintf(stderr, "UHID_OUTPUT from uhid-dev\n"); + handle_output(&ev); + break; + case UHID_OUTPUT_EV: + fprintf(stderr, "UHID_OUTPUT_EV from uhid-dev\n"); + break; + default: + fprintf(stderr, "Invalid event from uhid-dev: %u\n", ev.type); + } + + return 0; +} + +static bool btn1_down; +static bool btn2_down; +static bool btn3_down; +static signed char abs_hor; +static signed char abs_ver; +static signed char wheel; + +static int send_event(int fd) +{ + struct uhid_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_INPUT; + ev.u.input.size = 5; + + ev.u.input.data[0] = 0x1; + if (btn1_down) + ev.u.input.data[1] |= 0x1; + if (btn2_down) + ev.u.input.data[1] |= 0x2; + if (btn3_down) + ev.u.input.data[1] |= 0x4; + + ev.u.input.data[2] = abs_hor; + ev.u.input.data[3] = abs_ver; + ev.u.input.data[4] = wheel; + + return uhid_write(fd, &ev); +} + +static int keyboard(int fd) +{ + char buf[128]; + ssize_t ret, i; + + ret = read(STDIN_FILENO, buf, sizeof(buf)); + if (ret == 0) { + fprintf(stderr, "Read HUP on stdin\n"); + return -EFAULT; + } else if (ret < 0) { + fprintf(stderr, "Cannot read stdin: %m\n"); + return -errno; + } + + for (i = 0; i < ret; ++i) { + switch (buf[i]) { + case '1': + btn1_down = !btn1_down; + ret = send_event(fd); + if (ret) + return ret; + break; + case '2': + btn2_down = !btn2_down; + ret = send_event(fd); + if (ret) + return ret; + break; + case '3': + btn3_down = !btn3_down; + ret = send_event(fd); + if (ret) + return ret; + break; + case 'a': + abs_hor = -20; + ret = send_event(fd); + abs_hor = 0; + if (ret) + return ret; + break; + case 'd': + abs_hor = 20; + ret = send_event(fd); + abs_hor = 0; + if (ret) + return ret; + break; + case 'w': + abs_ver = -20; + ret = send_event(fd); + abs_ver = 0; + if (ret) + return ret; + break; + case 's': + abs_ver = 20; + ret = send_event(fd); + abs_ver = 0; + if (ret) + return ret; + break; + case 'r': + wheel = 1; + ret = send_event(fd); + wheel = 0; + if (ret) + return ret; + break; + case 'f': + wheel = -1; + ret = send_event(fd); + wheel = 0; + if (ret) + return ret; + break; + case 'q': + return -ECANCELED; + default: + fprintf(stderr, "Invalid input: %c\n", buf[i]); + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int fd; + const char *path = "/dev/uhid"; + struct pollfd pfds[2]; + int ret; + struct termios state; + + ret = tcgetattr(STDIN_FILENO, &state); + if (ret) { + fprintf(stderr, "Cannot get tty state\n"); + } else { + state.c_lflag &= ~ICANON; + state.c_cc[VMIN] = 1; + ret = tcsetattr(STDIN_FILENO, TCSANOW, &state); + if (ret) + fprintf(stderr, "Cannot set tty state\n"); + } + + if (argc >= 2) { + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { + fprintf(stderr, "Usage: %s [%s]\n", argv[0], path); + return EXIT_SUCCESS; + } else { + path = argv[1]; + } + } + + fprintf(stderr, "Open uhid-cdev %s\n", path); + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) { + fprintf(stderr, "Cannot open uhid-cdev %s: %m\n", path); + return EXIT_FAILURE; + } + + fprintf(stderr, "Create uhid device\n"); + ret = create(fd); + if (ret) { + close(fd); + return EXIT_FAILURE; + } + + pfds[0].fd = STDIN_FILENO; + pfds[0].events = POLLIN; + pfds[1].fd = fd; + pfds[1].events = POLLIN; + + fprintf(stderr, "Press 'q' to quit...\n"); + while (1) { + ret = poll(pfds, 2, -1); + if (ret < 0) { + fprintf(stderr, "Cannot poll for fds: %m\n"); + break; + } + if (pfds[0].revents & POLLHUP) { + fprintf(stderr, "Received HUP on stdin\n"); + break; + } + if (pfds[1].revents & POLLHUP) { + fprintf(stderr, "Received HUP on uhid-cdev\n"); + break; + } + + if (pfds[0].revents & POLLIN) { + ret = keyboard(fd); + if (ret) + break; + } + if (pfds[1].revents & POLLIN) { + ret = event(fd); + if (ret) + break; + } + } + + fprintf(stderr, "Destroy uhid device\n"); + destroy(fd); + return EXIT_SUCCESS; +} |