diff options
author | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-11 10:41:07 +0300 |
---|---|---|
committer | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-13 08:17:18 +0300 |
commit | e09b41010ba33a20a87472ee821fa407a5b8da36 (patch) | |
tree | d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/kernel/trace | |
parent | f93b97fd65072de626c074dbe099a1fff05ce060 (diff) |
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page.
During the rebasing, the following patch collided:
Force tick interrupt and get rid of softirq magic(I70131fb85).
Collisions have been removed because its logic was found on the
source already.
Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/kernel/trace')
31 files changed, 2606 insertions, 1711 deletions
diff --git a/kernel/kernel/trace/Kconfig b/kernel/kernel/trace/Kconfig index ab3a277a3..364ccd0eb 100644 --- a/kernel/kernel/trace/Kconfig +++ b/kernel/kernel/trace/Kconfig @@ -538,7 +538,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on KPROBE_EVENT + depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS bool default y help @@ -739,6 +739,13 @@ config TRACE_ENUM_MAP_FILE If unsure, say N +config TRACING_EVENTS_GPIO + bool "Trace gpio events" + depends on GPIOLIB + default y + help + Enable tracing events for gpio subsystem + endif # FTRACE endif # TRACING_SUPPORT diff --git a/kernel/kernel/trace/blktrace.c b/kernel/kernel/trace/blktrace.c index 483cecfa5..a990824c8 100644 --- a/kernel/kernel/trace/blktrace.c +++ b/kernel/kernel/trace/blktrace.c @@ -103,7 +103,7 @@ record_it: memcpy((void *) t + sizeof(*t), data, len); if (blk_tracer) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); } } @@ -278,7 +278,7 @@ record_it: memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); return; } } @@ -437,9 +437,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, struct blk_user_trace_setup *buts) { - struct blk_trace *old_bt, *bt = NULL; + struct blk_trace *bt = NULL; struct dentry *dir = NULL; - int ret, i; + int ret; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -451,9 +451,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * some device names have larger paths - convert the slashes * to underscores for this to work as expected */ - for (i = 0; i < strlen(buts->name); i++) - if (buts->name[i] == '/') - buts->name[i] = '_'; + strreplace(buts->name, '/', '_'); bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) @@ -521,11 +519,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->trace_state = Blktrace_setup; ret = -EBUSY; - old_bt = xchg(&q->blk_trace, bt); - if (old_bt) { - (void) xchg(&q->blk_trace, old_bt); + if (cmpxchg(&q->blk_trace, NULL, bt)) goto err; - } if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints(); @@ -780,9 +775,6 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, if (likely(!bt)) return; - if (!error && !bio_flagged(bio, BIO_UPTODATE)) - error = EIO; - __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_rw, what, error, 0, NULL); } @@ -889,8 +881,7 @@ static void blk_add_trace_split(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT, - !bio_flagged(bio, BIO_UPTODATE), - sizeof(rpdu), &rpdu); + bio->bi_error, sizeof(rpdu), &rpdu); } } @@ -922,8 +913,8 @@ static void blk_add_trace_bio_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio->bi_rw, BLK_TA_REMAP, - !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); + bio->bi_rw, BLK_TA_REMAP, bio->bi_error, + sizeof(r), &r); } /** @@ -1349,6 +1340,7 @@ static const struct { static enum print_line_t print_one_line(struct trace_iterator *iter, bool classic) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; const struct blk_io_trace *t; u16 what; @@ -1357,7 +1349,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, t = te_blk_io_trace(iter->ent); what = t->action & ((1 << BLK_TC_SHIFT) - 1); - long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + long_act = !!(tr->trace_flags & TRACE_ITER_VERBOSE); log_action = classic ? &blk_log_action_classic : &blk_log_action; if (t->action == BLK_TN_MESSAGE) { @@ -1419,9 +1411,9 @@ blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) /* don't output context-info for blk_classic output */ if (bit == TRACE_BLK_OPT_CLASSIC) { if (set) - trace_flags &= ~TRACE_ITER_CONTEXT_INFO; + tr->trace_flags &= ~TRACE_ITER_CONTEXT_INFO; else - trace_flags |= TRACE_ITER_CONTEXT_INFO; + tr->trace_flags |= TRACE_ITER_CONTEXT_INFO; } return 0; } @@ -1450,14 +1442,14 @@ static struct trace_event trace_blk_event = { static int __init init_blk_tracer(void) { - if (!register_ftrace_event(&trace_blk_event)) { + if (!register_trace_event(&trace_blk_event)) { pr_warning("Warning: could not register block events\n"); return 1; } if (register_tracer(&blk_tracer) != 0) { pr_warning("Warning: could not register the block tracer\n"); - unregister_ftrace_event(&trace_blk_event); + unregister_trace_event(&trace_blk_event); return 1; } @@ -1487,7 +1479,7 @@ static int blk_trace_remove_queue(struct request_queue *q) static int blk_trace_setup_queue(struct request_queue *q, struct block_device *bdev) { - struct blk_trace *old_bt, *bt = NULL; + struct blk_trace *bt = NULL; int ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); @@ -1503,12 +1495,9 @@ static int blk_trace_setup_queue(struct request_queue *q, blk_trace_setup_lba(bt, bdev); - old_bt = xchg(&q->blk_trace, bt); - if (old_bt != NULL) { - (void)xchg(&q->blk_trace, old_bt); - ret = -EBUSY; + ret = -EBUSY; + if (cmpxchg(&q->blk_trace, NULL, bt)) goto free_bt; - } if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints(); diff --git a/kernel/kernel/trace/bpf_trace.c b/kernel/kernel/trace/bpf_trace.c index 2d56ce501..4228fd368 100644 --- a/kernel/kernel/trace/bpf_trace.c +++ b/kernel/kernel/trace/bpf_trace.c @@ -79,27 +79,18 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - /* NMI safe access to clock monotonic */ - return ktime_get_mono_fast_ns(); -} - -static const struct bpf_func_proto bpf_ktime_get_ns_proto = { - .func = bpf_ktime_get_ns, - .gpl_only = true, - .ret_type = RET_INTEGER, -}; - /* * limited trace_printk() - * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed + * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed */ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) { char *fmt = (char *) (long) r1; + bool str_seen = false; int mod[3] = {}; int fmt_cnt = 0; + u64 unsafe_addr; + char buf[64]; int i; /* @@ -126,12 +117,37 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) if (fmt[i] == 'l') { mod[fmt_cnt]++; i++; - } else if (fmt[i] == 'p') { + } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; i++; if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) return -EINVAL; fmt_cnt++; + if (fmt[i - 1] == 's') { + if (str_seen) + /* allow only one '%s' per fmt string */ + return -EINVAL; + str_seen = true; + + switch (fmt_cnt) { + case 1: + unsafe_addr = r3; + r3 = (long) buf; + break; + case 2: + unsafe_addr = r4; + r4 = (long) buf; + break; + case 3: + unsafe_addr = r5; + r5 = (long) buf; + break; + } + buf[0] = 0; + strncpy_from_unsafe(buf, + (void *) (long) unsafe_addr, + sizeof(buf)); + } continue; } @@ -159,6 +175,95 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .arg2_type = ARG_CONST_STACK_SIZE, }; +const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +{ + /* + * this program might be calling bpf_trace_printk, + * so allocate per-cpu printk buffers + */ + trace_printk_init_buffers(); + + return &bpf_trace_printk_proto; +} + +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct perf_event *event; + + if (unlikely(index >= array->map.max_entries)) + return -E2BIG; + + event = (struct perf_event *)array->ptrs[index]; + if (!event) + return -ENOENT; + + /* make sure event is local and doesn't have pmu::count */ + if (event->oncpu != smp_processor_id() || + event->pmu->count) + return -EINVAL; + + /* + * we don't know if the function is run successfully by the + * return value. It can be judged in other places, such as + * eBPF programs. + */ + return perf_event_read_local(event); +} + +static const struct bpf_func_proto bpf_perf_event_read_proto = { + .func = bpf_perf_event_read, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + +static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +{ + struct pt_regs *regs = (struct pt_regs *) (long) r1; + struct bpf_map *map = (struct bpf_map *) (long) r2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + void *data = (void *) (long) r4; + struct perf_sample_data sample_data; + struct perf_event *event; + struct perf_raw_record raw = { + .size = size, + .data = data, + }; + + if (unlikely(index >= array->map.max_entries)) + return -E2BIG; + + event = (struct perf_event *)array->ptrs[index]; + if (unlikely(!event)) + return -ENOENT; + + if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || + event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) + return -EINVAL; + + if (unlikely(event->oncpu != smp_processor_id())) + return -EOPNOTSUPP; + + perf_sample_data_init(&sample_data, 0, 0); + sample_data.raw = &raw; + perf_event_output(event, &sample_data, regs); + return 0; +} + +static const struct bpf_func_proto bpf_perf_event_output_proto = { + .func = bpf_perf_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -172,15 +277,22 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; - + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; case BPF_FUNC_trace_printk: - /* - * this program might be calling bpf_trace_printk, - * so allocate per-cpu printk buffers - */ - trace_printk_init_buffers(); - - return &bpf_trace_printk_proto; + return bpf_get_trace_printk_proto(); + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_perf_event_read: + return &bpf_perf_event_read_proto; + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto; default: return NULL; } diff --git a/kernel/kernel/trace/ftrace.c b/kernel/kernel/trace/ftrace.c index eb11011b5..3f743b147 100644 --- a/kernel/kernel/trace/ftrace.c +++ b/kernel/kernel/trace/ftrace.c @@ -243,6 +243,11 @@ static void ftrace_sync_ipi(void *data) #ifdef CONFIG_FUNCTION_GRAPH_TRACER static void update_function_graph_func(void); + +/* Both enabled by default (can be cleared by function_graph tracer flags */ +static bool fgraph_sleep_time = true; +static bool fgraph_graph_time = true; + #else static inline void update_function_graph_func(void) { } #endif @@ -630,13 +635,18 @@ static int function_stat_show(struct seq_file *m, void *v) goto out; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + avg = rec->time; + do_div(avg, rec->counter); + if (tracing_thresh && (avg < tracing_thresh)) + goto out; +#endif + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); seq_printf(m, " %-30.30s %10lu", str, rec->counter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - avg = rec->time; - do_div(avg, rec->counter); /* Sample standard deviation (s^2) */ if (rec->counter <= 1) @@ -912,7 +922,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) calltime = trace->rettime - trace->calltime; - if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + if (!fgraph_graph_time) { int index; index = trace->depth; @@ -3415,27 +3425,35 @@ ftrace_notrace_open(struct inode *inode, struct file *file) inode, file); } -static int ftrace_match(char *str, char *regex, int len, int type) +/* Type for quick search ftrace basic regexes (globs) from filter_parse_regex */ +struct ftrace_glob { + char *search; + unsigned len; + int type; +}; + +static int ftrace_match(char *str, struct ftrace_glob *g) { int matched = 0; int slen; - switch (type) { + switch (g->type) { case MATCH_FULL: - if (strcmp(str, regex) == 0) + if (strcmp(str, g->search) == 0) matched = 1; break; case MATCH_FRONT_ONLY: - if (strncmp(str, regex, len) == 0) + if (strncmp(str, g->search, g->len) == 0) matched = 1; break; case MATCH_MIDDLE_ONLY: - if (strstr(str, regex)) + if (strstr(str, g->search)) matched = 1; break; case MATCH_END_ONLY: slen = strlen(str); - if (slen >= len && memcmp(str + slen - len, regex, len) == 0) + if (slen >= g->len && + memcmp(str + slen - g->len, g->search, g->len) == 0) matched = 1; break; } @@ -3444,13 +3462,13 @@ static int ftrace_match(char *str, char *regex, int len, int type) } static int -enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not) +enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter) { struct ftrace_func_entry *entry; int ret = 0; entry = ftrace_lookup_ip(hash, rec->ip); - if (not) { + if (clear_filter) { /* Do nothing if it doesn't exist */ if (!entry) return 0; @@ -3467,42 +3485,68 @@ enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int not) } static int -ftrace_match_record(struct dyn_ftrace *rec, char *mod, - char *regex, int len, int type) +ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, + struct ftrace_glob *mod_g, int exclude_mod) { char str[KSYM_SYMBOL_LEN]; char *modname; kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); - if (mod) { - /* module lookup requires matching the module */ - if (!modname || strcmp(modname, mod)) + if (mod_g) { + int mod_matches = (modname) ? ftrace_match(modname, mod_g) : 0; + + /* blank module name to match all modules */ + if (!mod_g->len) { + /* blank module globbing: modname xor exclude_mod */ + if ((!exclude_mod) != (!modname)) + goto func_match; + return 0; + } + + /* not matching the module */ + if (!modname || !mod_matches) { + if (exclude_mod) + goto func_match; + else + return 0; + } + + if (mod_matches && exclude_mod) return 0; +func_match: /* blank search means to match all funcs in the mod */ - if (!len) + if (!func_g->len) return 1; } - return ftrace_match(str, regex, len, type); + return ftrace_match(str, func_g); } static int -match_records(struct ftrace_hash *hash, char *buff, - int len, char *mod, int not) +match_records(struct ftrace_hash *hash, char *func, int len, char *mod) { - unsigned search_len = 0; struct ftrace_page *pg; struct dyn_ftrace *rec; - int type = MATCH_FULL; - char *search = buff; + struct ftrace_glob func_g = { .type = MATCH_FULL }; + struct ftrace_glob mod_g = { .type = MATCH_FULL }; + struct ftrace_glob *mod_match = (mod) ? &mod_g : NULL; + int exclude_mod = 0; int found = 0; int ret; + int clear_filter; - if (len) { - type = filter_parse_regex(buff, len, &search, ¬); - search_len = strlen(search); + if (func) { + func_g.type = filter_parse_regex(func, len, &func_g.search, + &clear_filter); + func_g.len = strlen(func_g.search); + } + + if (mod) { + mod_g.type = filter_parse_regex(mod, strlen(mod), + &mod_g.search, &exclude_mod); + mod_g.len = strlen(mod_g.search); } mutex_lock(&ftrace_lock); @@ -3511,8 +3555,8 @@ match_records(struct ftrace_hash *hash, char *buff, goto out_unlock; do_for_each_ftrace_rec(pg, rec) { - if (ftrace_match_record(rec, mod, search, search_len, type)) { - ret = enter_record(hash, rec, not); + if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { + ret = enter_record(hash, rec, clear_filter); if (ret < 0) { found = ret; goto out_unlock; @@ -3529,26 +3573,9 @@ match_records(struct ftrace_hash *hash, char *buff, static int ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) { - return match_records(hash, buff, len, NULL, 0); + return match_records(hash, buff, len, NULL); } -static int -ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) -{ - int not = 0; - - /* blank or '*' mean the same */ - if (strcmp(buff, "*") == 0) - buff[0] = 0; - - /* handle the case of 'dont filter this module' */ - if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) { - buff[0] = 0; - not = 1; - } - - return match_records(hash, buff, strlen(buff), mod, not); -} /* * We register the module command as a template to show others how @@ -3557,10 +3584,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod) static int ftrace_mod_callback(struct ftrace_hash *hash, - char *func, char *cmd, char *param, int enable) + char *func, char *cmd, char *module, int enable) { - char *mod; - int ret = -EINVAL; + int ret; /* * cmd == 'mod' because we only registered this func @@ -3569,21 +3595,11 @@ ftrace_mod_callback(struct ftrace_hash *hash, * you can tell which command was used by the cmd * parameter. */ - - /* we must have a module name */ - if (!param) - return ret; - - mod = strsep(¶m, ":"); - if (!strlen(mod)) - return ret; - - ret = ftrace_match_module_records(hash, func, mod); + ret = match_records(hash, func, strlen(func), module); if (!ret) - ret = -EINVAL; + return -EINVAL; if (ret < 0) return ret; - return 0; } @@ -3694,19 +3710,20 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, { struct ftrace_ops_hash old_hash_ops; struct ftrace_func_probe *entry; + struct ftrace_glob func_g; struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct ftrace_hash *old_hash = *orig_hash; struct ftrace_hash *hash; struct ftrace_page *pg; struct dyn_ftrace *rec; - int type, len, not; + int not; unsigned long key; int count = 0; - char *search; int ret; - type = filter_parse_regex(glob, strlen(glob), &search, ¬); - len = strlen(search); + func_g.type = filter_parse_regex(glob, strlen(glob), + &func_g.search, ¬); + func_g.len = strlen(func_g.search); /* we do not support '!' for function probes */ if (WARN_ON(not)) @@ -3733,7 +3750,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, do_for_each_ftrace_rec(pg, rec) { - if (!ftrace_match_record(rec, NULL, search, len, type)) + if (!ftrace_match_record(rec, &func_g, NULL, 0)) continue; entry = kmalloc(sizeof(*entry), GFP_KERNEL); @@ -3806,24 +3823,24 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, struct ftrace_func_entry *rec_entry; struct ftrace_func_probe *entry; struct ftrace_func_probe *p; + struct ftrace_glob func_g; struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct ftrace_hash *old_hash = *orig_hash; struct list_head free_list; struct ftrace_hash *hash; struct hlist_node *tmp; char str[KSYM_SYMBOL_LEN]; - int type = MATCH_FULL; - int i, len = 0; - char *search; - int ret; + int i, ret; if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) - glob = NULL; + func_g.search = NULL; else if (glob) { int not; - type = filter_parse_regex(glob, strlen(glob), &search, ¬); - len = strlen(search); + func_g.type = filter_parse_regex(glob, strlen(glob), + &func_g.search, ¬); + func_g.len = strlen(func_g.search); + func_g.search = glob; /* we do not support '!' for function probes */ if (WARN_ON(not)) @@ -3852,10 +3869,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, continue; /* do this last, since it is the most expensive */ - if (glob) { + if (func_g.search) { kallsyms_lookup(entry->ip, NULL, NULL, NULL, str); - if (!ftrace_match(str, glob, len, type)) + if (!ftrace_match(str, &func_g)) continue; } @@ -3884,7 +3901,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ftrace_free_entry(entry); } mutex_unlock(&ftrace_lock); - + out_unlock: mutex_unlock(&trace_probe_ops.func_hash->regex_lock); free_ftrace_hash(hash); @@ -4600,21 +4617,21 @@ ftrace_graph_release(struct inode *inode, struct file *file) static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) { + struct ftrace_glob func_g; struct dyn_ftrace *rec; struct ftrace_page *pg; - int search_len; int fail = 1; - int type, not; - char *search; + int not; bool exists; int i; /* decode regex */ - type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); + func_g.type = filter_parse_regex(buffer, strlen(buffer), + &func_g.search, ¬); if (!not && *idx >= size) return -EBUSY; - search_len = strlen(search); + func_g.len = strlen(func_g.search); mutex_lock(&ftrace_lock); @@ -4625,7 +4642,7 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) do_for_each_ftrace_rec(pg, rec) { - if (ftrace_match_record(rec, NULL, search, search_len, type)) { + if (ftrace_match_record(rec, &func_g, NULL, 0)) { /* if it is in the array */ exists = false; for (i = 0; i < *idx; i++) { @@ -4778,17 +4795,6 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; } -static void ftrace_swap_ips(void *a, void *b, int size) -{ - unsigned long *ipa = a; - unsigned long *ipb = b; - unsigned long t; - - t = *ipa; - *ipa = *ipb; - *ipb = t; -} - static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) @@ -4808,7 +4814,7 @@ static int ftrace_process_locs(struct module *mod, return 0; sort(start, count, sizeof(*start), - ftrace_cmp_ips, ftrace_swap_ips); + ftrace_cmp_ips, NULL); start_pg = ftrace_allocate_pages(count); if (!start_pg) @@ -5634,6 +5640,16 @@ static struct ftrace_ops graph_ops = { ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; +void ftrace_graph_sleep_time_control(bool enable) +{ + fgraph_sleep_time = enable; +} + +void ftrace_graph_graph_time_control(bool enable) +{ + fgraph_graph_time = enable; +} + int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { return 0; @@ -5692,7 +5708,7 @@ free: } static void -ftrace_graph_probe_sched_switch(void *ignore, +ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *prev, struct task_struct *next) { unsigned long long timestamp; @@ -5702,7 +5718,7 @@ ftrace_graph_probe_sched_switch(void *ignore, * Does the user want to count the time a function was asleep. * If so, do not update the time stamps. */ - if (trace_flags & TRACE_ITER_SLEEP_TIME) + if (fgraph_sleep_time) return; timestamp = trace_clock_local(); diff --git a/kernel/kernel/trace/latency_hist.c b/kernel/kernel/trace/latency_hist.c index 66a69eb53..7f6ee70de 100644 --- a/kernel/kernel/trace/latency_hist.c +++ b/kernel/kernel/trace/latency_hist.c @@ -115,9 +115,9 @@ static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio); static char *wakeup_latency_hist_dir = "wakeup"; static char *wakeup_latency_hist_dir_sharedprio = "sharedprio"; static notrace void probe_wakeup_latency_hist_start(void *v, - struct task_struct *p, int success); + struct task_struct *p); static notrace void probe_wakeup_latency_hist_stop(void *v, - struct task_struct *prev, struct task_struct *next); + bool preempt, struct task_struct *prev, struct task_struct *next); static notrace void probe_sched_migrate_task(void *, struct task_struct *task, int cpu); static struct enable_data wakeup_latency_enabled_data = { @@ -869,7 +869,7 @@ static notrace void probe_sched_migrate_task(void *v, struct task_struct *task, } static notrace void probe_wakeup_latency_hist_start(void *v, - struct task_struct *p, int success) + struct task_struct *p) { unsigned long flags; struct task_struct *curr = current; @@ -907,7 +907,7 @@ out: } static notrace void probe_wakeup_latency_hist_stop(void *v, - struct task_struct *prev, struct task_struct *next) + bool preempt, struct task_struct *prev, struct task_struct *next) { unsigned long flags; int cpu = task_cpu(next); diff --git a/kernel/kernel/trace/ring_buffer.c b/kernel/kernel/trace/ring_buffer.c index 0315d4317..9c6045a27 100644 --- a/kernel/kernel/trace/ring_buffer.c +++ b/kernel/kernel/trace/ring_buffer.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> #include <linux/trace_seq.h> @@ -115,63 +115,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s) * */ -/* - * A fast way to enable or disable all ring buffers is to - * call tracing_on or tracing_off. Turning off the ring buffers - * prevents all ring buffers from being recorded to. - * Turning this switch on, makes it OK to write to the - * ring buffer, if the ring buffer is enabled itself. - * - * There's three layers that must be on in order to write - * to the ring buffer. - * - * 1) This global flag must be set. - * 2) The ring buffer must be enabled for recording. - * 3) The per cpu buffer must be enabled for recording. - * - * In case of an anomaly, this global flag has a bit set that - * will permantly disable all ring buffers. - */ - -/* - * Global flag to disable all recording to ring buffers - * This has two bits: ON, DISABLED - * - * ON DISABLED - * ---- ---------- - * 0 0 : ring buffers are off - * 1 0 : ring buffers are on - * X 1 : ring buffers are permanently disabled - */ - -enum { - RB_BUFFERS_ON_BIT = 0, - RB_BUFFERS_DISABLED_BIT = 1, -}; - -enum { - RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, - RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, -}; - -static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; - /* Used for individual buffers (after the counter) */ #define RB_BUFFER_OFF (1 << 20) #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) -/** - * tracing_off_permanent - permanently disable ring buffers - * - * This function, once called, will disable all ring buffers - * permanently. - */ -void tracing_off_permanent(void) -{ - set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); -} - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) @@ -452,6 +400,34 @@ struct rb_irq_work { }; /* + * Structure to hold event state and handle nested events. + */ +struct rb_event_info { + u64 ts; + u64 delta; + unsigned long length; + struct buffer_page *tail_page; + int add_timestamp; +}; + +/* + * Used for which event context the event is in. + * NMI = 0 + * IRQ = 1 + * SOFTIRQ = 2 + * NORMAL = 3 + * + * See trace_recursive_lock() comment below for more details. + */ +enum { + RB_CTX_NMI, + RB_CTX_IRQ, + RB_CTX_SOFTIRQ, + RB_CTX_NORMAL, + RB_CTX_MAX +}; + +/* * head_page == tail_page && head == tail then buffer is empty. */ struct ring_buffer_per_cpu { @@ -462,6 +438,7 @@ struct ring_buffer_per_cpu { arch_spinlock_t lock; struct lock_class_key lock_key; unsigned int nr_pages; + unsigned int current_context; struct list_head *pages; struct buffer_page *head_page; /* read from head */ struct buffer_page *tail_page; /* write to tail */ @@ -852,7 +829,7 @@ rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, * writer is ever on it, the previous pointer never points * back to the reader page. */ -static int rb_is_reader_page(struct buffer_page *page) +static bool rb_is_reader_page(struct buffer_page *page) { struct list_head *list = page->list.prev; @@ -1910,79 +1887,6 @@ rb_event_index(struct ring_buffer_event *event) return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; } -static inline int -rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - unsigned long addr = (unsigned long)event; - unsigned long index; - - index = rb_event_index(event); - addr &= PAGE_MASK; - - return cpu_buffer->commit_page->page == (void *)addr && - rb_commit_index(cpu_buffer) == index; -} - -static void -rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) -{ - unsigned long max_count; - - /* - * We only race with interrupts and NMIs on this CPU. - * If we own the commit event, then we can commit - * all others that interrupted us, since the interruptions - * are in stack format (they finish before they come - * back to us). This allows us to do a simple loop to - * assign the commit to the tail. - */ - again: - max_count = cpu_buffer->nr_pages * 100; - - while (cpu_buffer->commit_page != cpu_buffer->tail_page) { - if (RB_WARN_ON(cpu_buffer, !(--max_count))) - return; - if (RB_WARN_ON(cpu_buffer, - rb_is_reader_page(cpu_buffer->tail_page))) - return; - local_set(&cpu_buffer->commit_page->page->commit, - rb_page_write(cpu_buffer->commit_page)); - rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; - /* add barrier to keep gcc from optimizing too much */ - barrier(); - } - while (rb_commit_index(cpu_buffer) != - rb_page_write(cpu_buffer->commit_page)) { - - local_set(&cpu_buffer->commit_page->page->commit, - rb_page_write(cpu_buffer->commit_page)); - RB_WARN_ON(cpu_buffer, - local_read(&cpu_buffer->commit_page->page->commit) & - ~RB_WRITE_MASK); - barrier(); - } - - /* again, keep gcc from optimizing */ - barrier(); - - /* - * If an interrupt came in just after the first while loop - * and pushed the tail page forward, we will be left with - * a dangling commit that will never go forward. - */ - if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) - goto again; -} - -static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) -{ - cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; - cpu_buffer->reader_page->read = 0; -} - static void rb_inc_iter(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; @@ -2002,64 +1906,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) iter->head = 0; } -/* Slow path, do not inline */ -static noinline struct ring_buffer_event * -rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) -{ - event->type_len = RINGBUF_TYPE_TIME_EXTEND; - - /* Not the first event on the page? */ - if (rb_event_index(event)) { - event->time_delta = delta & TS_MASK; - event->array[0] = delta >> TS_SHIFT; - } else { - /* nope, just zero it */ - event->time_delta = 0; - event->array[0] = 0; - } - - return skip_time_extend(event); -} - -/** - * rb_update_event - update event type and data - * @event: the event to update - * @type: the type of event - * @length: the size of the event field in the ring buffer - * - * Update the type and data fields of the event. The length - * is the actual size that is written to the ring buffer, - * and with this, we can determine what to place into the - * data field. - */ -static void -rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event, unsigned length, - int add_timestamp, u64 delta) -{ - /* Only a commit updates the timestamp */ - if (unlikely(!rb_event_is_commit(cpu_buffer, event))) - delta = 0; - - /* - * If we need to add a timestamp, then we - * add it to the start of the resevered space. - */ - if (unlikely(add_timestamp)) { - event = rb_add_time_stamp(event, delta); - length -= RB_LEN_TIME_EXTEND; - delta = 0; - } - - event->time_delta = delta; - length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { - event->type_len = 0; - event->array[0] = length; - } else - event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); -} - /* * rb_handle_head_page - writer hit the head page * @@ -2218,29 +2064,13 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, return 0; } -static unsigned rb_calculate_event_length(unsigned length) -{ - struct ring_buffer_event event; /* Used only for sizeof array */ - - /* zero length can cause confusions */ - if (!length) - length = 1; - - if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) - length += sizeof(event.array[0]); - - length += RB_EVNT_HDR_SIZE; - length = ALIGN(length, RB_ARCH_ALIGNMENT); - - return length; -} - static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page *tail_page, - unsigned long tail, unsigned long length) + unsigned long tail, struct rb_event_info *info) { + struct buffer_page *tail_page = info->tail_page; struct ring_buffer_event *event; + unsigned long length = info->length; /* * Only the event that crossed the page boundary @@ -2310,13 +2140,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, */ static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, - unsigned long length, unsigned long tail, - struct buffer_page *tail_page, u64 ts) + unsigned long tail, struct rb_event_info *info) { + struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; + u64 ts; next_page = tail_page; @@ -2402,74 +2233,120 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, out_again: - rb_reset_tail(cpu_buffer, tail_page, tail, length); + rb_reset_tail(cpu_buffer, tail, info); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); out_reset: /* reset write */ - rb_reset_tail(cpu_buffer, tail_page, tail, length); + rb_reset_tail(cpu_buffer, tail, info); return NULL; } -static struct ring_buffer_event * -__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, - unsigned long length, u64 ts, - u64 delta, int add_timestamp) +/* Slow path, do not inline */ +static noinline struct ring_buffer_event * +rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) { - struct buffer_page *tail_page; - struct ring_buffer_event *event; - unsigned long tail, write; + event->type_len = RINGBUF_TYPE_TIME_EXTEND; - /* - * If the time delta since the last event is too big to - * hold in the time field of the event, then we append a - * TIME EXTEND event ahead of the data event. - */ - if (unlikely(add_timestamp)) - length += RB_LEN_TIME_EXTEND; + /* Not the first event on the page? */ + if (rb_event_index(event)) { + event->time_delta = delta & TS_MASK; + event->array[0] = delta >> TS_SHIFT; + } else { + /* nope, just zero it */ + event->time_delta = 0; + event->array[0] = 0; + } - tail_page = cpu_buffer->tail_page; - write = local_add_return(length, &tail_page->write); + return skip_time_extend(event); +} - /* set write to only the index of the write */ - write &= RB_WRITE_MASK; - tail = write - length; +static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event); + +/** + * rb_update_event - update event type and data + * @event: the event to update + * @type: the type of event + * @length: the size of the event field in the ring buffer + * + * Update the type and data fields of the event. The length + * is the actual size that is written to the ring buffer, + * and with this, we can determine what to place into the + * data field. + */ +static void +rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event, + struct rb_event_info *info) +{ + unsigned length = info->length; + u64 delta = info->delta; + + /* Only a commit updates the timestamp */ + if (unlikely(!rb_event_is_commit(cpu_buffer, event))) + delta = 0; /* - * If this is the first commit on the page, then it has the same - * timestamp as the page itself. + * If we need to add a timestamp, then we + * add it to the start of the resevered space. */ - if (!tail) + if (unlikely(info->add_timestamp)) { + event = rb_add_time_stamp(event, delta); + length -= RB_LEN_TIME_EXTEND; delta = 0; + } - /* See if we shot pass the end of this buffer page */ - if (unlikely(write > BUF_PAGE_SIZE)) - return rb_move_tail(cpu_buffer, length, tail, - tail_page, ts); + event->time_delta = delta; + length -= RB_EVNT_HDR_SIZE; + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { + event->type_len = 0; + event->array[0] = length; + } else + event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); +} - /* We reserved something on the buffer */ +static unsigned rb_calculate_event_length(unsigned length) +{ + struct ring_buffer_event event; /* Used only for sizeof array */ - event = __rb_page_index(tail_page, tail); - kmemcheck_annotate_bitfield(event, bitfield); - rb_update_event(cpu_buffer, event, length, add_timestamp, delta); + /* zero length can cause confusions */ + if (!length) + length++; - local_inc(&tail_page->entries); + if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) + length += sizeof(event.array[0]); + + length += RB_EVNT_HDR_SIZE; + length = ALIGN(length, RB_ARCH_ALIGNMENT); /* - * If this is the first commit on the page, then update - * its timestamp. + * In case the time delta is larger than the 27 bits for it + * in the header, we need to add a timestamp. If another + * event comes in when trying to discard this one to increase + * the length, then the timestamp will be added in the allocated + * space of this event. If length is bigger than the size needed + * for the TIME_EXTEND, then padding has to be used. The events + * length must be either RB_LEN_TIME_EXTEND, or greater than or equal + * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding. + * As length is a multiple of 4, we only need to worry if it + * is 12 (RB_LEN_TIME_EXTEND + 4). */ - if (!tail) - tail_page->page->time_stamp = ts; + if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT) + length += RB_ALIGNMENT; - /* account for these added bytes */ - local_add(length, &cpu_buffer->entries_bytes); + return length; +} - return event; +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline bool sched_clock_stable(void) +{ + return true; } +#endif static inline int rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, @@ -2517,6 +2394,59 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) local_inc(&cpu_buffer->commits); } +static void +rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) +{ + unsigned long max_count; + + /* + * We only race with interrupts and NMIs on this CPU. + * If we own the commit event, then we can commit + * all others that interrupted us, since the interruptions + * are in stack format (they finish before they come + * back to us). This allows us to do a simple loop to + * assign the commit to the tail. + */ + again: + max_count = cpu_buffer->nr_pages * 100; + + while (cpu_buffer->commit_page != cpu_buffer->tail_page) { + if (RB_WARN_ON(cpu_buffer, !(--max_count))) + return; + if (RB_WARN_ON(cpu_buffer, + rb_is_reader_page(cpu_buffer->tail_page))) + return; + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; + /* add barrier to keep gcc from optimizing too much */ + barrier(); + } + while (rb_commit_index(cpu_buffer) != + rb_page_write(cpu_buffer->commit_page)) { + + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + RB_WARN_ON(cpu_buffer, + local_read(&cpu_buffer->commit_page->page->commit) & + ~RB_WRITE_MASK); + barrier(); + } + + /* again, keep gcc from optimizing */ + barrier(); + + /* + * If an interrupt came in just after the first while loop + * and pushed the tail page forward, we will be left with + * a dangling commit that will never go forward. + */ + if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) + goto again; +} + static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) { unsigned long commits; @@ -2549,95 +2479,96 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) } } -static struct ring_buffer_event * -rb_reserve_next_event(struct ring_buffer *buffer, - struct ring_buffer_per_cpu *cpu_buffer, - unsigned long length) +static inline void rb_event_discard(struct ring_buffer_event *event) { - struct ring_buffer_event *event; - u64 ts, delta; - int nr_loops = 0; - int add_timestamp; - u64 diff; + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) + event = skip_time_extend(event); - rb_start_commit(cpu_buffer); + /* array[0] holds the actual length for the discarded event */ + event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; + event->type_len = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} -#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP - /* - * Due to the ability to swap a cpu buffer from a buffer - * it is possible it was swapped before we committed. - * (committing stops a swap). We check for it here and - * if it happened, we have to fail the write. - */ - barrier(); - if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { - local_dec(&cpu_buffer->committing); - local_dec(&cpu_buffer->commits); - return NULL; - } -#endif +static inline bool +rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + unsigned long addr = (unsigned long)event; + unsigned long index; - length = rb_calculate_event_length(length); - again: - add_timestamp = 0; - delta = 0; + index = rb_event_index(event); + addr &= PAGE_MASK; + + return cpu_buffer->commit_page->page == (void *)addr && + rb_commit_index(cpu_buffer) == index; +} + +static void +rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + u64 delta; /* - * We allow for interrupts to reenter here and do a trace. - * If one does, it will cause this original code to loop - * back here. Even with heavy interrupts happening, this - * should only happen a few times in a row. If this happens - * 1000 times in a row, there must be either an interrupt - * storm or we have something buggy. - * Bail! + * The event first in the commit queue updates the + * time stamp. */ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) - goto out_fail; + if (rb_event_is_commit(cpu_buffer, event)) { + /* + * A commit event that is first on a page + * updates the write timestamp with the page stamp + */ + if (!rb_event_index(event)) + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; + else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { + delta = event->array[0]; + delta <<= TS_SHIFT; + delta += event->time_delta; + cpu_buffer->write_stamp += delta; + } else + cpu_buffer->write_stamp += event->time_delta; + } +} - ts = rb_time_stamp(cpu_buffer->buffer); - diff = ts - cpu_buffer->write_stamp; +static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) +{ + local_inc(&cpu_buffer->entries); + rb_update_write_stamp(cpu_buffer, event); + rb_end_commit(cpu_buffer); +} - /* make sure this diff is calculated here */ - barrier(); +static __always_inline void +rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +{ + bool pagebusy; - /* Did the write stamp get updated already? */ - if (likely(ts >= cpu_buffer->write_stamp)) { - delta = diff; - if (unlikely(test_time_stamp(delta))) { - int local_clock_stable = 1; -#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK - local_clock_stable = sched_clock_stable(); -#endif - WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", - (unsigned long long)delta, - (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp, - local_clock_stable ? "" : - "If you just came from a suspend/resume,\n" - "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n"); - add_timestamp = 1; - } + if (buffer->irq_work.waiters_pending) { + buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&buffer->irq_work.work); } - event = __rb_reserve_next(cpu_buffer, length, ts, - delta, add_timestamp); - if (unlikely(PTR_ERR(event) == -EAGAIN)) - goto again; - - if (!event) - goto out_fail; + if (cpu_buffer->irq_work.waiters_pending) { + cpu_buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&cpu_buffer->irq_work.work); + } - return event; + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - out_fail: - rb_end_commit(cpu_buffer); - return NULL; + if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) { + cpu_buffer->irq_work.wakeup_full = true; + cpu_buffer->irq_work.full_waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&cpu_buffer->irq_work.work); + } } -#ifdef CONFIG_TRACING - /* * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified @@ -2675,210 +2606,270 @@ rb_reserve_next_event(struct ring_buffer *buffer, * just so happens that it is the same bit corresponding to * the current context. */ -static DEFINE_PER_CPU(unsigned int, current_context); -static __always_inline int trace_recursive_lock(void) +static __always_inline int +trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - unsigned int val = __this_cpu_read(current_context); + unsigned int val = cpu_buffer->current_context; int bit; if (in_interrupt()) { if (in_nmi()) - bit = 0; + bit = RB_CTX_NMI; else if (in_irq()) - bit = 1; + bit = RB_CTX_IRQ; else - bit = 2; + bit = RB_CTX_SOFTIRQ; } else - bit = 3; + bit = RB_CTX_NORMAL; if (unlikely(val & (1 << bit))) return 1; val |= (1 << bit); - __this_cpu_write(current_context, val); + cpu_buffer->current_context = val; return 0; } -static __always_inline void trace_recursive_unlock(void) +static __always_inline void +trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { - __this_cpu_and(current_context, __this_cpu_read(current_context) - 1); + cpu_buffer->current_context &= cpu_buffer->current_context - 1; } -#else - -#define trace_recursive_lock() (0) -#define trace_recursive_unlock() do { } while (0) - -#endif - /** - * ring_buffer_lock_reserve - reserve a part of the buffer - * @buffer: the ring buffer to reserve from - * @length: the length of the data to reserve (excluding event header) - * - * Returns a reseverd event on the ring buffer to copy directly to. - * The user of this interface will need to get the body to write into - * and can use the ring_buffer_event_data() interface. + * ring_buffer_unlock_commit - commit a reserved + * @buffer: The buffer to commit to + * @event: The event pointer to commit. * - * The length is the length of the data needed, not the event length - * which also includes the event header. + * This commits the data to the ring buffer, and releases any locks held. * - * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. - * If NULL is returned, then nothing has been allocated or locked. + * Must be paired with ring_buffer_lock_reserve. */ -struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) +int ring_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; - struct ring_buffer_event *event; - int cpu; + int cpu = raw_smp_processor_id(); - if (ring_buffer_flags != RB_BUFFERS_ON) - return NULL; + cpu_buffer = buffer->buffers[cpu]; - /* If we are tracing schedule, we don't want to recurse */ - preempt_disable_notrace(); + rb_commit(cpu_buffer, event); - if (atomic_read(&buffer->record_disabled)) - goto out_nocheck; + rb_wakeups(buffer, cpu_buffer); - if (trace_recursive_lock()) - goto out_nocheck; + trace_recursive_unlock(cpu_buffer); - cpu = raw_smp_processor_id(); + preempt_enable_notrace(); - if (!cpumask_test_cpu(cpu, buffer->cpumask)) - goto out; + return 0; +} +EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); - cpu_buffer = buffer->buffers[cpu]; +static noinline void +rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, + struct rb_event_info *info) +{ + WARN_ONCE(info->delta > (1ULL << 59), + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", + (unsigned long long)info->delta, + (unsigned long long)info->ts, + (unsigned long long)cpu_buffer->write_stamp, + sched_clock_stable() ? "" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); + info->add_timestamp = 1; +} - if (atomic_read(&cpu_buffer->record_disabled)) - goto out; +static struct ring_buffer_event * +__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, + struct rb_event_info *info) +{ + struct ring_buffer_event *event; + struct buffer_page *tail_page; + unsigned long tail, write; - if (length > BUF_MAX_DATA_SIZE) - goto out; + /* + * If the time delta since the last event is too big to + * hold in the time field of the event, then we append a + * TIME EXTEND event ahead of the data event. + */ + if (unlikely(info->add_timestamp)) + info->length += RB_LEN_TIME_EXTEND; - event = rb_reserve_next_event(buffer, cpu_buffer, length); - if (!event) - goto out; + tail_page = info->tail_page = cpu_buffer->tail_page; + write = local_add_return(info->length, &tail_page->write); - return event; + /* set write to only the index of the write */ + write &= RB_WRITE_MASK; + tail = write - info->length; - out: - trace_recursive_unlock(); + /* + * If this is the first commit on the page, then it has the same + * timestamp as the page itself. + */ + if (!tail) + info->delta = 0; - out_nocheck: - preempt_enable_notrace(); - return NULL; -} -EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); + /* See if we shot pass the end of this buffer page */ + if (unlikely(write > BUF_PAGE_SIZE)) + return rb_move_tail(cpu_buffer, tail, info); -static void -rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - u64 delta; + /* We reserved something on the buffer */ + + event = __rb_page_index(tail_page, tail); + kmemcheck_annotate_bitfield(event, bitfield); + rb_update_event(cpu_buffer, event, info); + + local_inc(&tail_page->entries); /* - * The event first in the commit queue updates the - * time stamp. + * If this is the first commit on the page, then update + * its timestamp. */ - if (rb_event_is_commit(cpu_buffer, event)) { - /* - * A commit event that is first on a page - * updates the write timestamp with the page stamp - */ - if (!rb_event_index(event)) - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; - else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { - delta = event->array[0]; - delta <<= TS_SHIFT; - delta += event->time_delta; - cpu_buffer->write_stamp += delta; - } else - cpu_buffer->write_stamp += event->time_delta; - } -} + if (!tail) + tail_page->page->time_stamp = info->ts; -static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) -{ - local_inc(&cpu_buffer->entries); - rb_update_write_stamp(cpu_buffer, event); - rb_end_commit(cpu_buffer); + /* account for these added bytes */ + local_add(info->length, &cpu_buffer->entries_bytes); + + return event; } -static __always_inline void -rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) +static struct ring_buffer_event * +rb_reserve_next_event(struct ring_buffer *buffer, + struct ring_buffer_per_cpu *cpu_buffer, + unsigned long length) { - bool pagebusy; + struct ring_buffer_event *event; + struct rb_event_info info; + int nr_loops = 0; + u64 diff; - if (buffer->irq_work.waiters_pending) { - buffer->irq_work.waiters_pending = false; - /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&buffer->irq_work.work); + rb_start_commit(cpu_buffer); + +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP + /* + * Due to the ability to swap a cpu buffer from a buffer + * it is possible it was swapped before we committed. + * (committing stops a swap). We check for it here and + * if it happened, we have to fail the write. + */ + barrier(); + if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) { + local_dec(&cpu_buffer->committing); + local_dec(&cpu_buffer->commits); + return NULL; } +#endif - if (cpu_buffer->irq_work.waiters_pending) { - cpu_buffer->irq_work.waiters_pending = false; - /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); + info.length = rb_calculate_event_length(length); + again: + info.add_timestamp = 0; + info.delta = 0; + + /* + * We allow for interrupts to reenter here and do a trace. + * If one does, it will cause this original code to loop + * back here. Even with heavy interrupts happening, this + * should only happen a few times in a row. If this happens + * 1000 times in a row, there must be either an interrupt + * storm or we have something buggy. + * Bail! + */ + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) + goto out_fail; + + info.ts = rb_time_stamp(cpu_buffer->buffer); + diff = info.ts - cpu_buffer->write_stamp; + + /* make sure this diff is calculated here */ + barrier(); + + /* Did the write stamp get updated already? */ + if (likely(info.ts >= cpu_buffer->write_stamp)) { + info.delta = diff; + if (unlikely(test_time_stamp(info.delta))) + rb_handle_timestamp(cpu_buffer, &info); } - pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + event = __rb_reserve_next(cpu_buffer, &info); - if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) { - cpu_buffer->irq_work.wakeup_full = true; - cpu_buffer->irq_work.full_waiters_pending = false; - /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); + if (unlikely(PTR_ERR(event) == -EAGAIN)) { + if (info.add_timestamp) + info.length -= RB_LEN_TIME_EXTEND; + goto again; } + + if (!event) + goto out_fail; + + return event; + + out_fail: + rb_end_commit(cpu_buffer); + return NULL; } /** - * ring_buffer_unlock_commit - commit a reserved - * @buffer: The buffer to commit to - * @event: The event pointer to commit. + * ring_buffer_lock_reserve - reserve a part of the buffer + * @buffer: the ring buffer to reserve from + * @length: the length of the data to reserve (excluding event header) * - * This commits the data to the ring buffer, and releases any locks held. + * Returns a reseverd event on the ring buffer to copy directly to. + * The user of this interface will need to get the body to write into + * and can use the ring_buffer_event_data() interface. * - * Must be paired with ring_buffer_lock_reserve. + * The length is the length of the data needed, not the event length + * which also includes the event header. + * + * Must be paired with ring_buffer_unlock_commit, unless NULL is returned. + * If NULL is returned, then nothing has been allocated or locked. */ -int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event) +struct ring_buffer_event * +ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; - int cpu = raw_smp_processor_id(); + struct ring_buffer_event *event; + int cpu; - cpu_buffer = buffer->buffers[cpu]; + /* If we are tracing schedule, we don't want to recurse */ + preempt_disable_notrace(); - rb_commit(cpu_buffer, event); + if (unlikely(atomic_read(&buffer->record_disabled))) + goto out; - rb_wakeups(buffer, cpu_buffer); + cpu = raw_smp_processor_id(); - trace_recursive_unlock(); + if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask))) + goto out; - preempt_enable_notrace(); + cpu_buffer = buffer->buffers[cpu]; - return 0; -} -EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); + if (unlikely(atomic_read(&cpu_buffer->record_disabled))) + goto out; -static inline void rb_event_discard(struct ring_buffer_event *event) -{ - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) - event = skip_time_extend(event); + if (unlikely(length > BUF_MAX_DATA_SIZE)) + goto out; - /* array[0] holds the actual length for the discarded event */ - event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; - event->type_len = RINGBUF_TYPE_PADDING; - /* time delta must be non zero */ - if (!event->time_delta) - event->time_delta = 1; + if (unlikely(trace_recursive_lock(cpu_buffer))) + goto out; + + event = rb_reserve_next_event(buffer, cpu_buffer, length); + if (!event) + goto out_unlock; + + return event; + + out_unlock: + trace_recursive_unlock(cpu_buffer); + out: + preempt_enable_notrace(); + return NULL; } +EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); /* * Decrement the entries to the page that an event is on. @@ -2970,7 +2961,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, out: rb_end_commit(cpu_buffer); - trace_recursive_unlock(); + trace_recursive_unlock(cpu_buffer); preempt_enable_notrace(); @@ -3000,9 +2991,6 @@ int ring_buffer_write(struct ring_buffer *buffer, int ret = -EBUSY; int cpu; - if (ring_buffer_flags != RB_BUFFERS_ON) - return -EBUSY; - preempt_disable_notrace(); if (atomic_read(&buffer->record_disabled)) @@ -3021,9 +3009,12 @@ int ring_buffer_write(struct ring_buffer *buffer, if (length > BUF_MAX_DATA_SIZE) goto out; + if (unlikely(trace_recursive_lock(cpu_buffer))) + goto out; + event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) - goto out; + goto out_unlock; body = rb_event_data(event); @@ -3034,6 +3025,10 @@ int ring_buffer_write(struct ring_buffer *buffer, rb_wakeups(buffer, cpu_buffer); ret = 0; + + out_unlock: + trace_recursive_unlock(cpu_buffer); + out: preempt_enable_notrace(); @@ -3041,7 +3036,7 @@ int ring_buffer_write(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_write); -static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) +static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = cpu_buffer->reader_page; struct buffer_page *head = rb_set_head_page(cpu_buffer); @@ -3049,7 +3044,7 @@ static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) /* In case of error, head will be NULL */ if (unlikely(!head)) - return 1; + return true; return reader->read == rb_page_commit(reader) && (commit == reader || @@ -3628,7 +3623,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) /* Finally update the reader page to the new head */ cpu_buffer->reader_page = reader; - rb_reset_reader_page(cpu_buffer); + cpu_buffer->reader_page->read = 0; if (overwrite != cpu_buffer->last_overrun) { cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; @@ -3638,6 +3633,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: + /* Update the read_stamp on the first event */ + if (reader && reader->read == 0) + cpu_buffer->read_stamp = reader->page->time_stamp; + arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); @@ -3860,19 +3859,36 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) } EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); -static inline int rb_ok_to_lock(void) +static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer) { + if (likely(!in_nmi())) { + raw_spin_lock(&cpu_buffer->reader_lock); + return true; + } + /* * If an NMI die dumps out the content of the ring buffer - * do not grab locks. We also permanently disable the ring - * buffer too. A one time deal is all you get from reading - * the ring buffer from an NMI. + * trylock must be used to prevent a deadlock if the NMI + * preempted a task that holds the ring buffer locks. If + * we get the lock then all is fine, if not, then continue + * to do the read, but this can corrupt the ring buffer, + * so it must be permanently disabled from future writes. + * Reading from NMI is a oneshot deal. */ - if (likely(!in_nmi())) - return 1; + if (raw_spin_trylock(&cpu_buffer->reader_lock)) + return true; - tracing_off_permanent(); - return 0; + /* Continue without locking, but disable the ring buffer */ + atomic_inc(&cpu_buffer->record_disabled); + return false; +} + +static inline void +rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) +{ + if (likely(locked)) + raw_spin_unlock(&cpu_buffer->reader_lock); + return; } /** @@ -3892,21 +3908,18 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; unsigned long flags; - int dolock; + bool dolock; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; - dolock = rb_ok_to_lock(); again: local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) @@ -3959,9 +3972,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event = NULL; unsigned long flags; - int dolock; - - dolock = rb_ok_to_lock(); + bool dolock; again: /* might be called in atomic */ @@ -3972,8 +3983,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event) { @@ -3981,8 +3991,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, rb_advance_reader(cpu_buffer); } - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); out: @@ -4259,32 +4268,28 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset); * rind_buffer_empty - is the ring buffer empty? * @buffer: The ring buffer to test */ -int ring_buffer_empty(struct ring_buffer *buffer) +bool ring_buffer_empty(struct ring_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; - int dolock; + bool dolock; int cpu; int ret; - dolock = rb_ok_to_lock(); - /* yes this is racy, but if you don't like the race, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); ret = rb_per_cpu_empty(cpu_buffer); - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); if (!ret) - return 0; + return false; } - return 1; + return true; } EXPORT_SYMBOL_GPL(ring_buffer_empty); @@ -4293,25 +4298,21 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); * @buffer: The ring buffer * @cpu: The CPU buffer to test */ -int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) +bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; - int dolock; + bool dolock; int ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return 1; - - dolock = rb_ok_to_lock(); + return true; cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); ret = rb_per_cpu_empty(cpu_buffer); - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); return ret; @@ -4349,9 +4350,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, ret = -EAGAIN; - if (ring_buffer_flags != RB_BUFFERS_ON) - goto out; - if (atomic_read(&buffer_a->record_disabled)) goto out; diff --git a/kernel/kernel/trace/ring_buffer_benchmark.c b/kernel/kernel/trace/ring_buffer_benchmark.c index 1b28df2d9..6df9a83e2 100644 --- a/kernel/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/kernel/trace/ring_buffer_benchmark.c @@ -24,19 +24,19 @@ struct rb_page { static int wakeup_interval = 100; static int reader_finish; -static struct completion read_start; -static struct completion read_done; +static DECLARE_COMPLETION(read_start); +static DECLARE_COMPLETION(read_done); static struct ring_buffer *buffer; static struct task_struct *producer; static struct task_struct *consumer; static unsigned long read; -static int disable_reader; +static unsigned int disable_reader; module_param(disable_reader, uint, 0644); MODULE_PARM_DESC(disable_reader, "only run producer"); -static int write_iteration = 50; +static unsigned int write_iteration = 50; module_param(write_iteration, uint, 0644); MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); @@ -46,26 +46,26 @@ static int consumer_nice = MAX_NICE; static int producer_fifo = -1; static int consumer_fifo = -1; -module_param(producer_nice, uint, 0644); +module_param(producer_nice, int, 0644); MODULE_PARM_DESC(producer_nice, "nice prio for producer"); -module_param(consumer_nice, uint, 0644); +module_param(consumer_nice, int, 0644); MODULE_PARM_DESC(consumer_nice, "nice prio for consumer"); -module_param(producer_fifo, uint, 0644); +module_param(producer_fifo, int, 0644); MODULE_PARM_DESC(producer_fifo, "fifo prio for producer"); -module_param(consumer_fifo, uint, 0644); +module_param(consumer_fifo, int, 0644); MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer"); static int read_events; -static int kill_test; +static int test_error; -#define KILL_TEST() \ +#define TEST_ERROR() \ do { \ - if (!kill_test) { \ - kill_test = 1; \ + if (!test_error) { \ + test_error = 1; \ WARN_ON(1); \ } \ } while (0) @@ -75,6 +75,11 @@ enum event_status { EVENT_DROPPED, }; +static bool break_test(void) +{ + return test_error || kthread_should_stop(); +} + static enum event_status read_event(int cpu) { struct ring_buffer_event *event; @@ -87,7 +92,7 @@ static enum event_status read_event(int cpu) entry = ring_buffer_event_data(event); if (*entry != cpu) { - KILL_TEST(); + TEST_ERROR(); return EVENT_DROPPED; } @@ -115,10 +120,10 @@ static enum event_status read_page(int cpu) rpage = bpage; /* The commit may have missed event flags set, clear them */ commit = local_read(&rpage->commit) & 0xfffff; - for (i = 0; i < commit && !kill_test; i += inc) { + for (i = 0; i < commit && !test_error ; i += inc) { if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { - KILL_TEST(); + TEST_ERROR(); break; } @@ -128,7 +133,7 @@ static enum event_status read_page(int cpu) case RINGBUF_TYPE_PADDING: /* failed writes may be discarded events */ if (!event->time_delta) - KILL_TEST(); + TEST_ERROR(); inc = event->array[0] + 4; break; case RINGBUF_TYPE_TIME_EXTEND: @@ -137,12 +142,12 @@ static enum event_status read_page(int cpu) case 0: entry = ring_buffer_event_data(event); if (*entry != cpu) { - KILL_TEST(); + TEST_ERROR(); break; } read++; if (!event->array[0]) { - KILL_TEST(); + TEST_ERROR(); break; } inc = event->array[0] + 4; @@ -150,17 +155,17 @@ static enum event_status read_page(int cpu) default: entry = ring_buffer_event_data(event); if (*entry != cpu) { - KILL_TEST(); + TEST_ERROR(); break; } read++; inc = ((event->type_len + 1) * 4); } - if (kill_test) + if (test_error) break; if (inc <= 0) { - KILL_TEST(); + TEST_ERROR(); break; } } @@ -178,10 +183,14 @@ static void ring_buffer_consumer(void) read_events ^= 1; read = 0; - while (!reader_finish && !kill_test) { - int found; + /* + * Continue running until the producer specifically asks to stop + * and is ready for the completion. + */ + while (!READ_ONCE(reader_finish)) { + int found = 1; - do { + while (found && !test_error) { int cpu; found = 0; @@ -193,19 +202,25 @@ static void ring_buffer_consumer(void) else stat = read_page(cpu); - if (kill_test) + if (test_error) break; + if (stat == EVENT_FOUND) found = 1; + } - } while (found && !kill_test); + } + /* Wait till the producer wakes us up when there is more data + * available or when the producer wants us to finish reading. + */ set_current_state(TASK_INTERRUPTIBLE); if (reader_finish) break; schedule(); } + __set_current_state(TASK_RUNNING); reader_finish = 0; complete(&read_done); } @@ -263,8 +278,7 @@ static void ring_buffer_producer(void) if (cnt % wakeup_interval) cond_resched(); #endif - - } while (ktime_before(end_time, timeout) && !kill_test); + } while (ktime_before(end_time, timeout) && !break_test()); trace_printk("End ring buffer hammer\n"); if (consumer) { @@ -274,8 +288,6 @@ static void ring_buffer_producer(void) /* the completions must be visible before the finish var */ smp_wmb(); reader_finish = 1; - /* finish var visible before waking up the consumer */ - smp_wmb(); wake_up_process(consumer); wait_for_completion(&read_done); } @@ -285,7 +297,7 @@ static void ring_buffer_producer(void) entries = ring_buffer_entries(buffer); overruns = ring_buffer_overruns(buffer); - if (kill_test) + if (test_error) trace_printk("ERROR!\n"); if (!disable_reader) { @@ -366,20 +378,19 @@ static void wait_to_die(void) static int ring_buffer_consumer_thread(void *arg) { - while (!kthread_should_stop() && !kill_test) { + while (!break_test()) { complete(&read_start); ring_buffer_consumer(); set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop() || kill_test) + if (break_test()) break; - schedule(); } __set_current_state(TASK_RUNNING); - if (kill_test) + if (!kthread_should_stop()) wait_to_die(); return 0; @@ -387,25 +398,28 @@ static int ring_buffer_consumer_thread(void *arg) static int ring_buffer_producer_thread(void *arg) { - init_completion(&read_start); - - while (!kthread_should_stop() && !kill_test) { + while (!break_test()) { ring_buffer_reset(buffer); if (consumer) { - smp_wmb(); wake_up_process(consumer); wait_for_completion(&read_start); } ring_buffer_producer(); + if (break_test()) + goto out_kill; trace_printk("Sleeping for 10 secs\n"); set_current_state(TASK_INTERRUPTIBLE); + if (break_test()) + goto out_kill; schedule_timeout(HZ * SLEEP_TIME); } - if (kill_test) +out_kill: + __set_current_state(TASK_RUNNING); + if (!kthread_should_stop()) wait_to_die(); return 0; diff --git a/kernel/kernel/trace/trace.c b/kernel/kernel/trace/trace.c index 5088d1ce2..f1ee84b00 100644 --- a/kernel/kernel/trace/trace.c +++ b/kernel/kernel/trace/trace.c @@ -100,8 +100,6 @@ static DEFINE_PER_CPU(bool, trace_cmdline_save); */ static int tracing_disabled = 1; -DEFINE_PER_CPU(int, ftrace_cpu_disabled); - cpumask_var_t __read_mostly tracing_buffer_mask; /* @@ -214,12 +212,10 @@ __setup("alloc_snapshot", boot_alloc_snapshot); static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; -static char *trace_boot_options __initdata; static int __init set_trace_boot_options(char *str) { strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); - trace_boot_options = trace_boot_options_buf; return 0; } __setup("trace_options=", set_trace_boot_options); @@ -250,6 +246,19 @@ unsigned long long ns2usecs(cycle_t nsec) return nsec; } +/* trace_flags holds trace_options default values */ +#define TRACE_DEFAULT_FLAGS \ + (FUNCTION_DEFAULT_FLAGS | \ + TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ + TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) + +/* trace_options that are only supported by global_trace */ +#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ + TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD) + + /* * The global_trace is the descriptor that holds the tracing * buffers for the live tracing. For each CPU, it contains @@ -262,7 +271,9 @@ unsigned long long ns2usecs(cycle_t nsec) * pages for the buffer for that CPU. Each CPU has the same number * of pages allocated for its buffer. */ -static struct trace_array global_trace; +static struct trace_array global_trace = { + .trace_flags = TRACE_DEFAULT_FLAGS, +}; LIST_HEAD(ftrace_trace_arrays); @@ -297,11 +308,11 @@ void trace_array_put(struct trace_array *this_tr) mutex_unlock(&trace_types_lock); } -int filter_check_discard(struct ftrace_event_file *file, void *rec, +int filter_check_discard(struct trace_event_file *file, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) && + if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && !filter_match_preds(file->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; @@ -311,7 +322,7 @@ int filter_check_discard(struct ftrace_event_file *file, void *rec, } EXPORT_SYMBOL_GPL(filter_check_discard); -int call_filter_check_discard(struct ftrace_event_call *call, void *rec, +int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { @@ -468,11 +479,29 @@ static inline void trace_access_lock_init(void) #endif -/* trace_flags holds trace_options default values */ -unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | - TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | - TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION; +#ifdef CONFIG_STACKTRACE +static void __ftrace_trace_stack(struct ring_buffer *buffer, + unsigned long flags, + int skip, int pc, struct pt_regs *regs); +static inline void ftrace_trace_stack(struct trace_array *tr, + struct ring_buffer *buffer, + unsigned long flags, + int skip, int pc, struct pt_regs *regs); + +#else +static inline void __ftrace_trace_stack(struct ring_buffer *buffer, + unsigned long flags, + int skip, int pc, struct pt_regs *regs) +{ +} +static inline void ftrace_trace_stack(struct trace_array *tr, + struct ring_buffer *buffer, + unsigned long flags, + int skip, int pc, struct pt_regs *regs) +{ +} + +#endif static void tracer_tracing_on(struct trace_array *tr) { @@ -518,7 +547,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) int alloc; int pc; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; pc = preempt_count(); @@ -548,7 +577,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(buffer, irq_flags, 4, pc); + ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); return size; } @@ -568,7 +597,7 @@ int __trace_bputs(unsigned long ip, const char *str) int size = sizeof(struct bputs_entry); int pc; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; pc = preempt_count(); @@ -588,7 +617,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(buffer, irq_flags, 4, pc); + ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); return 1; } @@ -834,34 +863,18 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } +/* + * TRACE_FLAGS is defined as a tuple matching bit masks with strings. + * It uses C(a, b) where 'a' is the enum name and 'b' is the string that + * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list + * of strings in the order that the enums were defined. + */ +#undef C +#define C(a, b) b + /* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { - "print-parent", - "sym-offset", - "sym-addr", - "verbose", - "raw", - "hex", - "bin", - "block", - "stacktrace", - "trace_printk", - "ftrace_preempt", - "branch", - "annotate", - "userstacktrace", - "sym-userobj", - "printk-msg-only", - "context-info", - "latency-format", - "sleep-time", - "graph-time", - "record-cmd", - "overwrite", - "disable_on_free", - "irq-info", - "markers", - "function-trace", + TRACE_FLAGS NULL }; @@ -876,6 +889,7 @@ static struct { { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, + { ktime_get_raw_fast_ns, "mono_raw", 1 }, ARCH_TRACE_CLOCKS }; @@ -1203,13 +1217,17 @@ static inline int run_tracer_selftest(struct tracer *type) } #endif /* CONFIG_FTRACE_STARTUP_TEST */ +static void add_tracer_options(struct trace_array *tr, struct tracer *t); + +static void __init apply_trace_boot_options(void); + /** * register_tracer - register a tracer with the ftrace system. * @type - the plugin for the tracer * * Register a new plugin tracer. */ -int register_tracer(struct tracer *type) +int __init register_tracer(struct tracer *type) { struct tracer *t; int ret = 0; @@ -1252,6 +1270,7 @@ int register_tracer(struct tracer *type) type->next = trace_types; trace_types = type; + add_tracer_options(&global_trace, type); out: tracing_selftest_running = false; @@ -1267,6 +1286,9 @@ int register_tracer(struct tracer *type) /* Do we want this tracer to start on bootup? */ tracing_set_tracer(&global_trace, type->name); default_bootup_tracer = NULL; + + apply_trace_boot_options(); + /* disable other selftests, since this will break it. */ tracing_selftest_disabled = true; #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1674,36 +1696,29 @@ __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *eve ring_buffer_unlock_commit(buffer, event); } -static inline void -__trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(buffer, flags, 6, pc); + ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); ftrace_trace_userstack(buffer, flags, pc); } - -void trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc); -} EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); static struct ring_buffer *temp_buffer; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, - struct ftrace_event_file *ftrace_file, + struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc) { struct ring_buffer_event *entry; - *current_rb = ftrace_file->tr->trace_buffer.buffer; + *current_rb = trace_file->tr->trace_buffer.buffer; entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); /* @@ -1712,7 +1727,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, * to store the trace event for the tigger to use. It's recusive * safe and will not be recorded anywhere. */ - if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { + if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); @@ -1732,22 +1747,15 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); -void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc); -} -EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); - -void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); + ftrace_trace_stack(tr, buffer, flags, 0, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); @@ -1764,15 +1772,11 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_function; + struct trace_event_call *call = &event_function; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; - /* If we are reading the ring buffer, don't trace */ - if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) - return; - event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), flags, pc); if (!event) @@ -1799,7 +1803,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { - struct ftrace_event_call *call = &event_kernel_stack; + struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -1876,24 +1880,17 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, } -void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc, struct pt_regs *regs) +static inline void ftrace_trace_stack(struct trace_array *tr, + struct ring_buffer *buffer, + unsigned long flags, + int skip, int pc, struct pt_regs *regs) { - if (!(trace_flags & TRACE_ITER_STACKTRACE)) + if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) return; __ftrace_trace_stack(buffer, flags, skip, pc, regs); } -void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc) -{ - if (!(trace_flags & TRACE_ITER_STACKTRACE)) - return; - - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); -} - void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { @@ -1927,12 +1924,12 @@ static DEFINE_PER_CPU(int, user_stack_count); void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_user_stack; + struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE)) return; /* @@ -2133,7 +2130,7 @@ static void trace_printk_start_stop_comm(int enabled) */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { - struct ftrace_event_call *call = &event_bprint; + struct trace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_array *tr = &global_trace; @@ -2176,7 +2173,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(buffer, flags, 6, pc); + ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); } out: @@ -2191,7 +2188,7 @@ static int __trace_array_vprintk(struct ring_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { - struct ftrace_event_call *call = &event_print; + struct trace_event_call *call = &event_print; struct ring_buffer_event *event; int len = 0, size, pc; struct print_entry *entry; @@ -2228,7 +2225,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, memcpy(&entry->buf, tbuffer, len + 1); if (!call_filter_check_discard(call, entry, buffer, event)) { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(buffer, flags, 6, pc); + ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); } out: preempt_enable_notrace(); @@ -2249,7 +2246,7 @@ int trace_array_printk(struct trace_array *tr, int ret; va_list ap; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; va_start(ap, fmt); @@ -2264,7 +2261,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer, int ret; va_list ap; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) return 0; va_start(ap, fmt); @@ -2611,7 +2608,7 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { - unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); struct trace_buffer *buf = iter->trace_buffer; struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); struct tracer *type = iter->trace; @@ -2673,20 +2670,22 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; + struct trace_array *tr = iter->tr; - if (!(trace_flags & TRACE_ITER_ANNOTATE)) + if (!(tr->trace_flags & TRACE_ITER_ANNOTATE)) return; if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) return; - if (cpumask_test_cpu(iter->cpu, iter->started)) + if (iter->started && cpumask_test_cpu(iter->cpu, iter->started)) return; if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) return; - cpumask_set_cpu(iter->cpu, iter->started); + if (iter->started) + cpumask_set_cpu(iter->cpu, iter->started); /* Don't print started cpu buffer for the first entry of the trace */ if (iter->idx > 1) @@ -2696,8 +2695,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter) static enum print_line_t print_trace_fmt(struct trace_iterator *iter) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; - unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; struct trace_event *event; @@ -2707,7 +2707,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { if (iter->iter_flags & TRACE_FILE_LAT_FMT) trace_print_lat_context(iter); else @@ -2727,13 +2727,14 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) static enum print_line_t print_raw_fmt(struct trace_iterator *iter) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *entry; struct trace_event *event; entry = iter->ent; - if (trace_flags & TRACE_ITER_CONTEXT_INFO) + if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) trace_seq_printf(s, "%d %d %llu ", entry->pid, iter->cpu, iter->ts); @@ -2751,6 +2752,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) static enum print_line_t print_hex_fmt(struct trace_iterator *iter) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; struct trace_entry *entry; @@ -2758,7 +2760,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) entry = iter->ent; - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { SEQ_PUT_HEX_FIELD(s, entry->pid); SEQ_PUT_HEX_FIELD(s, iter->cpu); SEQ_PUT_HEX_FIELD(s, iter->ts); @@ -2780,13 +2782,14 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) static enum print_line_t print_bin_fmt(struct trace_iterator *iter) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *entry; struct trace_event *event; entry = iter->ent; - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { SEQ_PUT_FIELD(s, entry->pid); SEQ_PUT_FIELD(s, iter->cpu); SEQ_PUT_FIELD(s, iter->ts); @@ -2835,6 +2838,8 @@ int trace_empty(struct trace_iterator *iter) /* Called with trace_event_read_lock() held. */ enum print_line_t print_trace_line(struct trace_iterator *iter) { + struct trace_array *tr = iter->tr; + unsigned long trace_flags = tr->trace_flags; enum print_line_t ret; if (iter->lost_events) { @@ -2880,6 +2885,7 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) void trace_latency_header(struct seq_file *m) { struct trace_iterator *iter = m->private; + struct trace_array *tr = iter->tr; /* print nothing if the buffers are empty */ if (trace_empty(iter)) @@ -2888,13 +2894,15 @@ void trace_latency_header(struct seq_file *m) if (iter->iter_flags & TRACE_FILE_LAT_FMT) print_trace_header(m, iter); - if (!(trace_flags & TRACE_ITER_VERBOSE)) + if (!(tr->trace_flags & TRACE_ITER_VERBOSE)) print_lat_help_header(m); } void trace_default_header(struct seq_file *m) { struct trace_iterator *iter = m->private; + struct trace_array *tr = iter->tr; + unsigned long trace_flags = tr->trace_flags; if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) return; @@ -3044,7 +3052,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (!iter) return ERR_PTR(-ENOMEM); - iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), + iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), GFP_KERNEL); if (!iter->buffer_iter) goto release; @@ -3239,7 +3247,7 @@ static int tracing_open(struct inode *inode, struct file *file) iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); - else if (trace_flags & TRACE_ITER_LATENCY_FMT) + else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; } @@ -3486,7 +3494,7 @@ static int tracing_trace_options_show(struct seq_file *m, void *v) trace_opts = tr->current_trace->flags->opts; for (i = 0; trace_options[i]; i++) { - if (trace_flags & (1 << i)) + if (tr->trace_flags & (1 << i)) seq_printf(m, "%s\n", trace_options[i]); else seq_printf(m, "no%s\n", trace_options[i]); @@ -3551,7 +3559,7 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { /* do nothing if flag is already set */ - if (!!(trace_flags & mask) == !!enabled) + if (!!(tr->trace_flags & mask) == !!enabled) return 0; /* Give the tracer a chance to approve the change */ @@ -3560,9 +3568,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) return -EINVAL; if (enabled) - trace_flags |= mask; + tr->trace_flags |= mask; else - trace_flags &= ~mask; + tr->trace_flags &= ~mask; if (mask == TRACE_ITER_RECORD_CMD) trace_event_enable_cmd_record(enabled); @@ -3574,8 +3582,10 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) #endif } - if (mask == TRACE_ITER_PRINTK) + if (mask == TRACE_ITER_PRINTK) { trace_printk_start_stop_comm(enabled); + trace_printk_control(enabled); + } return 0; } @@ -3586,6 +3596,7 @@ static int trace_set_options(struct trace_array *tr, char *option) int neg = 0; int ret = -ENODEV; int i; + size_t orig_len = strlen(option); cmp = strstrip(option); @@ -3609,9 +3620,36 @@ static int trace_set_options(struct trace_array *tr, char *option) mutex_unlock(&trace_types_lock); + /* + * If the first trailing whitespace is replaced with '\0' by strstrip, + * turn it back into a space. + */ + if (orig_len > strlen(option)) + option[strlen(option)] = ' '; + return ret; } +static void __init apply_trace_boot_options(void) +{ + char *buf = trace_boot_options_buf; + char *option; + + while (true) { + option = strsep(&buf, ","); + + if (!option) + break; + + if (*option) + trace_set_options(&global_trace, option); + + /* Put back the comma to allow this to be called again */ + if (buf) + *(buf - 1) = ','; + } +} + static ssize_t tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -4306,11 +4344,8 @@ int tracing_update_buffers(void) struct trace_option_dentry; -static struct trace_option_dentry * -create_trace_option_files(struct trace_array *tr, struct tracer *tracer); - static void -destroy_trace_option_files(struct trace_option_dentry *topts); +create_trace_option_files(struct trace_array *tr, struct tracer *tracer); /* * Used to clear out the tracer before deletion of an instance. @@ -4329,20 +4364,13 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } -static void update_tracer_options(struct trace_array *tr, struct tracer *t) +static void add_tracer_options(struct trace_array *tr, struct tracer *t) { - static struct trace_option_dentry *topts; - /* Only enable if the directory has been created already. */ if (!tr->dir) return; - /* Currently, only the top instance has options */ - if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) - return; - - destroy_trace_option_files(topts); - topts = create_trace_option_files(tr, t); + create_trace_option_files(tr, t); } static int tracing_set_tracer(struct trace_array *tr, const char *buf) @@ -4411,7 +4439,6 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) free_snapshot(tr); } #endif - update_tracer_options(tr, t); #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { @@ -4531,6 +4558,8 @@ out: return ret; } +#ifdef CONFIG_TRACER_MAX_TRACE + static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -4545,6 +4574,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos); } +#endif + static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -4578,7 +4609,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); - if (trace_flags & TRACE_ITER_LATENCY_FMT) + if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; /* Output in nanoseconds only if we are using a clock in nanoseconds. */ @@ -4635,11 +4666,13 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) static unsigned int trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) { + struct trace_array *tr = iter->tr; + /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return POLLIN | POLLRDNORM; - if (trace_flags & TRACE_ITER_BLOCK) + if (tr->trace_flags & TRACE_ITER_BLOCK) /* * Always select as readable when in blocking mode */ @@ -5056,7 +5089,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) struct trace_array *tr = inode->i_private; /* disable tracing ? */ - if (trace_flags & TRACE_ITER_STOP_ON_FREE) + if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE) tracer_tracing_off(tr); /* resize the ring buffer to 0 */ tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); @@ -5089,7 +5122,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) return -EINVAL; - if (!(trace_flags & TRACE_ITER_MARKERS)) + if (!(tr->trace_flags & TRACE_ITER_MARKERS)) return -EINVAL; if (cnt > TRACE_BUF_SIZE) @@ -5444,12 +5477,14 @@ static const struct file_operations tracing_thresh_fops = { .llseek = generic_file_llseek, }; +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, .write = tracing_max_lat_write, .llseek = generic_file_llseek, }; +#endif static const struct file_operations set_tracer_fops = { .open = tracing_open_generic, @@ -6141,13 +6176,6 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) #include "trace_selftest.c" #endif -struct trace_option_dentry { - struct tracer_opt *opt; - struct tracer_flags *flags; - struct trace_array *tr; - struct dentry *entry; -}; - static ssize_t trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -6200,14 +6228,51 @@ static const struct file_operations trace_options_fops = { .llseek = generic_file_llseek, }; +/* + * In order to pass in both the trace_array descriptor as well as the index + * to the flag that the trace option file represents, the trace_array + * has a character array of trace_flags_index[], which holds the index + * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. + * The address of this character array is passed to the flag option file + * read/write callbacks. + * + * In order to extract both the index and the trace_array descriptor, + * get_tr_index() uses the following algorithm. + * + * idx = *ptr; + * + * As the pointer itself contains the address of the index (remember + * index[1] == 1). + * + * Then to get the trace_array descriptor, by subtracting that index + * from the ptr, we get to the start of the index itself. + * + * ptr - idx == &index[0] + * + * Then a simple container_of() from that pointer gets us to the + * trace_array descriptor. + */ +static void get_tr_index(void *data, struct trace_array **ptr, + unsigned int *pindex) +{ + *pindex = *(unsigned char *)data; + + *ptr = container_of(data - *pindex, struct trace_array, + trace_flags_index); +} + static ssize_t trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - long index = (long)filp->private_data; + void *tr_index = filp->private_data; + struct trace_array *tr; + unsigned int index; char *buf; - if (trace_flags & (1 << index)) + get_tr_index(tr_index, &tr, &index); + + if (tr->trace_flags & (1 << index)) buf = "1\n"; else buf = "0\n"; @@ -6219,11 +6284,14 @@ static ssize_t trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct trace_array *tr = &global_trace; - long index = (long)filp->private_data; + void *tr_index = filp->private_data; + struct trace_array *tr; + unsigned int index; unsigned long val; int ret; + get_tr_index(tr_index, &tr, &index); + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); if (ret) return ret; @@ -6307,21 +6375,39 @@ create_trace_option_file(struct trace_array *tr, } -static struct trace_option_dentry * +static void create_trace_option_files(struct trace_array *tr, struct tracer *tracer) { struct trace_option_dentry *topts; + struct trace_options *tr_topts; struct tracer_flags *flags; struct tracer_opt *opts; int cnt; + int i; if (!tracer) - return NULL; + return; flags = tracer->flags; if (!flags || !flags->opts) - return NULL; + return; + + /* + * If this is an instance, only create flags for tracers + * the instance may have. + */ + if (!trace_ok_for_array(tracer, tr)) + return; + + for (i = 0; i < tr->nr_topts; i++) { + /* + * Check if these flags have already been added. + * Some tracers share flags. + */ + if (tr->topts[i].tracer->flags == tracer->flags) + return; + } opts = flags->opts; @@ -6330,27 +6416,27 @@ create_trace_option_files(struct trace_array *tr, struct tracer *tracer) topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); if (!topts) - return NULL; - - for (cnt = 0; opts[cnt].name; cnt++) - create_trace_option_file(tr, &topts[cnt], flags, - &opts[cnt]); - - return topts; -} - -static void -destroy_trace_option_files(struct trace_option_dentry *topts) -{ - int cnt; + return; - if (!topts) + tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), + GFP_KERNEL); + if (!tr_topts) { + kfree(topts); return; + } - for (cnt = 0; topts[cnt].opt; cnt++) - tracefs_remove(topts[cnt].entry); + tr->topts = tr_topts; + tr->topts[tr->nr_topts].tracer = tracer; + tr->topts[tr->nr_topts].topts = topts; + tr->nr_topts++; - kfree(topts); + for (cnt = 0; opts[cnt].name; cnt++) { + create_trace_option_file(tr, &topts[cnt], flags, + &opts[cnt]); + WARN_ONCE(topts[cnt].entry == NULL, + "Failed to create trace option: %s", + opts[cnt].name); + } } static struct dentry * @@ -6363,21 +6449,26 @@ create_trace_option_core_file(struct trace_array *tr, if (!t_options) return NULL; - return trace_create_file(option, 0644, t_options, (void *)index, - &trace_options_core_fops); + return trace_create_file(option, 0644, t_options, + (void *)&tr->trace_flags_index[index], + &trace_options_core_fops); } -static __init void create_trace_options_dir(struct trace_array *tr) +static void create_trace_options_dir(struct trace_array *tr) { struct dentry *t_options; + bool top_level = tr == &global_trace; int i; t_options = trace_options_init_dentry(tr); if (!t_options) return; - for (i = 0; trace_options[i]; i++) - create_trace_option_core_file(tr, trace_options[i], i); + for (i = 0; trace_options[i]; i++) { + if (top_level || + !((1 << i) & TOP_LEVEL_TRACE_FLAGS)) + create_trace_option_core_file(tr, trace_options[i], i); + } } static ssize_t @@ -6444,7 +6535,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size { enum ring_buffer_flags rb_flags; - rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; + rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; buf->tr = tr; @@ -6514,6 +6605,30 @@ static void free_trace_buffers(struct trace_array *tr) #endif } +static void init_trace_flags_index(struct trace_array *tr) +{ + int i; + + /* Used by the trace options files */ + for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) + tr->trace_flags_index[i] = i; +} + +static void __update_tracer_options(struct trace_array *tr) +{ + struct tracer *t; + + for (t = trace_types; t; t = t->next) + add_tracer_options(tr, t); +} + +static void update_tracer_options(struct trace_array *tr) +{ + mutex_lock(&trace_types_lock); + __update_tracer_options(tr); + mutex_unlock(&trace_types_lock); +} + static int instance_mkdir(const char *name) { struct trace_array *tr; @@ -6539,6 +6654,8 @@ static int instance_mkdir(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; + tr->trace_flags = global_trace.trace_flags; + cpumask_copy(tr->tracing_cpumask, cpu_all_mask); raw_spin_lock_init(&tr->start_lock); @@ -6564,6 +6681,8 @@ static int instance_mkdir(const char *name) } init_tracer_tracefs(tr, tr->dir); + init_trace_flags_index(tr); + __update_tracer_options(tr); list_add(&tr->list, &ftrace_trace_arrays); @@ -6589,6 +6708,7 @@ static int instance_rmdir(const char *name) struct trace_array *tr; int found = 0; int ret; + int i; mutex_lock(&trace_types_lock); @@ -6611,9 +6731,14 @@ static int instance_rmdir(const char *name) tracing_set_nop(tr); event_trace_del_tracer(tr); ftrace_destroy_function_files(tr); - debugfs_remove_recursive(tr->dir); + tracefs_remove_recursive(tr->dir); free_trace_buffers(tr); + for (i = 0; i < tr->nr_topts; i++) { + kfree(tr->topts[i].topts); + } + kfree(tr->topts); + kfree(tr->name); kfree(tr); @@ -6675,6 +6800,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("tracing_on", 0644, d_tracer, tr, &rb_simple_fops); + create_trace_options_dir(tr); + #ifdef CONFIG_TRACER_MAX_TRACE trace_create_file("tracing_max_latency", 0644, d_tracer, &tr->max_latency, &tracing_max_lat_fops); @@ -6730,7 +6857,9 @@ struct dentry *tracing_init_dentry(void) if (tr->dir) return NULL; - if (WARN_ON(!debugfs_initialized())) + if (WARN_ON(!tracefs_initialized()) || + (IS_ENABLED(CONFIG_DEBUG_FS) && + WARN_ON(!debugfs_initialized()))) return ERR_PTR(-ENODEV); /* @@ -6870,11 +6999,7 @@ static __init int tracer_init_tracefs(void) create_trace_instances(d_tracer); - create_trace_options_dir(&global_trace); - - /* If the tracer was started via cmdline, create options for it here */ - if (global_trace.current_trace != &nop_trace) - update_tracer_options(&global_trace, global_trace.current_trace); + update_tracer_options(&global_trace); return 0; } @@ -6973,6 +7098,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; static atomic_t dump_running; + struct trace_array *tr = &global_trace; unsigned int old_userobj; unsigned long flags; int cnt = 0, cpu; @@ -6999,13 +7125,13 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled); + atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } - old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; + old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ; /* don't look at user memory in panic mode */ - trace_flags &= ~TRACE_ITER_SYM_USEROBJ; + tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; switch (oops_dump_mode) { case DUMP_ALL: @@ -7068,7 +7194,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) printk(KERN_TRACE "---------------------------------\n"); out_enable: - trace_flags |= old_userobj; + tr->trace_flags |= old_userobj; for_each_tracing_cpu(cpu) { atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); @@ -7083,6 +7209,12 @@ __init static int tracer_alloc_buffers(void) int ring_buf_size; int ret = -ENOMEM; + /* + * Make sure we don't accidently add more trace options + * than we have bits for. + */ + BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); + if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) goto out; @@ -7141,6 +7273,8 @@ __init static int tracer_alloc_buffers(void) ftrace_init_global_array_ops(&global_trace); + init_trace_flags_index(&global_trace); + register_tracer(&nop_trace); /* All seems OK, enable tracing */ @@ -7157,12 +7291,7 @@ __init static int tracer_alloc_buffers(void) INIT_LIST_HEAD(&global_trace.events); list_add(&global_trace.list, &ftrace_trace_arrays); - while (trace_boot_options) { - char *option; - - option = strsep(&trace_boot_options, ","); - trace_set_options(&global_trace, option); - } + apply_trace_boot_options(); register_snapshot_cmd(); diff --git a/kernel/kernel/trace/trace.h b/kernel/kernel/trace/trace.h index c0f3c568c..3bf86ece6 100644 --- a/kernel/kernel/trace/trace.h +++ b/kernel/kernel/trace/trace.h @@ -12,7 +12,7 @@ #include <linux/ftrace.h> #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/trace_seq.h> @@ -71,9 +71,6 @@ enum trace_type { tstruct \ } -#undef TP_ARGS -#define TP_ARGS(args...) args - #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) @@ -158,9 +155,12 @@ struct trace_array_cpu { pid_t pid; kuid_t uid; char comm[TASK_COMM_LEN]; + + bool ignore_pid; }; struct tracer; +struct trace_option_dentry; struct trace_buffer { struct trace_array *tr; @@ -170,6 +170,19 @@ struct trace_buffer { int cpu; }; +#define TRACE_FLAGS_MAX_SIZE 32 + +struct trace_options { + struct tracer *tracer; + struct trace_option_dentry *topts; +}; + +struct trace_pid_list { + unsigned int nr_pids; + int order; + pid_t *pids; +}; + /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. @@ -195,6 +208,7 @@ struct trace_array { bool allocated_snapshot; unsigned long max_latency; #endif + struct trace_pid_list __rcu *filtered_pids; /* * max_lock is used to protect the swapping of buffers * when taking a max snapshot. The buffers themselves are @@ -213,18 +227,22 @@ struct trace_array { #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; - struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls]; - struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls]; + struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; + struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; + int nr_topts; struct tracer *current_trace; + unsigned int trace_flags; + unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; struct dentry *event_dir; + struct trace_options *topts; struct list_head systems; struct list_head events; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ @@ -335,6 +353,13 @@ struct tracer_flags { #define TRACER_OPT(s, b) .name = #s, .bit = b +struct trace_option_dentry { + struct tracer_opt *opt; + struct tracer_flags *flags; + struct trace_array *tr; + struct dentry *entry; +}; + /** * struct tracer - a specific tracer and its callbacks to interact with tracefs * @name: the name chosen to select it on the available_tracers file @@ -613,29 +638,12 @@ void update_max_tr_single(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef CONFIG_STACKTRACE -void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc); - -void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc, struct pt_regs *regs); - void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc); void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else -static inline void ftrace_trace_stack(struct ring_buffer *buffer, - unsigned long flags, int skip, int pc) -{ -} - -static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer, - unsigned long flags, int skip, - int pc, struct pt_regs *regs) -{ -} - static inline void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -661,7 +669,6 @@ extern int DYN_FTRACE_TEST_NAME2(void); extern bool ring_buffer_expanded; extern bool tracing_selftest_disabled; -DECLARE_PER_CPU(int, ftrace_cpu_disabled); #ifdef CONFIG_FTRACE_STARTUP_TEST extern int trace_selftest_startup_function(struct tracer *trace, @@ -709,8 +716,6 @@ int trace_array_printk_buf(struct ring_buffer *buffer, void trace_printk_seq(struct trace_seq *s); enum print_line_t print_trace_line(struct trace_iterator *iter); -extern unsigned long trace_flags; - extern char trace_find_mark(unsigned long long duration); /* Standard output formatting function used for function return traces */ @@ -725,9 +730,14 @@ extern char trace_find_mark(unsigned long long duration); #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_IRQS 0x40 #define TRACE_GRAPH_PRINT_TAIL 0x80 +#define TRACE_GRAPH_SLEEP_TIME 0x100 +#define TRACE_GRAPH_GRAPH_TIME 0x200 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) +extern void ftrace_graph_sleep_time_control(bool enable); +extern void ftrace_graph_graph_time_control(bool enable); + extern enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags); extern void print_graph_headers_flags(struct seq_file *s, u32 flags); @@ -861,7 +871,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops); #define ftrace_destroy_filter_files(ops) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ -int ftrace_event_is_function(struct ftrace_event_call *call); +bool ftrace_event_is_function(struct trace_event_call *call); /* * struct trace_parser - servers for reading the user input separated by spaces @@ -899,42 +909,94 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, size_t cnt, loff_t *ppos); /* + * Only create function graph options if function graph is configured. + */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +# define FGRAPH_FLAGS \ + C(DISPLAY_GRAPH, "display-graph"), +#else +# define FGRAPH_FLAGS +#endif + +#ifdef CONFIG_BRANCH_TRACER +# define BRANCH_FLAGS \ + C(BRANCH, "branch"), +#else +# define BRANCH_FLAGS +#endif + +#ifdef CONFIG_FUNCTION_TRACER +# define FUNCTION_FLAGS \ + C(FUNCTION, "function-trace"), +# define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION +#else +# define FUNCTION_FLAGS +# define FUNCTION_DEFAULT_FLAGS 0UL +#endif + +#ifdef CONFIG_STACKTRACE +# define STACK_FLAGS \ + C(STACKTRACE, "stacktrace"), +#else +# define STACK_FLAGS +#endif + +/* * trace_iterator_flags is an enumeration that defines bit * positions into trace_flags that controls the output. * * NOTE: These bits must match the trace_options array in - * trace.c. + * trace.c (this macro guarantees it). + */ +#define TRACE_FLAGS \ + C(PRINT_PARENT, "print-parent"), \ + C(SYM_OFFSET, "sym-offset"), \ + C(SYM_ADDR, "sym-addr"), \ + C(VERBOSE, "verbose"), \ + C(RAW, "raw"), \ + C(HEX, "hex"), \ + C(BIN, "bin"), \ + C(BLOCK, "block"), \ + C(PRINTK, "trace_printk"), \ + C(ANNOTATE, "annotate"), \ + C(USERSTACKTRACE, "userstacktrace"), \ + C(SYM_USEROBJ, "sym-userobj"), \ + C(PRINTK_MSGONLY, "printk-msg-only"), \ + C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \ + C(LATENCY_FMT, "latency-format"), \ + C(RECORD_CMD, "record-cmd"), \ + C(OVERWRITE, "overwrite"), \ + C(STOP_ON_FREE, "disable_on_free"), \ + C(IRQ_INFO, "irq-info"), \ + C(MARKERS, "markers"), \ + FUNCTION_FLAGS \ + FGRAPH_FLAGS \ + STACK_FLAGS \ + BRANCH_FLAGS + +/* + * By defining C, we can make TRACE_FLAGS a list of bit names + * that will define the bits for the flag masks. */ -enum trace_iterator_flags { - TRACE_ITER_PRINT_PARENT = 0x01, - TRACE_ITER_SYM_OFFSET = 0x02, - TRACE_ITER_SYM_ADDR = 0x04, - TRACE_ITER_VERBOSE = 0x08, - TRACE_ITER_RAW = 0x10, - TRACE_ITER_HEX = 0x20, - TRACE_ITER_BIN = 0x40, - TRACE_ITER_BLOCK = 0x80, - TRACE_ITER_STACKTRACE = 0x100, - TRACE_ITER_PRINTK = 0x200, - TRACE_ITER_PREEMPTONLY = 0x400, - TRACE_ITER_BRANCH = 0x800, - TRACE_ITER_ANNOTATE = 0x1000, - TRACE_ITER_USERSTACKTRACE = 0x2000, - TRACE_ITER_SYM_USEROBJ = 0x4000, - TRACE_ITER_PRINTK_MSGONLY = 0x8000, - TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */ - TRACE_ITER_LATENCY_FMT = 0x20000, - TRACE_ITER_SLEEP_TIME = 0x40000, - TRACE_ITER_GRAPH_TIME = 0x80000, - TRACE_ITER_RECORD_CMD = 0x100000, - TRACE_ITER_OVERWRITE = 0x200000, - TRACE_ITER_STOP_ON_FREE = 0x400000, - TRACE_ITER_IRQ_INFO = 0x800000, - TRACE_ITER_MARKERS = 0x1000000, - TRACE_ITER_FUNCTION = 0x2000000, +#undef C +#define C(a, b) TRACE_ITER_##a##_BIT + +enum trace_iterator_bits { + TRACE_FLAGS + /* Make sure we don't go more than we have bits for */ + TRACE_ITER_LAST_BIT }; /* + * By redefining C, we can make TRACE_FLAGS a list of masks that + * use the bits as defined above. + */ +#undef C +#define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT) + +enum trace_iterator_flags { TRACE_FLAGS }; + +/* * TRACE_ITER_SYM_MASK masks the options in trace_flags that * control the output of kernel symbols. */ @@ -948,7 +1010,7 @@ extern int enable_branch_tracing(struct trace_array *tr); extern void disable_branch_tracing(void); static inline int trace_branch_enable(struct trace_array *tr) { - if (trace_flags & TRACE_ITER_BRANCH) + if (tr->trace_flags & TRACE_ITER_BRANCH) return enable_branch_tracing(tr); return 0; } @@ -995,7 +1057,7 @@ struct event_subsystem { int ref_count; }; -struct ftrace_subsystem_dir { +struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; @@ -1055,30 +1117,30 @@ struct filter_pred { extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); -extern void print_event_filter(struct ftrace_event_file *file, +extern void print_event_filter(struct trace_event_file *file, struct trace_seq *s); -extern int apply_event_filter(struct ftrace_event_file *file, +extern int apply_event_filter(struct trace_event_file *file, char *filter_string); -extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, +extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); -extern int create_event_filter(struct ftrace_event_call *call, +extern int create_event_filter(struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * -trace_find_event_field(struct ftrace_event_call *call, char *name); +trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); -extern struct ftrace_event_file *find_event_file(struct trace_array *tr, - const char *system, - const char *event); +extern struct trace_event_file *find_event_file(struct trace_array *tr, + const char *system, + const char *event); static inline void *event_file_data(struct file *filp) { @@ -1183,7 +1245,7 @@ struct event_trigger_ops { * commands need to do this if they themselves log to the trace * buffer (see the @post_trigger() member below). @trigger_type * values are defined by adding new values to the trigger_type - * enum in include/linux/ftrace_event.h. + * enum in include/linux/trace_events.h. * * @post_trigger: A flag that says whether or not this command needs * to have its action delayed until after the current event has @@ -1245,23 +1307,23 @@ struct event_command { enum event_trigger_type trigger_type; bool post_trigger; int (*func)(struct event_command *cmd_ops, - struct ftrace_event_file *file, + struct trace_event_file *file, char *glob, char *cmd, char *params); int (*reg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file); + struct trace_event_file *file); void (*unreg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file); + struct trace_event_file *file); int (*set_filter)(char *filter_str, struct event_trigger_data *data, - struct ftrace_event_file *file); + struct trace_event_file *file); struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; -extern int trace_event_enable_disable(struct ftrace_event_file *file, +extern int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable); extern int tracing_alloc_snapshot(void); @@ -1271,6 +1333,7 @@ extern const char *__stop___trace_bprintk_fmt[]; extern const char *__start___tracepoint_str[]; extern const char *__stop___tracepoint_str[]; +void trace_printk_control(bool enabled); void trace_printk_init_buffers(void); void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); @@ -1289,7 +1352,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ - extern struct ftrace_event_call \ + extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ @@ -1298,7 +1361,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); #include "trace_entries.h" #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) -int perf_ftrace_event_register(struct ftrace_event_call *call, +int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL diff --git a/kernel/kernel/trace/trace_benchmark.c b/kernel/kernel/trace/trace_benchmark.c index 40a14cbcf..0f109c413 100644 --- a/kernel/kernel/trace/trace_benchmark.c +++ b/kernel/kernel/trace/trace_benchmark.c @@ -43,7 +43,7 @@ static void trace_do_benchmark(void) unsigned int std = 0; /* Only run if the tracepoint is actually active */ - if (!trace_benchmark_event_enabled()) + if (!trace_benchmark_event_enabled() || !tracing_is_on()) return; local_irq_disable(); diff --git a/kernel/kernel/trace/trace_branch.c b/kernel/kernel/trace/trace_branch.c index 1879980f0..3a2a73716 100644 --- a/kernel/kernel/trace/trace_branch.c +++ b/kernel/kernel/trace/trace_branch.c @@ -29,7 +29,7 @@ static struct trace_array *branch_tracer; static void probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) { - struct ftrace_event_call *call = &event_branch; + struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; struct trace_array_cpu *data; struct ring_buffer_event *event; @@ -125,25 +125,14 @@ void disable_branch_tracing(void) mutex_unlock(&branch_tracing_mutex); } -static void start_branch_trace(struct trace_array *tr) -{ - enable_branch_tracing(tr); -} - -static void stop_branch_trace(struct trace_array *tr) -{ - disable_branch_tracing(); -} - static int branch_trace_init(struct trace_array *tr) { - start_branch_trace(tr); - return 0; + return enable_branch_tracing(tr); } static void branch_trace_reset(struct trace_array *tr) { - stop_branch_trace(tr); + disable_branch_tracing(); } static enum print_line_t trace_branch_print(struct trace_iterator *iter, @@ -194,7 +183,7 @@ __init static int init_branch_tracer(void) { int ret; - ret = register_ftrace_event(&trace_branch_event); + ret = register_trace_event(&trace_branch_event); if (!ret) { printk(KERN_WARNING "Warning: could not register " "branch events\n"); diff --git a/kernel/kernel/trace/trace_clock.c b/kernel/kernel/trace/trace_clock.c index 57b67b1f2..0f06532a7 100644 --- a/kernel/kernel/trace/trace_clock.c +++ b/kernel/kernel/trace/trace_clock.c @@ -56,6 +56,7 @@ u64 notrace trace_clock(void) { return local_clock(); } +EXPORT_SYMBOL_GPL(trace_clock); /* * trace_jiffy_clock(): Simply use jiffies as a clock counter. @@ -68,6 +69,7 @@ u64 notrace trace_clock_jiffies(void) { return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); } +EXPORT_SYMBOL_GPL(trace_clock_jiffies); /* * trace_clock_global(): special globally coherent trace clock @@ -123,6 +125,7 @@ u64 notrace trace_clock_global(void) return now; } +EXPORT_SYMBOL_GPL(trace_clock_global); static atomic64_t trace_counter; diff --git a/kernel/kernel/trace/trace_event_perf.c b/kernel/kernel/trace/trace_event_perf.c index 6fa484de2..cc9f7a931 100644 --- a/kernel/kernel/trace/trace_event_perf.c +++ b/kernel/kernel/trace/trace_event_perf.c @@ -1,7 +1,7 @@ /* * trace event based perf event profiling/tracing * - * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> + * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com> */ @@ -21,7 +21,7 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_perm(struct ftrace_event_call *tp_event, +static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { if (tp_event->perf_perm) { @@ -83,7 +83,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, return 0; } -static int perf_trace_event_reg(struct ftrace_event_call *tp_event, +static int perf_trace_event_reg(struct trace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; @@ -143,7 +143,7 @@ fail: static void perf_trace_event_unreg(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; int i; if (--tp_event->perf_refcount > 0) @@ -172,17 +172,17 @@ out: static int perf_trace_event_open(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); } static void perf_trace_event_close(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); } -static int perf_trace_event_init(struct ftrace_event_call *tp_event, +static int perf_trace_event_init(struct trace_event_call *tp_event, struct perf_event *p_event) { int ret; @@ -206,7 +206,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, int perf_trace_init(struct perf_event *p_event) { - struct ftrace_event_call *tp_event; + struct trace_event_call *tp_event; u64 event_id = p_event->attr.config; int ret = -EINVAL; @@ -236,7 +236,7 @@ void perf_trace_destroy(struct perf_event *p_event) int perf_trace_add(struct perf_event *p_event, int flags) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; struct hlist_head __percpu *pcpu_list; struct hlist_head *list; @@ -255,7 +255,7 @@ int perf_trace_add(struct perf_event *p_event, int flags) void perf_trace_del(struct perf_event *p_event, int flags) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; hlist_del_rcu(&p_event->hlist_entry); tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } @@ -357,7 +357,7 @@ static void perf_ftrace_function_disable(struct perf_event *event) ftrace_function_local_disable(&event->ftrace_ops); } -int perf_ftrace_event_register(struct ftrace_event_call *call, +int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { switch (type) { diff --git a/kernel/kernel/trace/trace_events.c b/kernel/kernel/trace/trace_events.c index 711529bba..0e508e99b 100644 --- a/kernel/kernel/trace/trace_events.c +++ b/kernel/kernel/trace/trace_events.c @@ -15,11 +15,15 @@ #include <linux/kthread.h> #include <linux/tracefs.h> #include <linux/uaccess.h> +#include <linux/bsearch.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/sort.h> #include <linux/slab.h> #include <linux/delay.h> +#include <trace/events/sched.h> + #include <asm/setup.h> #include "trace_output.h" @@ -30,6 +34,7 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); +static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) @@ -37,21 +42,19 @@ static LIST_HEAD(ftrace_common_fields); static struct kmem_cache *field_cachep; static struct kmem_cache *file_cachep; -#define SYSTEM_FL_FREE_NAME (1 << 31) - static inline int system_refcount(struct event_subsystem *system) { - return system->ref_count & ~SYSTEM_FL_FREE_NAME; + return system->ref_count; } static int system_refcount_inc(struct event_subsystem *system) { - return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME; + return system->ref_count++; } static int system_refcount_dec(struct event_subsystem *system) { - return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME; + return --system->ref_count; } /* Double loops, do not use break, only goto's work */ @@ -61,14 +64,14 @@ static int system_refcount_dec(struct event_subsystem *system) #define do_for_each_event_file_safe(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ - struct ftrace_event_file *___n; \ + struct trace_event_file *___n; \ list_for_each_entry_safe(file, ___n, &tr->events, list) #define while_for_each_event_file() \ } static struct list_head * -trace_get_fields(struct ftrace_event_call *event_call) +trace_get_fields(struct trace_event_call *event_call) { if (!event_call->class->get_fields) return &event_call->class->fields; @@ -89,17 +92,21 @@ __find_event_field(struct list_head *head, char *name) } struct ftrace_event_field * -trace_find_event_field(struct ftrace_event_call *call, char *name) +trace_find_event_field(struct trace_event_call *call, char *name) { struct ftrace_event_field *field; struct list_head *head; - field = __find_event_field(&ftrace_common_fields, name); + head = trace_get_fields(call); + field = __find_event_field(head, name); if (field) return field; - head = trace_get_fields(call); - return __find_event_field(head, name); + field = __find_event_field(&ftrace_generic_fields, name); + if (field) + return field; + + return __find_event_field(&ftrace_common_fields, name); } static int __trace_define_field(struct list_head *head, const char *type, @@ -129,7 +136,7 @@ static int __trace_define_field(struct list_head *head, const char *type, return 0; } -int trace_define_field(struct ftrace_event_call *call, const char *type, +int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type) { @@ -144,6 +151,13 @@ int trace_define_field(struct ftrace_event_call *call, const char *type, } EXPORT_SYMBOL_GPL(trace_define_field); +#define __generic_field(type, item, filter_type) \ + ret = __trace_define_field(&ftrace_generic_fields, #type, \ + #item, 0, 0, is_signed_type(type), \ + filter_type); \ + if (ret) \ + return ret; + #define __common_field(type, item) \ ret = __trace_define_field(&ftrace_common_fields, #type, \ "common_" #item, \ @@ -153,6 +167,18 @@ EXPORT_SYMBOL_GPL(trace_define_field); if (ret) \ return ret; +static int trace_define_generic_fields(void) +{ + int ret; + + __generic_field(int, CPU, FILTER_CPU); + __generic_field(int, cpu, FILTER_CPU); + __generic_field(char *, COMM, FILTER_COMM); + __generic_field(char *, comm, FILTER_COMM); + + return ret; +} + static int trace_define_common_fields(void) { int ret; @@ -168,7 +194,7 @@ static int trace_define_common_fields(void) return ret; } -static void trace_destroy_fields(struct ftrace_event_call *call) +static void trace_destroy_fields(struct trace_event_call *call) { struct ftrace_event_field *field, *next; struct list_head *head; @@ -180,11 +206,11 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -int trace_event_raw_init(struct ftrace_event_call *call) +int trace_event_raw_init(struct trace_event_call *call) { int id; - id = register_ftrace_event(&call->event); + id = register_trace_event(&call->event); if (!id) return -ENODEV; @@ -192,18 +218,38 @@ int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); -void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, - struct ftrace_event_file *ftrace_file, - unsigned long len) +bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) +{ + struct trace_array *tr = trace_file->tr; + struct trace_array_cpu *data; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + if (!pid_list) + return false; + + data = this_cpu_ptr(tr->trace_buffer.data); + + return data->ignore_pid; +} +EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); + +void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, + struct trace_event_file *trace_file, + unsigned long len) { - struct ftrace_event_call *event_call = ftrace_file->event_call; + struct trace_event_call *event_call = trace_file->event_call; + + if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && + trace_event_ignore_this_pid(trace_file)) + return NULL; local_save_flags(fbuffer->flags); fbuffer->pc = preempt_count(); - fbuffer->ftrace_file = ftrace_file; + fbuffer->trace_file = trace_file; fbuffer->event = - trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file, + trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, fbuffer->flags, fbuffer->pc); if (!fbuffer->event) @@ -212,13 +258,13 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } -EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve); +EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); static DEFINE_SPINLOCK(tracepoint_iter_lock); -static void output_printk(struct ftrace_event_buffer *fbuffer) +static void output_printk(struct trace_event_buffer *fbuffer) { - struct ftrace_event_call *event_call; + struct trace_event_call *event_call; struct trace_event *event; unsigned long flags; struct trace_iterator *iter = tracepoint_print_iter; @@ -226,12 +272,12 @@ static void output_printk(struct ftrace_event_buffer *fbuffer) if (!iter) return; - event_call = fbuffer->ftrace_file->event_call; + event_call = fbuffer->trace_file->event_call; if (!event_call || !event_call->event.funcs || !event_call->event.funcs->trace) return; - event = &fbuffer->ftrace_file->event_call->event; + event = &fbuffer->trace_file->event_call->event; spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); @@ -243,21 +289,21 @@ static void output_printk(struct ftrace_event_buffer *fbuffer) spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } -void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer) +void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { if (tracepoint_printk) output_printk(fbuffer); - event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer, + event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, fbuffer->flags, fbuffer->pc); } -EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit); +EXPORT_SYMBOL_GPL(trace_event_buffer_commit); -int ftrace_event_reg(struct ftrace_event_call *call, - enum trace_reg type, void *data) +int trace_event_reg(struct trace_event_call *call, + enum trace_reg type, void *data) { - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { @@ -290,34 +336,35 @@ int ftrace_event_reg(struct ftrace_event_call *call, } return 0; } -EXPORT_SYMBOL_GPL(ftrace_event_reg); +EXPORT_SYMBOL_GPL(trace_event_reg); void trace_event_enable_cmd_record(bool enable) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr; mutex_lock(&event_mutex); do_for_each_event_file(tr, file) { - if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) + if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_cmdline_record(); - set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } else { tracing_stop_cmdline_record(); - clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); mutex_unlock(&event_mutex); } -static int __ftrace_event_enable_disable(struct ftrace_event_file *file, +static int __ftrace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; + struct trace_array *tr = file->tr; int ret = 0; int disable; @@ -339,24 +386,24 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, if (soft_disable) { if (atomic_dec_return(&file->sm_ref) > 0) break; - disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; - clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; + clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); } else - disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE); + disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); - if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) { - clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); - if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) { + if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { + clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); + if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); - clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } call->class->reg(call, TRACE_REG_UNREGISTER, file); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ - if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + if (file->flags & EVENT_FILE_FL_SOFT_MODE) + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* @@ -368,31 +415,31 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, * it still seems to be disabled. */ if (!soft_disable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else { if (atomic_inc_return(&file->sm_ref) > 1) break; - set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); } - if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { + if (!(file->flags & EVENT_FILE_FL_ENABLED)) { /* Keep the event disabled, when going to SOFT_MODE. */ if (soft_disable) - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); - if (trace_flags & TRACE_ITER_RECORD_CMD) { + if (tr->trace_flags & TRACE_ITER_RECORD_CMD) { tracing_start_cmdline_record(); - set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { tracing_stop_cmdline_record(); pr_info("event trace: Could not enable event " - "%s\n", ftrace_event_name(call)); + "%s\n", trace_event_name(call)); break; } - set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); + set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ call->flags |= TRACE_EVENT_FL_WAS_ENABLED; @@ -403,13 +450,13 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, return ret; } -int trace_event_enable_disable(struct ftrace_event_file *file, +int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { return __ftrace_event_enable_disable(file, enable, soft_disable); } -static int ftrace_event_enable_disable(struct ftrace_event_file *file, +static int ftrace_event_enable_disable(struct trace_event_file *file, int enable) { return __ftrace_event_enable_disable(file, enable, 0); @@ -417,7 +464,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_file *file, static void ftrace_clear_events(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { @@ -426,6 +473,148 @@ static void ftrace_clear_events(struct trace_array *tr) mutex_unlock(&event_mutex); } +static int cmp_pid(const void *key, const void *elt) +{ + const pid_t *search_pid = key; + const pid_t *pid = elt; + + if (*search_pid == *pid) + return 0; + if (*search_pid < *pid) + return -1; + return 1; +} + +static bool +check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task) +{ + pid_t search_pid; + pid_t *pid; + + /* + * Return false, because if filtered_pids does not exist, + * all pids are good to trace. + */ + if (!filtered_pids) + return false; + + search_pid = task->pid; + + pid = bsearch(&search_pid, filtered_pids->pids, + filtered_pids->nr_pids, sizeof(pid_t), + cmp_pid); + if (!pid) + return true; + + return false; +} + +static void +event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, + struct task_struct *prev, struct task_struct *next) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, prev) && + check_ignore_pid(pid_list, next)); +} + +static void +event_filter_pid_sched_switch_probe_post(void *data, bool preempt, + struct task_struct *prev, struct task_struct *next) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, next)); +} + +static void +event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + /* Nothing to do if we are already tracing */ + if (!this_cpu_read(tr->trace_buffer.data->ignore_pid)) + return; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, task)); +} + +static void +event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + /* Nothing to do if we are not tracing */ + if (this_cpu_read(tr->trace_buffer.data->ignore_pid)) + return; + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + /* Set tracing if current is enabled */ + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, current)); +} + +static void __ftrace_clear_event_pids(struct trace_array *tr) +{ + struct trace_pid_list *pid_list; + struct trace_event_file *file; + int cpu; + + pid_list = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + if (!pid_list) + return; + + unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); + unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); + + unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); + + unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); + + unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); + + list_for_each_entry(file, &tr->events, list) { + clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); + } + + for_each_possible_cpu(cpu) + per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false; + + rcu_assign_pointer(tr->filtered_pids, NULL); + + /* Wait till all users are no longer using pid filtering */ + synchronize_sched(); + + free_pages((unsigned long)pid_list->pids, pid_list->order); + kfree(pid_list); +} + +static void ftrace_clear_event_pids(struct trace_array *tr) +{ + mutex_lock(&event_mutex); + __ftrace_clear_event_pids(tr); + mutex_unlock(&event_mutex); +} + static void __put_system(struct event_subsystem *system) { struct event_filter *filter = system->filter; @@ -440,8 +629,7 @@ static void __put_system(struct event_subsystem *system) kfree(filter->filter_string); kfree(filter); } - if (system->ref_count & SYSTEM_FL_FREE_NAME) - kfree(system->name); + kfree_const(system->name); kfree(system); } @@ -451,14 +639,14 @@ static void __get_system(struct event_subsystem *system) system_refcount_inc(system); } -static void __get_system_dir(struct ftrace_subsystem_dir *dir) +static void __get_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); dir->ref_count++; __get_system(dir->subsystem); } -static void __put_system_dir(struct ftrace_subsystem_dir *dir) +static void __put_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ @@ -469,14 +657,14 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) kfree(dir); } -static void put_system(struct ftrace_subsystem_dir *dir) +static void put_system(struct trace_subsystem_dir *dir) { mutex_lock(&event_mutex); __put_system_dir(dir); mutex_unlock(&event_mutex); } -static void remove_subsystem(struct ftrace_subsystem_dir *dir) +static void remove_subsystem(struct trace_subsystem_dir *dir) { if (!dir) return; @@ -488,7 +676,7 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir) } } -static void remove_event_file_dir(struct ftrace_event_file *file) +static void remove_event_file_dir(struct trace_event_file *file) { struct dentry *dir = file->dir; struct dentry *child; @@ -517,15 +705,15 @@ static int __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, const char *sub, const char *event, int set) { - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_event_file *file; + struct trace_event_call *call; const char *name; int ret = -EINVAL; list_for_each_entry(file, &tr->events, list) { call = file->event_call; - name = ftrace_event_name(call); + name = trace_event_name(call); if (!name || !call->class || !call->class->reg) continue; @@ -673,8 +861,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_file *file = v; - struct ftrace_event_call *call; + struct trace_event_file *file = v; + struct trace_event_call *call; struct trace_array *tr = m->private; (*pos)++; @@ -685,7 +873,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ - if (call->class && call->class->reg) + if (call->class && call->class->reg && + !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } @@ -694,13 +883,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - file = list_entry(&tr->events, struct ftrace_event_file, list); + file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = t_next(m, file, &l); if (!file) @@ -712,13 +901,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_file *file = v; + struct trace_event_file *file = v; struct trace_array *tr = m->private; (*pos)++; list_for_each_entry_continue(file, &tr->events, list) { - if (file->flags & FTRACE_EVENT_FL_ENABLED) + if (file->flags & EVENT_FILE_FL_ENABLED) return file; } @@ -727,13 +916,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos) static void *s_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - file = list_entry(&tr->events, struct ftrace_event_file, list); + file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = s_next(m, file, &l); if (!file) @@ -744,12 +933,12 @@ static void *s_start(struct seq_file *m, loff_t *pos) static int t_show(struct seq_file *m, void *v) { - struct ftrace_event_file *file = v; - struct ftrace_event_call *call = file->event_call; + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); - seq_printf(m, "%s\n", ftrace_event_name(call)); + seq_printf(m, "%s\n", trace_event_name(call)); return 0; } @@ -759,11 +948,63 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } +static void *p_start(struct seq_file *m, loff_t *pos) + __acquires(RCU) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = m->private; + + /* + * Grab the mutex, to keep calls to p_next() having the same + * tr->filtered_pids as p_start() has. + * If we just passed the tr->filtered_pids around, then RCU would + * have been enough, but doing that makes things more complex. + */ + mutex_lock(&event_mutex); + rcu_read_lock_sched(); + + pid_list = rcu_dereference_sched(tr->filtered_pids); + + if (!pid_list || *pos >= pid_list->nr_pids) + return NULL; + + return (void *)&pid_list->pids[*pos]; +} + +static void p_stop(struct seq_file *m, void *p) + __releases(RCU) +{ + rcu_read_unlock_sched(); + mutex_unlock(&event_mutex); +} + +static void * +p_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_array *tr = m->private; + struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids); + + (*pos)++; + + if (*pos >= pid_list->nr_pids) + return NULL; + + return (void *)&pid_list->pids[*pos]; +} + +static int p_show(struct seq_file *m, void *v) +{ + pid_t *pid = v; + + seq_printf(m, "%d\n", *pid); + return 0; +} + static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long flags; char buf[4] = "0"; @@ -776,12 +1017,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (!file) return -ENODEV; - if (flags & FTRACE_EVENT_FL_ENABLED && - !(flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (flags & EVENT_FILE_FL_ENABLED && + !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); - if (flags & FTRACE_EVENT_FL_SOFT_DISABLED || - flags & FTRACE_EVENT_FL_SOFT_MODE) + if (flags & EVENT_FILE_FL_SOFT_DISABLED || + flags & EVENT_FILE_FL_SOFT_MODE) strcat(buf, "*"); strcat(buf, "\n"); @@ -793,7 +1034,7 @@ static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long val; int ret; @@ -830,10 +1071,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { const char set_to_char[4] = { '?', '0', '1', 'X' }; - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; - struct ftrace_event_call *call; - struct ftrace_event_file *file; + struct trace_event_call *call; + struct trace_event_file *file; struct trace_array *tr = dir->tr; char buf[2]; int set = 0; @@ -842,7 +1083,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!ftrace_event_name(call) || !call->class || !call->class->reg) + if (!trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) @@ -853,7 +1094,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, * or if all events or cleared, or if we have * a mixture. */ - set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED)); + set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); /* * If we have a mixture, no need to look further. @@ -875,7 +1116,7 @@ static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; const char *name = NULL; unsigned long val; @@ -919,7 +1160,7 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = event_file_data(m->private); + struct trace_event_call *call = event_file_data(m->private); struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; @@ -951,13 +1192,13 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct ftrace_event_call *call = event_file_data(m->private); + struct trace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; const char *array_descriptor; switch ((unsigned long)v) { case FORMAT_HEADER: - seq_printf(m, "name: %s\n", ftrace_event_name(call)); + seq_printf(m, "name: %s\n", trace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); seq_puts(m, "format:\n"); return 0; @@ -1064,7 +1305,7 @@ static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_seq *s; int r = -ENODEV; @@ -1097,7 +1338,7 @@ static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; char *buf; int err = -ENODEV; @@ -1134,7 +1375,7 @@ static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { struct event_subsystem *system = NULL; - struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */ + struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */ struct trace_array *tr; int ret; @@ -1183,7 +1424,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) static int system_tr_open(struct inode *inode, struct file *filp) { - struct ftrace_subsystem_dir *dir; + struct trace_subsystem_dir *dir; struct trace_array *tr = inode->i_private; int ret; @@ -1216,7 +1457,7 @@ static int system_tr_open(struct inode *inode, struct file *filp) static int subsystem_release(struct inode *inode, struct file *file) { - struct ftrace_subsystem_dir *dir = file->private_data; + struct trace_subsystem_dir *dir = file->private_data; trace_array_put(dir->tr); @@ -1237,7 +1478,7 @@ static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_seq *s; int r; @@ -1264,7 +1505,7 @@ static ssize_t subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; char *buf; int err; @@ -1316,8 +1557,219 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return r; } +static int max_pids(struct trace_pid_list *pid_list) +{ + return (PAGE_SIZE << pid_list->order) / sizeof(pid_t); +} + +static void ignore_task_cpu(void *data) +{ + struct trace_array *tr = data; + struct trace_pid_list *pid_list; + + /* + * This function is called by on_each_cpu() while the + * event_mutex is held. + */ + pid_list = rcu_dereference_protected(tr->filtered_pids, + mutex_is_locked(&event_mutex)); + + this_cpu_write(tr->trace_buffer.data->ignore_pid, + check_ignore_pid(pid_list, current)); +} + +static ssize_t +ftrace_event_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct trace_array *tr = m->private; + struct trace_pid_list *filtered_pids = NULL; + struct trace_pid_list *pid_list = NULL; + struct trace_event_file *file; + struct trace_parser parser; + unsigned long val; + loff_t this_pos; + ssize_t read = 0; + ssize_t ret = 0; + pid_t pid; + int i; + + if (!cnt) + return 0; + + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + + if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) + return -ENOMEM; + + mutex_lock(&event_mutex); + /* + * Load as many pids into the array before doing a + * swap from the tr->filtered_pids to the new list. + */ + while (cnt > 0) { + + this_pos = 0; + + ret = trace_get_user(&parser, ubuf, cnt, &this_pos); + if (ret < 0 || !trace_parser_loaded(&parser)) + break; + + read += ret; + ubuf += ret; + cnt -= ret; + + parser.buffer[parser.idx] = 0; + + ret = -EINVAL; + if (kstrtoul(parser.buffer, 0, &val)) + break; + if (val > INT_MAX) + break; + + pid = (pid_t)val; + + ret = -ENOMEM; + if (!pid_list) { + pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); + if (!pid_list) + break; + + filtered_pids = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + if (filtered_pids) + pid_list->order = filtered_pids->order; + else + pid_list->order = 0; + + pid_list->pids = (void *)__get_free_pages(GFP_KERNEL, + pid_list->order); + if (!pid_list->pids) + break; + + if (filtered_pids) { + pid_list->nr_pids = filtered_pids->nr_pids; + memcpy(pid_list->pids, filtered_pids->pids, + pid_list->nr_pids * sizeof(pid_t)); + } else + pid_list->nr_pids = 0; + } + + if (pid_list->nr_pids >= max_pids(pid_list)) { + pid_t *pid_page; + + pid_page = (void *)__get_free_pages(GFP_KERNEL, + pid_list->order + 1); + if (!pid_page) + break; + memcpy(pid_page, pid_list->pids, + pid_list->nr_pids * sizeof(pid_t)); + free_pages((unsigned long)pid_list->pids, pid_list->order); + + pid_list->order++; + pid_list->pids = pid_page; + } + + pid_list->pids[pid_list->nr_pids++] = pid; + trace_parser_clear(&parser); + ret = 0; + } + trace_parser_put(&parser); + + if (ret < 0) { + if (pid_list) + free_pages((unsigned long)pid_list->pids, pid_list->order); + kfree(pid_list); + mutex_unlock(&event_mutex); + return ret; + } + + if (!pid_list) { + mutex_unlock(&event_mutex); + return ret; + } + + sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL); + + /* Remove duplicates */ + for (i = 1; i < pid_list->nr_pids; i++) { + int start = i; + + while (i < pid_list->nr_pids && + pid_list->pids[i - 1] == pid_list->pids[i]) + i++; + + if (start != i) { + if (i < pid_list->nr_pids) { + memmove(&pid_list->pids[start], &pid_list->pids[i], + (pid_list->nr_pids - i) * sizeof(pid_t)); + pid_list->nr_pids -= i - start; + i = start; + } else + pid_list->nr_pids = start; + } + } + + rcu_assign_pointer(tr->filtered_pids, pid_list); + + list_for_each_entry(file, &tr->events, list) { + set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); + } + + if (filtered_pids) { + synchronize_sched(); + + free_pages((unsigned long)filtered_pids->pids, filtered_pids->order); + kfree(filtered_pids); + } else { + /* + * Register a probe that is called before all other probes + * to set ignore_pid if next or prev do not match. + * Register a probe this is called after all other probes + * to only keep ignore_pid set if next pid matches. + */ + register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, + tr, 0); + + register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, + tr, 0); + + register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, + tr, 0); + + register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, + tr, 0); + } + + /* + * Ignoring of pids is done at task switch. But we have to + * check for those tasks that are currently running. + * Always do this in case a pid was appended or removed. + */ + on_each_cpu(ignore_task_cpu, tr, 1); + + mutex_unlock(&event_mutex); + + ret = read; + *ppos += read; + + return ret; +} + static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); static const struct seq_operations show_event_seq_ops = { @@ -1334,6 +1786,13 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; +static const struct seq_operations show_set_pid_seq_ops = { + .start = p_start, + .next = p_next, + .show = p_show, + .stop = p_stop, +}; + static const struct file_operations ftrace_avail_fops = { .open = ftrace_event_avail_open, .read = seq_read, @@ -1349,6 +1808,14 @@ static const struct file_operations ftrace_set_event_fops = { .release = ftrace_event_release, }; +static const struct file_operations ftrace_set_event_pid_fops = { + .open = ftrace_event_set_pid_open, + .read = seq_read, + .write = ftrace_event_pid_write, + .llseek = seq_lseek, + .release = ftrace_event_release, +}; + static const struct file_operations ftrace_enable_fops = { .open = tracing_open_generic, .read = event_enable_read, @@ -1459,6 +1926,26 @@ ftrace_event_set_open(struct inode *inode, struct file *file) return ret; } +static int +ftrace_event_set_pid_open(struct inode *inode, struct file *file) +{ + const struct seq_operations *seq_ops = &show_set_pid_seq_ops; + struct trace_array *tr = inode->i_private; + int ret; + + if (trace_array_get(tr) < 0) + return -ENODEV; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_clear_event_pids(tr); + + ret = ftrace_event_open(inode, file, seq_ops); + if (ret < 0) + trace_array_put(tr); + return ret; +} + static struct event_subsystem * create_new_subsystem(const char *name) { @@ -1472,13 +1959,9 @@ create_new_subsystem(const char *name) system->ref_count = 1; /* Only allocate if dynamic (kprobes and modules) */ - if (!core_kernel_data((unsigned long)name)) { - system->ref_count |= SYSTEM_FL_FREE_NAME; - system->name = kstrdup(name, GFP_KERNEL); - if (!system->name) - goto out_free; - } else - system->name = name; + system->name = kstrdup_const(name, GFP_KERNEL); + if (!system->name) + goto out_free; system->filter = NULL; @@ -1491,17 +1974,16 @@ create_new_subsystem(const char *name) return system; out_free: - if (system->ref_count & SYSTEM_FL_FREE_NAME) - kfree(system->name); + kfree_const(system->name); kfree(system); return NULL; } static struct dentry * event_subsystem_dir(struct trace_array *tr, const char *name, - struct ftrace_event_file *file, struct dentry *parent) + struct trace_event_file *file, struct dentry *parent) { - struct ftrace_subsystem_dir *dir; + struct trace_subsystem_dir *dir; struct event_subsystem *system; struct dentry *entry; @@ -1573,9 +2055,9 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } static int -event_create_dir(struct dentry *parent, struct ftrace_event_file *file) +event_create_dir(struct dentry *parent, struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; struct list_head *head; struct dentry *d_events; @@ -1593,7 +2075,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) } else d_events = parent; - name = ftrace_event_name(call); + name = trace_event_name(call); file->dir = tracefs_create_dir(name, d_events); if (!file->dir) { pr_warn("Could not create tracefs '%s' directory\n", name); @@ -1636,9 +2118,9 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) return 0; } -static void remove_event_from_tracers(struct ftrace_event_call *call) +static void remove_event_from_tracers(struct trace_event_call *call) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { @@ -1656,10 +2138,10 @@ static void remove_event_from_tracers(struct ftrace_event_call *call) } while_for_each_event_file(); } -static void event_remove(struct ftrace_event_call *call) +static void event_remove(struct trace_event_call *call) { struct trace_array *tr; - struct ftrace_event_file *file; + struct trace_event_file *file; do_for_each_event_file(tr, file) { if (file->event_call != call) @@ -1675,17 +2157,17 @@ static void event_remove(struct ftrace_event_call *call) } while_for_each_event_file(); if (call->event.funcs) - __unregister_ftrace_event(&call->event); + __unregister_trace_event(&call->event); remove_event_from_tracers(call); list_del(&call->list); } -static int event_init(struct ftrace_event_call *call) +static int event_init(struct trace_event_call *call) { int ret = 0; const char *name; - name = ftrace_event_name(call); + name = trace_event_name(call); if (WARN_ON(!name)) return -EINVAL; @@ -1699,7 +2181,7 @@ static int event_init(struct ftrace_event_call *call) } static int -__register_event(struct ftrace_event_call *call, struct module *mod) +__register_event(struct trace_event_call *call, struct module *mod) { int ret; @@ -1735,7 +2217,7 @@ static char *enum_replace(char *ptr, struct trace_enum_map *map, int len) return ptr + elen; } -static void update_event_printk(struct ftrace_event_call *call, +static void update_event_printk(struct trace_event_call *call, struct trace_enum_map *map) { char *ptr; @@ -1813,7 +2295,7 @@ static void update_event_printk(struct ftrace_event_call *call, void trace_event_enum_update(struct trace_enum_map **map, int len) { - struct ftrace_event_call *call, *p; + struct trace_event_call *call, *p; const char *last_system = NULL; int last_i; int i; @@ -1838,11 +2320,11 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) up_write(&trace_event_sem); } -static struct ftrace_event_file * -trace_create_new_event(struct ftrace_event_call *call, +static struct trace_event_file * +trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) @@ -1860,9 +2342,9 @@ trace_create_new_event(struct ftrace_event_call *call, /* Add an event to a trace directory */ static int -__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr) +__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = trace_create_new_event(call, tr); if (!file) @@ -1877,10 +2359,10 @@ __trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr) * the filesystem is initialized. */ static __init int -__trace_early_add_new_event(struct ftrace_event_call *call, +__trace_early_add_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = trace_create_new_event(call, tr); if (!file) @@ -1890,10 +2372,10 @@ __trace_early_add_new_event(struct ftrace_event_call *call, } struct ftrace_module_file_ops; -static void __add_event_to_tracers(struct ftrace_event_call *call); +static void __add_event_to_tracers(struct trace_event_call *call); /* Add an additional event_call dynamically */ -int trace_add_event_call(struct ftrace_event_call *call) +int trace_add_event_call(struct trace_event_call *call) { int ret; mutex_lock(&trace_types_lock); @@ -1912,7 +2394,7 @@ int trace_add_event_call(struct ftrace_event_call *call) * Must be called under locking of trace_types_lock, event_mutex and * trace_event_sem. */ -static void __trace_remove_event_call(struct ftrace_event_call *call) +static void __trace_remove_event_call(struct trace_event_call *call) { event_remove(call); trace_destroy_fields(call); @@ -1920,10 +2402,10 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) call->filter = NULL; } -static int probe_remove_event_call(struct ftrace_event_call *call) +static int probe_remove_event_call(struct trace_event_call *call) { struct trace_array *tr; - struct ftrace_event_file *file; + struct trace_event_file *file; #ifdef CONFIG_PERF_EVENTS if (call->perf_refcount) @@ -1934,10 +2416,10 @@ static int probe_remove_event_call(struct ftrace_event_call *call) continue; /* * We can't rely on ftrace_event_enable_disable(enable => 0) - * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress + * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress * TRACE_REG_UNREGISTER. */ - if (file->flags & FTRACE_EVENT_FL_ENABLED) + if (file->flags & EVENT_FILE_FL_ENABLED) return -EBUSY; /* * The do_for_each_event_file_safe() is @@ -1954,7 +2436,7 @@ static int probe_remove_event_call(struct ftrace_event_call *call) } /* Remove an event_call */ -int trace_remove_event_call(struct ftrace_event_call *call) +int trace_remove_event_call(struct trace_event_call *call) { int ret; @@ -1978,7 +2460,7 @@ int trace_remove_event_call(struct ftrace_event_call *call) static void trace_module_add_events(struct module *mod) { - struct ftrace_event_call **call, **start, **end; + struct trace_event_call **call, **start, **end; if (!mod->num_trace_events) return; @@ -2001,7 +2483,7 @@ static void trace_module_add_events(struct module *mod) static void trace_module_remove_events(struct module *mod) { - struct ftrace_event_call *call, *p; + struct trace_event_call *call, *p; bool clear_trace = false; down_write(&trace_event_sem); @@ -2057,28 +2539,28 @@ static struct notifier_block trace_module_nb = { static void __trace_add_event_dirs(struct trace_array *tr) { - struct ftrace_event_call *call; + struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create directory for event %s\n", - ftrace_event_name(call)); + trace_event_name(call)); } } -struct ftrace_event_file * +struct trace_event_file * find_event_file(struct trace_array *tr, const char *system, const char *event) { - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_event_file *file; + struct trace_event_call *call; const char *name; list_for_each_entry(file, &tr->events, list) { call = file->event_call; - name = ftrace_event_name(call); + name = trace_event_name(call); if (!name || !call->class || !call->class->reg) continue; @@ -2100,7 +2582,7 @@ find_event_file(struct trace_array *tr, const char *system, const char *event) #define DISABLE_EVENT_STR "disable_event" struct event_probe_data { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long count; int ref; bool enable; @@ -2116,9 +2598,9 @@ event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) return; if (data->enable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); else - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); } static void @@ -2134,7 +2616,7 @@ event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data return; /* Skip if the event is in a state we want to switch to */ - if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; if (data->count != -1) @@ -2154,7 +2636,7 @@ event_enable_print(struct seq_file *m, unsigned long ip, seq_printf(m, "%s:%s:%s", data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, data->file->event_call->class->system, - ftrace_event_name(data->file->event_call)); + trace_event_name(data->file->event_call)); if (data->count == -1) seq_puts(m, ":unlimited\n"); @@ -2228,7 +2710,7 @@ event_enable_func(struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enabled) { struct trace_array *tr = top_trace_array(); - struct ftrace_event_file *file; + struct trace_event_file *file; struct ftrace_probe_ops *ops; struct event_probe_data *data; const char *system; @@ -2360,7 +2842,7 @@ static inline int register_event_cmds(void) { return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ /* - * The top level array has already had its ftrace_event_file + * The top level array has already had its trace_event_file * descriptors created in order to allow for early events to * be recorded. This function is called after the tracefs has been * initialized, and we now have to create the files associated @@ -2369,7 +2851,7 @@ static inline int register_event_cmds(void) { return 0; } static __init void __trace_early_add_event_dirs(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; int ret; @@ -2377,7 +2859,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warn("Could not create directory for event %s\n", - ftrace_event_name(file->event_call)); + trace_event_name(file->event_call)); } } @@ -2390,7 +2872,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) static __init void __trace_early_add_events(struct trace_array *tr) { - struct ftrace_event_call *call; + struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { @@ -2401,7 +2883,7 @@ __trace_early_add_events(struct trace_array *tr) ret = __trace_early_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create early event %s\n", - ftrace_event_name(call)); + trace_event_name(call)); } } @@ -2409,13 +2891,13 @@ __trace_early_add_events(struct trace_array *tr) static void __trace_remove_event_dirs(struct trace_array *tr) { - struct ftrace_event_file *file, *next; + struct trace_event_file *file, *next; list_for_each_entry_safe(file, next, &tr->events, list) remove_event_file_dir(file); } -static void __add_event_to_tracers(struct ftrace_event_call *call) +static void __add_event_to_tracers(struct trace_event_call *call) { struct trace_array *tr; @@ -2423,8 +2905,8 @@ static void __add_event_to_tracers(struct ftrace_event_call *call) __trace_add_new_event(call, tr); } -extern struct ftrace_event_call *__start_ftrace_events[]; -extern struct ftrace_event_call *__stop_ftrace_events[]; +extern struct trace_event_call *__start_ftrace_events[]; +extern struct trace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; @@ -2458,6 +2940,9 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) return -ENOMEM; } + entry = tracefs_create_file("set_event_pid", 0644, parent, + tr, &ftrace_set_event_pid_fops); + /* ring buffer internal formats */ trace_create_file("header_page", 0444, d_events, ring_buffer_print_page_header, @@ -2538,6 +3023,9 @@ int event_trace_del_tracer(struct trace_array *tr) /* Disable any event triggers and associated soft-disabled events */ clear_event_triggers(tr); + /* Clear the pid list */ + __ftrace_clear_event_pids(tr); + /* Disable any running events */ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); @@ -2559,7 +3047,7 @@ int event_trace_del_tracer(struct trace_array *tr) static __init int event_trace_memsetup(void) { field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); - file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC); + file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); return 0; } @@ -2575,16 +3063,16 @@ early_enable_events(struct trace_array *tr, bool disable_first) if (!token) break; - if (!*token) - continue; - /* Restarting syscalls requires that we stop them first */ - if (disable_first) - ftrace_set_clr_event(tr, token, 0); + if (*token) { + /* Restarting syscalls requires that we stop them first */ + if (disable_first) + ftrace_set_clr_event(tr, token, 0); - ret = ftrace_set_clr_event(tr, token, 1); - if (ret) - pr_warn("Failed to enable trace event: %s\n", token); + ret = ftrace_set_clr_event(tr, token, 1); + if (ret) + pr_warn("Failed to enable trace event: %s\n", token); + } /* Put back the comma to allow this to be called again */ if (buf) @@ -2595,7 +3083,7 @@ early_enable_events(struct trace_array *tr, bool disable_first) static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); - struct ftrace_event_call **iter, *call; + struct trace_event_call **iter, *call; int ret; if (!tr) @@ -2673,6 +3161,9 @@ static __init int event_trace_init(void) if (!entry) pr_warn("Could not create tracefs 'available_events' entry\n"); + if (trace_define_generic_fields()) + pr_warn("tracing: Failed to allocated generic fields"); + if (trace_define_common_fields()) pr_warn("tracing: Failed to allocate common fields"); @@ -2756,9 +3247,9 @@ static __init void event_test_stuff(void) */ static __init void event_trace_self_tests(void) { - struct ftrace_subsystem_dir *dir; - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_subsystem_dir *dir; + struct trace_event_file *file; + struct trace_event_call *call; struct event_subsystem *system; struct trace_array *tr; int ret; @@ -2789,13 +3280,13 @@ static __init void event_trace_self_tests(void) continue; #endif - pr_info("Testing event %s: ", ftrace_event_name(call)); + pr_info("Testing event %s: ", trace_event_name(call)); /* * If an event is already enabled, someone is using * it and the self test should not be on. */ - if (file->flags & FTRACE_EVENT_FL_ENABLED) { + if (file->flags & EVENT_FILE_FL_ENABLED) { pr_warn("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; @@ -2868,7 +3359,9 @@ static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); -static void +static struct trace_array *event_tr; + +static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { @@ -2899,7 +3392,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - trace_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(event_tr, buffer, event, flags, pc); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); @@ -2915,6 +3408,9 @@ static struct ftrace_ops trace_ops __initdata = static __init void event_trace_self_test_with_function(void) { int ret; + event_tr = top_trace_array(); + if (WARN_ON(!event_tr)) + return; ret = register_ftrace_function(&trace_ops); if (WARN_ON(ret < 0)) { pr_info("Failed to enable function tracer for event tests\n"); diff --git a/kernel/kernel/trace/trace_events_filter.c b/kernel/kernel/trace/trace_events_filter.c index 52adf02d7..681630254 100644 --- a/kernel/kernel/trace/trace_events_filter.c +++ b/kernel/kernel/trace/trace_events_filter.c @@ -252,6 +252,50 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) return match; } +/* Filter predicate for CPUs. */ +static int filter_pred_cpu(struct filter_pred *pred, void *event) +{ + int cpu, cmp; + int match = 0; + + cpu = raw_smp_processor_id(); + cmp = pred->val; + + switch (pred->op) { + case OP_EQ: + match = cpu == cmp; + break; + case OP_LT: + match = cpu < cmp; + break; + case OP_LE: + match = cpu <= cmp; + break; + case OP_GT: + match = cpu > cmp; + break; + case OP_GE: + match = cpu >= cmp; + break; + default: + break; + } + + return !!match == !pred->not; +} + +/* Filter predicate for COMM. */ +static int filter_pred_comm(struct filter_pred *pred, void *event) +{ + int cmp, match; + + cmp = pred->regex.match(current->comm, &pred->regex, + pred->regex.field_len); + match = cmp ^ pred->not; + + return match; +} + static int filter_pred_none(struct filter_pred *pred, void *event) { return 0; @@ -643,7 +687,7 @@ static void append_filter_err(struct filter_parse_state *ps, free_page((unsigned long) buf); } -static inline struct event_filter *event_filter(struct ftrace_event_file *file) +static inline struct event_filter *event_filter(struct trace_event_file *file) { if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) return file->event_call->filter; @@ -652,7 +696,7 @@ static inline struct event_filter *event_filter(struct ftrace_event_file *file) } /* caller must hold event_mutex */ -void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s) +void print_event_filter(struct trace_event_file *file, struct trace_seq *s) { struct event_filter *filter = event_filter(file); @@ -780,14 +824,14 @@ static void __free_preds(struct event_filter *filter) filter->n_preds = 0; } -static void filter_disable(struct ftrace_event_file *file) +static void filter_disable(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags &= ~TRACE_EVENT_FL_FILTERED; else - file->flags &= ~FTRACE_EVENT_FL_FILTERED; + file->flags &= ~EVENT_FILE_FL_FILTERED; } static void __free_filter(struct event_filter *filter) @@ -837,9 +881,9 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) return 0; } -static inline void __remove_filter(struct ftrace_event_file *file) +static inline void __remove_filter(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; filter_disable(file); if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) @@ -848,10 +892,10 @@ static inline void __remove_filter(struct ftrace_event_file *file) remove_filter_string(file->filter); } -static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir, +static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { if (file->system != dir) @@ -860,9 +904,9 @@ static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir, } } -static inline void __free_subsystem_filter(struct ftrace_event_file *file) +static inline void __free_subsystem_filter(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) { __free_filter(call->filter); @@ -873,10 +917,10 @@ static inline void __free_subsystem_filter(struct ftrace_event_file *file) } } -static void filter_free_subsystem_filters(struct ftrace_subsystem_dir *dir, +static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { if (file->system != dir) @@ -929,15 +973,15 @@ static bool is_string_field(struct ftrace_event_field *field) field->filter_type == FILTER_PTR_STRING; } -static int is_legal_op(struct ftrace_event_field *field, int op) +static bool is_legal_op(struct ftrace_event_field *field, int op) { if (is_string_field(field) && (op != OP_EQ && op != OP_NE && op != OP_GLOB)) - return 0; + return false; if (!is_string_field(field) && op == OP_GLOB) - return 0; + return false; - return 1; + return true; } static filter_pred_fn_t select_comparison_fn(int op, int field_size, @@ -999,7 +1043,11 @@ static int init_pred(struct filter_parse_state *ps, return -EINVAL; } - if (is_string_field(field)) { + if (field->filter_type == FILTER_COMM) { + filter_build_regex(pred); + fn = filter_pred_comm; + pred->regex.field_len = TASK_COMM_LEN; + } else if (is_string_field(field)) { filter_build_regex(pred); if (field->filter_type == FILTER_STATIC_STRING) { @@ -1025,7 +1073,10 @@ static int init_pred(struct filter_parse_state *ps, } pred->val = val; - fn = select_comparison_fn(pred->op, field->size, + if (field->filter_type == FILTER_CPU) + fn = filter_pred_cpu; + else + fn = select_comparison_fn(pred->op, field->size, field->is_signed); if (!fn) { parse_error(ps, FILT_ERR_INVALID_OP, 0); @@ -1342,7 +1393,7 @@ parse_operand: } static struct filter_pred *create_pred(struct filter_parse_state *ps, - struct ftrace_event_call *call, + struct trace_event_call *call, int op, char *operand1, char *operand2) { struct ftrace_event_field *field; @@ -1564,7 +1615,7 @@ static int fold_pred_tree(struct event_filter *filter, filter->preds); } -static int replace_preds(struct ftrace_event_call *call, +static int replace_preds(struct trace_event_call *call, struct event_filter *filter, struct filter_parse_state *ps, bool dry_run) @@ -1677,20 +1728,20 @@ fail: return err; } -static inline void event_set_filtered_flag(struct ftrace_event_file *file) +static inline void event_set_filtered_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags |= TRACE_EVENT_FL_FILTERED; else - file->flags |= FTRACE_EVENT_FL_FILTERED; + file->flags |= EVENT_FILE_FL_FILTERED; } -static inline void event_set_filter(struct ftrace_event_file *file, +static inline void event_set_filter(struct trace_event_file *file, struct event_filter *filter) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) rcu_assign_pointer(call->filter, filter); @@ -1698,9 +1749,9 @@ static inline void event_set_filter(struct ftrace_event_file *file, rcu_assign_pointer(file->filter, filter); } -static inline void event_clear_filter(struct ftrace_event_file *file) +static inline void event_clear_filter(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) RCU_INIT_POINTER(call->filter, NULL); @@ -1709,33 +1760,33 @@ static inline void event_clear_filter(struct ftrace_event_file *file) } static inline void -event_set_no_set_filter_flag(struct ftrace_event_file *file) +event_set_no_set_filter_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; else - file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER; + file->flags |= EVENT_FILE_FL_NO_SET_FILTER; } static inline void -event_clear_no_set_filter_flag(struct ftrace_event_file *file) +event_clear_no_set_filter_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; else - file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER; + file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER; } static inline bool -event_no_set_filter_flag(struct ftrace_event_file *file) +event_no_set_filter_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; - if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER) + if (file->flags & EVENT_FILE_FL_NO_SET_FILTER) return true; if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) && @@ -1750,12 +1801,12 @@ struct filter_list { struct event_filter *filter; }; -static int replace_system_preds(struct ftrace_subsystem_dir *dir, +static int replace_system_preds(struct trace_subsystem_dir *dir, struct trace_array *tr, struct filter_parse_state *ps, char *filter_string) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct filter_list *filter_item; struct filter_list *tmp; LIST_HEAD(filter_list); @@ -1899,8 +1950,8 @@ static void create_filter_finish(struct filter_parse_state *ps) } /** - * create_filter - create a filter for a ftrace_event_call - * @call: ftrace_event_call to create a filter for + * create_filter - create a filter for a trace_event_call + * @call: trace_event_call to create a filter for * @filter_str: filter string * @set_str: remember @filter_str and enable detailed error in filter * @filterp: out param for created filter (always updated on return) @@ -1914,7 +1965,7 @@ static void create_filter_finish(struct filter_parse_state *ps) * information if @set_str is %true and the caller is responsible for * freeing it. */ -static int create_filter(struct ftrace_event_call *call, +static int create_filter(struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { @@ -1934,7 +1985,7 @@ static int create_filter(struct ftrace_event_call *call, return err; } -int create_event_filter(struct ftrace_event_call *call, +int create_event_filter(struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { @@ -1950,7 +2001,7 @@ int create_event_filter(struct ftrace_event_call *call, * Identical to create_filter() except that it creates a subsystem filter * and always remembers @filter_str. */ -static int create_system_filter(struct ftrace_subsystem_dir *dir, +static int create_system_filter(struct trace_subsystem_dir *dir, struct trace_array *tr, char *filter_str, struct event_filter **filterp) { @@ -1976,9 +2027,9 @@ static int create_system_filter(struct ftrace_subsystem_dir *dir, } /* caller must hold event_mutex */ -int apply_event_filter(struct ftrace_event_file *file, char *filter_string) +int apply_event_filter(struct trace_event_file *file, char *filter_string) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; struct event_filter *filter; int err; @@ -2027,7 +2078,7 @@ int apply_event_filter(struct ftrace_event_file *file, char *filter_string) return err; } -int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, +int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string) { struct event_subsystem *system = dir->subsystem; @@ -2090,7 +2141,7 @@ struct function_filter_data { static char ** ftrace_function_filter_re(char *buf, int len, int *count) { - char *str, *sep, **re; + char *str, **re; str = kstrndup(buf, len, GFP_KERNEL); if (!str) @@ -2100,8 +2151,7 @@ ftrace_function_filter_re(char *buf, int len, int *count) * The argv_split function takes white space * as a separator, so convert ',' into spaces. */ - while ((sep = strchr(str, ','))) - *sep = ' '; + strreplace(str, ',', ' '); re = argv_split(GFP_KERNEL, str, count); kfree(str); @@ -2227,7 +2277,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, { int err; struct event_filter *filter; - struct ftrace_event_call *call; + struct trace_event_call *call; mutex_lock(&event_mutex); @@ -2283,7 +2333,7 @@ out_unlock: static struct test_filter_data_t { char *filter; - struct ftrace_raw_ftrace_test_filter rec; + struct trace_event_raw_ftrace_test_filter rec; int match; char *not_visited; } test_filter_data[] = { diff --git a/kernel/kernel/trace/trace_events_trigger.c b/kernel/kernel/trace/trace_events_trigger.c index 8712df9de..42a4009fd 100644 --- a/kernel/kernel/trace/trace_events_trigger.c +++ b/kernel/kernel/trace/trace_events_trigger.c @@ -40,7 +40,7 @@ trigger_data_free(struct event_trigger_data *data) /** * event_triggers_call - Call triggers associated with a trace event - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * @rec: The trace entry for the event, NULL for unconditional invocation * * For each trigger associated with an event, invoke the trigger @@ -63,7 +63,7 @@ trigger_data_free(struct event_trigger_data *data) * any trigger that should be deferred, ETT_NONE if nothing to defer. */ enum event_trigger_type -event_triggers_call(struct ftrace_event_file *file, void *rec) +event_triggers_call(struct trace_event_file *file, void *rec) { struct event_trigger_data *data; enum event_trigger_type tt = ETT_NONE; @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); /** * event_triggers_post_call - Call 'post_triggers' for a trace event - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * @tt: enum event_trigger_type containing a set bit for each trigger to invoke * * For each trigger associated with an event, invoke the trigger @@ -103,7 +103,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); * Called from tracepoint handlers (with rcu_read_lock_sched() held). */ void -event_triggers_post_call(struct ftrace_event_file *file, +event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt) { struct event_trigger_data *data; @@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(event_triggers_post_call); static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { - struct ftrace_event_file *event_file = event_file_data(m->private); + struct trace_event_file *event_file = event_file_data(m->private); if (t == SHOW_AVAILABLE_TRIGGERS) return NULL; @@ -129,7 +129,7 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) static void *trigger_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *event_file; + struct trace_event_file *event_file; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); @@ -201,7 +201,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) return ret; } -static int trigger_process_regex(struct ftrace_event_file *file, char *buff) +static int trigger_process_regex(struct trace_event_file *file, char *buff) { char *command, *next = buff; struct event_command *p; @@ -227,7 +227,7 @@ static ssize_t event_trigger_regex_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *event_file; + struct trace_event_file *event_file; ssize_t ret; char *buf; @@ -430,7 +430,7 @@ event_trigger_free(struct event_trigger_ops *ops, trigger_data_free(data); } -static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, +static int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable) { int ret = 0; @@ -438,12 +438,12 @@ static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, if (trigger_enable) { if (atomic_inc_return(&file->tm_ref) > 1) return ret; - set_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); + set_bit(EVENT_FILE_FL_TRIGGER_MODE_BIT, &file->flags); ret = trace_event_enable_disable(file, 1, 1); } else { if (atomic_dec_return(&file->tm_ref) > 0) return ret; - clear_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_TRIGGER_MODE_BIT, &file->flags); ret = trace_event_enable_disable(file, 0, 1); } @@ -466,7 +466,7 @@ static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, void clear_event_triggers(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { struct event_trigger_data *data; @@ -480,7 +480,7 @@ clear_event_triggers(struct trace_array *tr) /** * update_cond_flag - Set or reset the TRIGGER_COND bit - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * If an event has triggers and any of those triggers has a filter or * a post_trigger, trigger invocation needs to be deferred until after @@ -488,7 +488,7 @@ clear_event_triggers(struct trace_array *tr) * its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be * cleared. */ -static void update_cond_flag(struct ftrace_event_file *file) +static void update_cond_flag(struct trace_event_file *file) { struct event_trigger_data *data; bool set_cond = false; @@ -501,9 +501,9 @@ static void update_cond_flag(struct ftrace_event_file *file) } if (set_cond) - set_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); + set_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags); else - clear_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags); } /** @@ -511,7 +511,7 @@ static void update_cond_flag(struct ftrace_event_file *file) * @glob: The raw string used to register the trigger * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data to associate with the trigger - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * Common implementation for event trigger registration. * @@ -522,7 +522,7 @@ static void update_cond_flag(struct ftrace_event_file *file) */ static int register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct event_trigger_data *test; int ret = 0; @@ -557,7 +557,7 @@ out: * @glob: The raw string used to register the trigger * @ops: The trigger ops associated with the trigger * @test: Trigger-specific data used to find the trigger to remove - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * Common implementation for event trigger unregistration. * @@ -566,7 +566,7 @@ out: */ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct event_trigger_data *data; bool unregistered = false; @@ -588,7 +588,7 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, /** * event_trigger_callback - Generic event_command @func implementation * @cmd_ops: The command ops, used for trigger registration - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * @glob: The raw string used to register the trigger * @cmd: The cmd portion of the string used to register the trigger * @param: The params portion of the string used to register the trigger @@ -603,7 +603,7 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, */ static int event_trigger_callback(struct event_command *cmd_ops, - struct ftrace_event_file *file, + struct trace_event_file *file, char *glob, char *cmd, char *param) { struct event_trigger_data *trigger_data; @@ -688,7 +688,7 @@ event_trigger_callback(struct event_command *cmd_ops, * set_trigger_filter - Generic event_command @set_filter implementation * @filter_str: The filter string for the trigger, NULL to remove filter * @trigger_data: Trigger-specific data - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * Common implementation for event command filter parsing and filter * instantiation. @@ -702,7 +702,7 @@ event_trigger_callback(struct event_command *cmd_ops, */ static int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct event_trigger_data *data = trigger_data; struct event_filter *filter = NULL, *tmp; @@ -900,7 +900,7 @@ snapshot_count_trigger(struct event_trigger_data *data) static int register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file) + struct trace_event_file *file) { int ret = register_trigger(glob, ops, data, file); @@ -968,7 +968,7 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } * Skip 3: * stacktrace_trigger() * event_triggers_post_call() - * ftrace_raw_event_xxx() + * trace_event_raw_event_xxx() */ #define STACK_SKIP 3 @@ -1053,7 +1053,7 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void) #define DISABLE_EVENT_STR "disable_event" struct enable_trigger_data { - struct ftrace_event_file *file; + struct trace_event_file *file; bool enable; }; @@ -1063,9 +1063,9 @@ event_enable_trigger(struct event_trigger_data *data) struct enable_trigger_data *enable_data = data->private_data; if (enable_data->enable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); else - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); } static void @@ -1077,7 +1077,7 @@ event_enable_count_trigger(struct event_trigger_data *data) return; /* Skip if the event is in a state we want to switch to */ - if (enable_data->enable == !(enable_data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (enable_data->enable == !(enable_data->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; if (data->count != -1) @@ -1095,7 +1095,7 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, seq_printf(m, "%s:%s:%s", enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, enable_data->file->event_call->class->system, - ftrace_event_name(enable_data->file->event_call)); + trace_event_name(enable_data->file->event_call)); if (data->count == -1) seq_puts(m, ":unlimited"); @@ -1159,10 +1159,10 @@ static struct event_trigger_ops event_disable_count_trigger_ops = { static int event_enable_trigger_func(struct event_command *cmd_ops, - struct ftrace_event_file *file, + struct trace_event_file *file, char *glob, char *cmd, char *param) { - struct ftrace_event_file *event_enable_file; + struct trace_event_file *event_enable_file; struct enable_trigger_data *enable_data; struct event_trigger_data *trigger_data; struct event_trigger_ops *trigger_ops; @@ -1294,7 +1294,7 @@ event_enable_trigger_func(struct event_command *cmd_ops, static int event_enable_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct enable_trigger_data *enable_data = data->private_data; struct enable_trigger_data *test_enable_data; @@ -1331,7 +1331,7 @@ out: static void event_enable_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct enable_trigger_data *test_enable_data = test->private_data; struct enable_trigger_data *enable_data; diff --git a/kernel/kernel/trace/trace_export.c b/kernel/kernel/trace/trace_export.c index 174a6a711..39aa7aa66 100644 --- a/kernel/kernel/trace/trace_export.c +++ b/kernel/kernel/trace/trace_export.c @@ -125,7 +125,7 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ static int __init \ -ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ +ftrace_define_fields_##name(struct trace_event_call *event_call) \ { \ struct struct_name field; \ int ret; \ @@ -163,14 +163,14 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ regfn) \ \ -struct ftrace_event_class __refdata event_class_ftrace_##call = { \ +struct trace_event_class __refdata event_class_ftrace_##call = { \ .system = __stringify(TRACE_SYSTEM), \ .define_fields = ftrace_define_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ .reg = regfn, \ }; \ \ -struct ftrace_event_call __used event_##call = { \ +struct trace_event_call __used event_##call = { \ .class = &event_class_ftrace_##call, \ { \ .name = #call, \ @@ -179,7 +179,7 @@ struct ftrace_event_call __used event_##call = { \ .print_fmt = print, \ .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ }; \ -struct ftrace_event_call __used \ +struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #undef FTRACE_ENTRY @@ -187,7 +187,7 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; FTRACE_ENTRY_REG(call, struct_name, etype, \ PARAMS(tstruct), PARAMS(print), filter, NULL) -int ftrace_event_is_function(struct ftrace_event_call *call) +bool ftrace_event_is_function(struct trace_event_call *call) { return call == &event_function; } diff --git a/kernel/kernel/trace/trace_functions_graph.c b/kernel/kernel/trace/trace_functions_graph.c index a51e79688..a663cbb84 100644 --- a/kernel/kernel/trace/trace_functions_graph.c +++ b/kernel/kernel/trace/trace_functions_graph.c @@ -83,13 +83,18 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, /* Display function name after trailing } */ { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) }, + /* Include sleep time (scheduled out) between entry and return */ + { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) }, + /* Include time within nested functions */ + { TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) }, { } /* Empty entry */ }; static struct tracer_flags tracer_flags = { /* Don't display overruns, proc, or tail by default */ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | - TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS, + TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS | + TRACE_GRAPH_SLEEP_TIME | TRACE_GRAPH_GRAPH_TIME, .opts = trace_opts }; @@ -107,8 +112,8 @@ enum { }; static void -print_graph_duration(unsigned long long duration, struct trace_seq *s, - u32 flags); +print_graph_duration(struct trace_array *tr, unsigned long long duration, + struct trace_seq *s, u32 flags); /* Add a function return address to the trace stack on thread info.*/ int @@ -278,14 +283,11 @@ int __trace_graph_entry(struct trace_array *tr, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_funcgraph_entry; + struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ent_entry *entry; - if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) - return 0; - event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, sizeof(*entry), flags, pc); if (!event) @@ -393,14 +395,11 @@ void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_funcgraph_exit; + struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ret_entry *entry; - if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) - return; - event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, sizeof(*entry), flags, pc); if (!event) @@ -653,6 +652,7 @@ static void print_graph_irq(struct trace_iterator *iter, unsigned long addr, enum trace_type type, int cpu, pid_t pid, u32 flags) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *ent = iter->ent; @@ -660,7 +660,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, addr >= (unsigned long)__irqentry_text_end) return; - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { /* Absolute time */ if (flags & TRACE_GRAPH_PRINT_ABS_TIME) print_graph_abs_time(iter->ts, s); @@ -676,19 +676,19 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, } /* Latency format */ - if (trace_flags & TRACE_ITER_LATENCY_FMT) + if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) print_graph_lat_fmt(s, ent); } /* No overhead */ - print_graph_duration(0, s, flags | FLAGS_FILL_START); + print_graph_duration(tr, 0, s, flags | FLAGS_FILL_START); if (type == TRACE_GRAPH_ENT) trace_seq_puts(s, "==========>"); else trace_seq_puts(s, "<=========="); - print_graph_duration(0, s, flags | FLAGS_FILL_END); + print_graph_duration(tr, 0, s, flags | FLAGS_FILL_END); trace_seq_putc(s, '\n'); } @@ -715,22 +715,22 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) snprintf(nsecs_str, slen, "%03lu", nsecs_rem); trace_seq_printf(s, ".%s", nsecs_str); - len += strlen(nsecs_str); + len += strlen(nsecs_str) + 1; } trace_seq_puts(s, " us "); /* Print remaining spaces to fit the row's width */ - for (i = len; i < 7; i++) + for (i = len; i < 8; i++) trace_seq_putc(s, ' '); } static void -print_graph_duration(unsigned long long duration, struct trace_seq *s, - u32 flags) +print_graph_duration(struct trace_array *tr, unsigned long long duration, + struct trace_seq *s, u32 flags) { if (!(flags & TRACE_GRAPH_PRINT_DURATION) || - !(trace_flags & TRACE_ITER_CONTEXT_INFO)) + !(tr->trace_flags & TRACE_ITER_CONTEXT_INFO)) return; /* No real adata, just filling the column with spaces */ @@ -764,6 +764,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, struct trace_seq *s, u32 flags) { struct fgraph_data *data = iter->private; + struct trace_array *tr = iter->tr; struct ftrace_graph_ret *graph_ret; struct ftrace_graph_ent *call; unsigned long long duration; @@ -792,7 +793,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, } /* Overhead and duration */ - print_graph_duration(duration, s, flags); + print_graph_duration(tr, duration, s, flags); /* Function */ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) @@ -810,6 +811,7 @@ print_graph_entry_nested(struct trace_iterator *iter, { struct ftrace_graph_ent *call = &entry->graph_ent; struct fgraph_data *data = iter->private; + struct trace_array *tr = iter->tr; int i; if (data) { @@ -825,7 +827,7 @@ print_graph_entry_nested(struct trace_iterator *iter, } /* No time */ - print_graph_duration(0, s, flags | FLAGS_FILL_FULL); + print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL); /* Function */ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) @@ -849,6 +851,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, { struct fgraph_data *data = iter->private; struct trace_entry *ent = iter->ent; + struct trace_array *tr = iter->tr; int cpu = iter->cpu; /* Pid */ @@ -858,7 +861,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, /* Interrupt */ print_graph_irq(iter, addr, type, cpu, ent->pid, flags); - if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) + if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO)) return; /* Absolute time */ @@ -876,7 +879,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, } /* Latency format */ - if (trace_flags & TRACE_ITER_LATENCY_FMT) + if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) print_graph_lat_fmt(s, ent); return; @@ -1027,6 +1030,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, { unsigned long long duration = trace->rettime - trace->calltime; struct fgraph_data *data = iter->private; + struct trace_array *tr = iter->tr; pid_t pid = ent->pid; int cpu = iter->cpu; int func_match = 1; @@ -1058,7 +1062,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, print_graph_prologue(iter, s, 0, 0, flags); /* Overhead and duration */ - print_graph_duration(duration, s, flags); + print_graph_duration(tr, duration, s, flags); /* Closing brace */ for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) @@ -1091,7 +1095,8 @@ static enum print_line_t print_graph_comment(struct trace_seq *s, struct trace_entry *ent, struct trace_iterator *iter, u32 flags) { - unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_array *tr = iter->tr; + unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); struct fgraph_data *data = iter->private; struct trace_event *event; int depth = 0; @@ -1104,7 +1109,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, print_graph_prologue(iter, s, 0, 0, flags); /* No time */ - print_graph_duration(0, s, flags | FLAGS_FILL_FULL); + print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL); /* Indentation */ if (depth > 0) @@ -1245,9 +1250,10 @@ static void print_lat_header(struct seq_file *s, u32 flags) seq_printf(s, "#%.*s||| / \n", size, spaces); } -static void __print_graph_headers_flags(struct seq_file *s, u32 flags) +static void __print_graph_headers_flags(struct trace_array *tr, + struct seq_file *s, u32 flags) { - int lat = trace_flags & TRACE_ITER_LATENCY_FMT; + int lat = tr->trace_flags & TRACE_ITER_LATENCY_FMT; if (lat) print_lat_header(s, flags); @@ -1289,11 +1295,12 @@ static void print_graph_headers(struct seq_file *s) void print_graph_headers_flags(struct seq_file *s, u32 flags) { struct trace_iterator *iter = s->private; + struct trace_array *tr = iter->tr; - if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) + if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO)) return; - if (trace_flags & TRACE_ITER_LATENCY_FMT) { + if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return; @@ -1301,7 +1308,7 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) print_trace_header(s, iter); } - __print_graph_headers_flags(s, flags); + __print_graph_headers_flags(tr, s, flags); } void graph_trace_open(struct trace_iterator *iter) @@ -1362,6 +1369,12 @@ func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) if (bit == TRACE_GRAPH_PRINT_IRQS) ftrace_graph_skip_irqs = !set; + if (bit == TRACE_GRAPH_SLEEP_TIME) + ftrace_graph_sleep_time_control(set); + + if (bit == TRACE_GRAPH_GRAPH_TIME) + ftrace_graph_graph_time_control(set); + return 0; } @@ -1454,12 +1467,12 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); - if (!register_ftrace_event(&graph_trace_entry_event)) { + if (!register_trace_event(&graph_trace_entry_event)) { pr_warning("Warning: could not register graph trace events\n"); return 1; } - if (!register_ftrace_event(&graph_trace_ret_event)) { + if (!register_trace_event(&graph_trace_ret_event)) { pr_warning("Warning: could not register graph trace events\n"); return 1; } diff --git a/kernel/kernel/trace/trace_irqsoff.c b/kernel/kernel/trace/trace_irqsoff.c index d0e1d0e48..069942c22 100644 --- a/kernel/kernel/trace/trace_irqsoff.c +++ b/kernel/kernel/trace/trace_irqsoff.c @@ -32,7 +32,6 @@ enum { static int trace_type __read_mostly; static int save_flags; -static bool function_enabled; static void stop_irqsoff_tracer(struct trace_array *tr, int graph); static int start_irqsoff_tracer(struct trace_array *tr, int graph); @@ -58,22 +57,16 @@ irq_trace(void) # define irq_trace() (0) #endif -#define TRACE_DISPLAY_GRAPH 1 - -static struct tracer_opt trace_opts[] = { #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* display latency trace as call graph */ - { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, +static int irqsoff_display_graph(struct trace_array *tr, int set); +# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH) +#else +static inline int irqsoff_display_graph(struct trace_array *tr, int set) +{ + return -EINVAL; +} +# define is_graph(tr) false #endif - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - .val = 0, - .opts = trace_opts, -}; - -#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) /* * Sequence count - we record it when starting a measurement and @@ -153,15 +146,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int -irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) +static int irqsoff_display_graph(struct trace_array *tr, int set) { int cpu; - if (!(bit & TRACE_DISPLAY_GRAPH)) - return -EINVAL; - - if (!(is_graph() ^ set)) + if (!(is_graph(tr) ^ set)) return 0; stop_irqsoff_tracer(irqsoff_trace, !set); @@ -210,7 +199,7 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) static void irqsoff_trace_open(struct trace_iterator *iter) { - if (is_graph()) + if (is_graph(iter->tr)) graph_trace_open(iter); } @@ -232,7 +221,7 @@ static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) * In graph mode call the graph tracer output function, * otherwise go with the TRACE_FN event handler */ - if (is_graph()) + if (is_graph(iter->tr)) return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); return TRACE_TYPE_UNHANDLED; @@ -240,7 +229,9 @@ static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) static void irqsoff_print_header(struct seq_file *s) { - if (is_graph()) + struct trace_array *tr = irqsoff_trace; + + if (is_graph(tr)) print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); else trace_default_header(s); @@ -251,7 +242,7 @@ __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { - if (is_graph()) + if (is_graph(tr)) trace_graph_function(tr, ip, parent_ip, flags, pc); else trace_function(tr, ip, parent_ip, flags, pc); @@ -260,27 +251,23 @@ __trace_function(struct trace_array *tr, #else #define __trace_function trace_function -static int -irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) -{ - return -EINVAL; -} - +#ifdef CONFIG_FUNCTION_TRACER static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) { return -1; } +#endif static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } -static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } static void irqsoff_trace_open(struct trace_iterator *iter) { } static void irqsoff_trace_close(struct trace_iterator *iter) { } #ifdef CONFIG_FUNCTION_TRACER +static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } static void irqsoff_print_header(struct seq_file *s) { trace_default_header(s); @@ -296,16 +283,16 @@ static void irqsoff_print_header(struct seq_file *s) /* * Should this new latency be reported/recorded? */ -static int report_latency(struct trace_array *tr, cycle_t delta) +static bool report_latency(struct trace_array *tr, cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) - return 0; + return false; } else { if (delta <= tr->max_latency) - return 0; + return false; } - return 1; + return true; } static void @@ -434,13 +421,13 @@ void start_critical_timings(void) { if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); - trace_preemptirqsoff_hist(TRACE_START, 1); + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1); } EXPORT_SYMBOL_GPL(start_critical_timings); void stop_critical_timings(void) { - trace_preemptirqsoff_hist(TRACE_STOP, 0); + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0); if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } @@ -534,12 +521,15 @@ void trace_preempt_off(unsigned long a0, unsigned long a1) } #endif /* CONFIG_PREEMPT_TRACER */ +#ifdef CONFIG_FUNCTION_TRACER +static bool function_enabled; + static int register_irqsoff_function(struct trace_array *tr, int graph, int set) { int ret; /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */ - if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION))) + if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION))) return 0; if (graph) @@ -567,20 +557,40 @@ static void unregister_irqsoff_function(struct trace_array *tr, int graph) function_enabled = false; } -static void irqsoff_function_set(struct trace_array *tr, int set) +static int irqsoff_function_set(struct trace_array *tr, u32 mask, int set) { + if (!(mask & TRACE_ITER_FUNCTION)) + return 0; + if (set) - register_irqsoff_function(tr, is_graph(), 1); + register_irqsoff_function(tr, is_graph(tr), 1); else - unregister_irqsoff_function(tr, is_graph()); + unregister_irqsoff_function(tr, is_graph(tr)); + return 1; +} +#else +static int register_irqsoff_function(struct trace_array *tr, int graph, int set) +{ + return 0; +} +static void unregister_irqsoff_function(struct trace_array *tr, int graph) { } +static inline int irqsoff_function_set(struct trace_array *tr, u32 mask, int set) +{ + return 0; } +#endif /* CONFIG_FUNCTION_TRACER */ static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set) { struct tracer *tracer = tr->current_trace; - if (mask & TRACE_ITER_FUNCTION) - irqsoff_function_set(tr, set); + if (irqsoff_function_set(tr, mask, set)) + return 0; + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (mask & TRACE_ITER_DISPLAY_GRAPH) + return irqsoff_display_graph(tr, set); +#endif return trace_keep_overwrite(tracer, mask, set); } @@ -613,7 +623,7 @@ static int __irqsoff_tracer_init(struct trace_array *tr) if (irqsoff_busy) return -EBUSY; - save_flags = trace_flags; + save_flags = tr->trace_flags; /* non overwrite screws up the latency tracers */ set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); @@ -629,7 +639,7 @@ static int __irqsoff_tracer_init(struct trace_array *tr) /* Only toplevel instance supports graph tracing */ if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL && - is_graph()))) + is_graph(tr)))) printk(KERN_ERR "failed to start irqsoff tracer\n"); irqsoff_busy = true; @@ -641,7 +651,7 @@ static void irqsoff_tracer_reset(struct trace_array *tr) int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; - stop_irqsoff_tracer(tr, is_graph()); + stop_irqsoff_tracer(tr, is_graph(tr)); set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); @@ -677,8 +687,6 @@ static struct tracer irqsoff_tracer __read_mostly = .print_max = true, .print_header = irqsoff_print_header, .print_line = irqsoff_print_line, - .flags = &tracer_flags, - .set_flag = irqsoff_set_flag, .flag_changed = irqsoff_flag_changed, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, @@ -711,8 +719,6 @@ static struct tracer preemptoff_tracer __read_mostly = .print_max = true, .print_header = irqsoff_print_header, .print_line = irqsoff_print_line, - .flags = &tracer_flags, - .set_flag = irqsoff_set_flag, .flag_changed = irqsoff_flag_changed, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptoff, @@ -747,8 +753,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .print_max = true, .print_header = irqsoff_print_header, .print_line = irqsoff_print_line, - .flags = &tracer_flags, - .set_flag = irqsoff_set_flag, .flag_changed = irqsoff_flag_changed, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, diff --git a/kernel/kernel/trace/trace_kdb.c b/kernel/kernel/trace/trace_kdb.c index 3ccf5c2c1..57149bce6 100644 --- a/kernel/kernel/trace/trace_kdb.c +++ b/kernel/kernel/trace/trace_kdb.c @@ -21,20 +21,22 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; + struct trace_array *tr; unsigned int old_userobj; int cnt = 0, cpu; trace_init_global_iter(&iter); iter.buffer_iter = buffer_iter; + tr = iter.tr; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } - old_userobj = trace_flags; + old_userobj = tr->trace_flags; /* don't look at user memory in panic mode */ - trace_flags &= ~TRACE_ITER_SYM_USEROBJ; + tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; kdb_printf("Dumping ftrace buffer:\n"); @@ -82,7 +84,7 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) kdb_printf("---------------------------------\n"); out: - trace_flags = old_userobj; + tr->trace_flags = old_userobj; for_each_tracing_cpu(cpu) { atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); diff --git a/kernel/kernel/trace/trace_kprobe.c b/kernel/kernel/trace/trace_kprobe.c index d0ce590f0..c9956440d 100644 --- a/kernel/kernel/trace/trace_kprobe.c +++ b/kernel/kernel/trace/trace_kprobe.c @@ -165,11 +165,9 @@ DEFINE_BASIC_FETCH_FUNCS(memory) static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, void *addr, void *dest) { - long ret; int maxlen = get_rloc_len(*(u32 *)dest); u8 *dst = get_rloc_data(dest); - u8 *src = addr; - mm_segment_t old_fs = get_fs(); + long ret; if (!maxlen) return; @@ -178,23 +176,13 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, * Try to get string again, since the string can be changed while * probing. */ - set_fs(KERNEL_DS); - pagefault_disable(); - - do - ret = __copy_from_user_inatomic(dst++, src++, 1); - while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); - - dst[-1] = '\0'; - pagefault_enable(); - set_fs(old_fs); + ret = strncpy_from_unsafe(dst, addr, maxlen); if (ret < 0) { /* Failed to fetch string */ - ((u8 *)get_rloc_data(dest))[0] = '\0'; + dst[0] = '\0'; *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); } else { - *(u32 *)dest = make_data_rloc(src - (u8 *)addr, - get_rloc_offs(*(u32 *)dest)); + *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest)); } } NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); @@ -348,7 +336,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, struct trace_kprobe *tk; list_for_each_entry(tk, &probe_list, list) - if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 && + if (strcmp(trace_event_name(&tk->tp.call), event) == 0 && strcmp(tk->tp.call.class->system, group) == 0) return tk; return NULL; @@ -359,7 +347,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, * if the file is NULL, enable "perf" handler, or enable "trace" handler. */ static int -enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) +enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { int ret = 0; @@ -394,7 +382,7 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) * if the file is NULL, disable "perf" handler, or disable "trace" handler. */ static int -disable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) +disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { struct event_file_link *link = NULL; int wait = 0; @@ -523,7 +511,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&probe_lock); /* Delete old (same name) event if exist */ - old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call), + old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call), tk->tp.call.class->system); if (old_tk) { ret = unregister_trace_kprobe(old_tk); @@ -572,7 +560,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb, if (ret) pr_warning("Failed to re-register probe %s on" "%s: %d\n", - ftrace_event_name(&tk->tp.call), + trace_event_name(&tk->tp.call), mod->name, ret); } } @@ -829,7 +817,7 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p'); seq_printf(m, ":%s/%s", tk->tp.call.class->system, - ftrace_event_name(&tk->tp.call)); + trace_event_name(&tk->tp.call)); if (!tk->symbol) seq_printf(m, " 0x%p", tk->rp.kp.addr); @@ -888,7 +876,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) struct trace_kprobe *tk = v; seq_printf(m, " %-44s %15lu %15lu\n", - ftrace_event_name(&tk->tp.call), tk->nhit, + trace_event_name(&tk->tp.call), tk->nhit, tk->rp.kp.nmissed); return 0; @@ -917,18 +905,18 @@ static const struct file_operations kprobe_profile_ops = { /* Kprobe handler */ static nokprobe_inline void __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, - struct ftrace_event_file *ftrace_file) + struct trace_event_file *trace_file) { struct kprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; int size, dsize, pc; unsigned long irq_flags; - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; - WARN_ON(call != ftrace_file->event_call); + WARN_ON(call != trace_file->event_call); - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; local_save_flags(irq_flags); @@ -937,7 +925,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, dsize = __get_data_size(&tk->tp, regs); size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, call->event.type, size, irq_flags, pc); if (!event) @@ -947,7 +935,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, entry->ip = (unsigned long)tk->rp.kp.addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - event_trigger_unlock_commit_regs(ftrace_file, buffer, event, + event_trigger_unlock_commit_regs(trace_file, buffer, event, entry, irq_flags, pc, regs); } @@ -965,18 +953,18 @@ NOKPROBE_SYMBOL(kprobe_trace_func); static nokprobe_inline void __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs, - struct ftrace_event_file *ftrace_file) + struct trace_event_file *trace_file) { struct kretprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; int size, pc, dsize; unsigned long irq_flags; - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; - WARN_ON(call != ftrace_file->event_call); + WARN_ON(call != trace_file->event_call); - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; local_save_flags(irq_flags); @@ -985,7 +973,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, dsize = __get_data_size(&tk->tp, regs); size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, call->event.type, size, irq_flags, pc); if (!event) @@ -996,7 +984,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - event_trigger_unlock_commit_regs(ftrace_file, buffer, event, + event_trigger_unlock_commit_regs(trace_file, buffer, event, entry, irq_flags, pc, regs); } @@ -1025,7 +1013,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); + trace_seq_printf(s, "%s: (", trace_event_name(&tp->call)); if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto out; @@ -1056,7 +1044,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); + trace_seq_printf(s, "%s: (", trace_event_name(&tp->call)); if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) goto out; @@ -1081,7 +1069,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, } -static int kprobe_event_define_fields(struct ftrace_event_call *event_call) +static int kprobe_event_define_fields(struct trace_event_call *event_call) { int ret, i; struct kprobe_trace_entry_head field; @@ -1104,7 +1092,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) +static int kretprobe_event_define_fields(struct trace_event_call *event_call) { int ret, i; struct kretprobe_trace_entry_head field; @@ -1134,7 +1122,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; @@ -1169,7 +1157,7 @@ static void kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; @@ -1206,11 +1194,11 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe * lockless, but we can't race with this __init function. */ -static int kprobe_register(struct ftrace_event_call *event, +static int kprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { struct trace_kprobe *tk = (struct trace_kprobe *)event->data; - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: @@ -1276,10 +1264,10 @@ static struct trace_event_functions kprobe_funcs = { static int register_kprobe_event(struct trace_kprobe *tk) { - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; int ret; - /* Initialize ftrace_event_call */ + /* Initialize trace_event_call */ INIT_LIST_HEAD(&call->class->fields); if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; @@ -1290,7 +1278,7 @@ static int register_kprobe_event(struct trace_kprobe *tk) } if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) return -ENOMEM; - ret = register_ftrace_event(&call->event); + ret = register_trace_event(&call->event); if (!ret) { kfree(call->print_fmt); return -ENODEV; @@ -1301,9 +1289,9 @@ static int register_kprobe_event(struct trace_kprobe *tk) ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register kprobe event: %s\n", - ftrace_event_name(call)); + trace_event_name(call)); kfree(call->print_fmt); - unregister_ftrace_event(&call->event); + unregister_trace_event(&call->event); } return ret; } @@ -1364,10 +1352,10 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, return a1 + a2 + a3 + a4 + a5 + a6; } -static struct ftrace_event_file * +static struct trace_event_file * find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) if (file->event_call == &tk->tp.call) @@ -1385,7 +1373,7 @@ static __init int kprobe_trace_self_tests_init(void) int ret, warn = 0; int (*target)(int, int, int, int, int, int); struct trace_kprobe *tk; - struct ftrace_event_file *file; + struct trace_event_file *file; if (tracing_is_disabled()) return -ENODEV; diff --git a/kernel/kernel/trace/trace_mmiotrace.c b/kernel/kernel/trace/trace_mmiotrace.c index 7a9ba62e9..2be8c4f24 100644 --- a/kernel/kernel/trace/trace_mmiotrace.c +++ b/kernel/kernel/trace/trace_mmiotrace.c @@ -298,7 +298,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { - struct ftrace_event_call *call = &event_mmiotrace_rw; + struct trace_event_call *call = &event_mmiotrace_rw; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; @@ -314,7 +314,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, entry->rw = *rw; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -328,7 +328,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { - struct ftrace_event_call *call = &event_mmiotrace_map; + struct trace_event_call *call = &event_mmiotrace_map; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; @@ -344,7 +344,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, entry->map = *map; if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, 0, pc); + trace_buffer_unlock_commit(tr, buffer, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/kernel/trace/trace_output.c b/kernel/kernel/trace/trace_output.c index c86bed272..9f19d839a 100644 --- a/kernel/kernel/trace/trace_output.c +++ b/kernel/kernel/trace/trace_output.c @@ -60,9 +60,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) } const char * -ftrace_print_flags_seq(struct trace_seq *p, const char *delim, - unsigned long flags, - const struct trace_print_flags *flag_array) +trace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array) { unsigned long mask; const char *str; @@ -95,11 +95,11 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, return ret; } -EXPORT_SYMBOL(ftrace_print_flags_seq); +EXPORT_SYMBOL(trace_print_flags_seq); const char * -ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, - const struct trace_print_flags *symbol_array) +trace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array) { int i; const char *ret = trace_seq_buffer_ptr(p); @@ -120,11 +120,11 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, return ret; } -EXPORT_SYMBOL(ftrace_print_symbols_seq); +EXPORT_SYMBOL(trace_print_symbols_seq); #if BITS_PER_LONG == 32 const char * -ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, +trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 *symbol_array) { int i; @@ -146,12 +146,12 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, return ret; } -EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); +EXPORT_SYMBOL(trace_print_symbols_seq_u64); #endif const char * -ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, - unsigned int bitmask_size) +trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size) { const char *ret = trace_seq_buffer_ptr(p); @@ -160,10 +160,10 @@ ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, return ret; } -EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq); +EXPORT_SYMBOL_GPL(trace_print_bitmask_seq); const char * -ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) +trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { int i; const char *ret = trace_seq_buffer_ptr(p); @@ -175,11 +175,11 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) return ret; } -EXPORT_SYMBOL(ftrace_print_hex_seq); +EXPORT_SYMBOL(trace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, - size_t el_size) +trace_print_array_seq(struct trace_seq *p, const void *buf, int count, + size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; @@ -220,17 +220,17 @@ ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, return ret; } -EXPORT_SYMBOL(ftrace_print_array_seq); +EXPORT_SYMBOL(trace_print_array_seq); -int ftrace_raw_output_prep(struct trace_iterator *iter, - struct trace_event *trace_event) +int trace_raw_output_prep(struct trace_iterator *iter, + struct trace_event *trace_event) { - struct ftrace_event_call *event; + struct trace_event_call *event; struct trace_seq *s = &iter->seq; struct trace_seq *p = &iter->tmp_seq; struct trace_entry *entry; - event = container_of(trace_event, struct ftrace_event_call, event); + event = container_of(trace_event, struct trace_event_call, event); entry = iter->ent; if (entry->type != event->event.type) { @@ -239,14 +239,14 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } trace_seq_init(p); - trace_seq_printf(s, "%s: ", ftrace_event_name(event)); + trace_seq_printf(s, "%s: ", trace_event_name(event)); return trace_handle_return(s); } -EXPORT_SYMBOL(ftrace_raw_output_prep); +EXPORT_SYMBOL(trace_raw_output_prep); -static int ftrace_output_raw(struct trace_iterator *iter, char *name, - char *fmt, va_list ap) +static int trace_output_raw(struct trace_iterator *iter, char *name, + char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; @@ -256,18 +256,18 @@ static int ftrace_output_raw(struct trace_iterator *iter, char *name, return trace_handle_return(s); } -int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) +int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = ftrace_output_raw(iter, name, fmt, ap); + ret = trace_output_raw(iter, name, fmt, ap); va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(ftrace_output_call); +EXPORT_SYMBOL_GPL(trace_output_call); #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) @@ -322,8 +322,8 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, - unsigned long ip, unsigned long sym_flags) +static int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags) { struct file *file = NULL; unsigned long vmstart = 0; @@ -355,50 +355,6 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, } int -seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) -{ - struct mm_struct *mm = NULL; - unsigned int i; - - if (trace_flags & TRACE_ITER_SYM_USEROBJ) { - struct task_struct *task; - /* - * we do the lookup on the thread group leader, - * since individual threads might have already quit! - */ - rcu_read_lock(); - task = find_task_by_vpid(entry->tgid); - if (task) - mm = get_task_mm(task); - rcu_read_unlock(); - } - - for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - unsigned long ip = entry->caller[i]; - - if (ip == ULONG_MAX || trace_seq_has_overflowed(s)) - break; - - trace_seq_puts(s, " => "); - - if (!ip) { - trace_seq_puts(s, "??"); - trace_seq_putc(s, '\n'); - continue; - } - - seq_print_user_ip(s, mm, ip, sym_flags); - trace_seq_putc(s, '\n'); - } - - if (mm) - mmput(mm); - - return !trace_seq_has_overflowed(s); -} - -int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { if (!ip) { @@ -510,6 +466,8 @@ static const struct trace_mark { char sym; } mark[] = { MARK(1000000000ULL , '$'), /* 1 sec */ + MARK(100000000ULL , '@'), /* 100 msec */ + MARK(10000000ULL , '*'), /* 10 msec */ MARK(1000000ULL , '#'), /* 1000 usecs */ MARK(100000ULL , '!'), /* 100 usecs */ MARK(10000ULL , '+'), /* 10 usecs */ @@ -522,7 +480,7 @@ char trace_find_mark(unsigned long long d) int size = ARRAY_SIZE(mark); for (i = 0; i < size; i++) { - if (d >= mark[i].val) + if (d > mark[i].val) break; } @@ -532,7 +490,8 @@ char trace_find_mark(unsigned long long d) static int lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) { - unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; + struct trace_array *tr = iter->tr; + unsigned long verbose = tr->trace_flags & TRACE_ITER_VERBOSE; unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start; unsigned long long rel_ts = next_ts - iter->ts; @@ -575,6 +534,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) int trace_print_context(struct trace_iterator *iter) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; unsigned long long t; @@ -586,7 +546,7 @@ int trace_print_context(struct trace_iterator *iter) trace_seq_printf(s, "%16s-%-5d [%03d] ", comm, entry->pid, iter->cpu); - if (trace_flags & TRACE_ITER_IRQ_INFO) + if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { @@ -602,14 +562,15 @@ int trace_print_context(struct trace_iterator *iter) int trace_print_lat_context(struct trace_iterator *iter) { - u64 next_ts; + struct trace_array *tr = iter->tr; /* trace_find_next_entry will reset ent_size */ int ent_size = iter->ent_size; struct trace_seq *s = &iter->seq; + u64 next_ts; struct trace_entry *entry = iter->ent, *next_entry = trace_find_next_entry(iter, NULL, &next_ts); - unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); + unsigned long verbose = (tr->trace_flags & TRACE_ITER_VERBOSE); /* Restore the original ent_size */ iter->ent_size = ent_size; @@ -689,7 +650,7 @@ static int trace_search_list(struct list_head **list) } /* Did we used up all 65 thousand events??? */ - if ((last + 1) > FTRACE_MAX_EVENT) + if ((last + 1) > TRACE_EVENT_TYPE_MAX) return 0; *list = &e->list; @@ -707,7 +668,7 @@ void trace_event_read_unlock(void) } /** - * register_ftrace_event - register output for an event type + * register_trace_event - register output for an event type * @event: the event type to register * * Event types are stored in a hash and this hash is used to @@ -721,7 +682,7 @@ void trace_event_read_unlock(void) * * Returns the event type number or zero on error. */ -int register_ftrace_event(struct trace_event *event) +int register_trace_event(struct trace_event *event) { unsigned key; int ret = 0; @@ -739,7 +700,7 @@ int register_ftrace_event(struct trace_event *event) if (!event->type) { struct list_head *list = NULL; - if (next_event_type > FTRACE_MAX_EVENT) { + if (next_event_type > TRACE_EVENT_TYPE_MAX) { event->type = trace_search_list(&list); if (!event->type) @@ -785,12 +746,12 @@ int register_ftrace_event(struct trace_event *event) return ret; } -EXPORT_SYMBOL_GPL(register_ftrace_event); +EXPORT_SYMBOL_GPL(register_trace_event); /* * Used by module code with the trace_event_sem held for write. */ -int __unregister_ftrace_event(struct trace_event *event) +int __unregister_trace_event(struct trace_event *event) { hlist_del(&event->node); list_del(&event->list); @@ -798,18 +759,18 @@ int __unregister_ftrace_event(struct trace_event *event) } /** - * unregister_ftrace_event - remove a no longer used event + * unregister_trace_event - remove a no longer used event * @event: the event to remove */ -int unregister_ftrace_event(struct trace_event *event) +int unregister_trace_event(struct trace_event *event) { down_write(&trace_event_sem); - __unregister_ftrace_event(event); + __unregister_trace_event(event); up_write(&trace_event_sem); return 0; } -EXPORT_SYMBOL_GPL(unregister_ftrace_event); +EXPORT_SYMBOL_GPL(unregister_trace_event); /* * Standard events @@ -1091,13 +1052,49 @@ static struct trace_event trace_stack_event = { static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, int flags, struct trace_event *event) { + struct trace_array *tr = iter->tr; struct userstack_entry *field; struct trace_seq *s = &iter->seq; + struct mm_struct *mm = NULL; + unsigned int i; trace_assign_type(field, iter->ent); trace_seq_puts(s, "<user stack trace>\n"); - seq_print_userip_objs(field, s, flags); + + if (tr->trace_flags & TRACE_ITER_SYM_USEROBJ) { + struct task_struct *task; + /* + * we do the lookup on the thread group leader, + * since individual threads might have already quit! + */ + rcu_read_lock(); + task = find_task_by_vpid(field->tgid); + if (task) + mm = get_task_mm(task); + rcu_read_unlock(); + } + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = field->caller[i]; + + if (ip == ULONG_MAX || trace_seq_has_overflowed(s)) + break; + + trace_seq_puts(s, " => "); + + if (!ip) { + trace_seq_puts(s, "??"); + trace_seq_putc(s, '\n'); + continue; + } + + seq_print_user_ip(s, mm, ip, flags); + trace_seq_putc(s, '\n'); + } + + if (mm) + mmput(mm); return trace_handle_return(s); } @@ -1257,7 +1254,7 @@ __init static int init_events(void) for (i = 0; events[i]; i++) { event = events[i]; - ret = register_ftrace_event(event); + ret = register_trace_event(event); if (!ret) { printk(KERN_WARNING "event %d failed to register\n", event->type); diff --git a/kernel/kernel/trace/trace_output.h b/kernel/kernel/trace/trace_output.h index 8ef2c40ef..fabc49bcd 100644 --- a/kernel/kernel/trace/trace_output.h +++ b/kernel/kernel/trace/trace_output.h @@ -14,10 +14,6 @@ trace_print_printk_msg_only(struct trace_iterator *iter); extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); -extern int seq_print_userip_objs(const struct userstack_entry *entry, - struct trace_seq *s, unsigned long sym_flags); -extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, - unsigned long ip, unsigned long sym_flags); extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); @@ -32,7 +28,7 @@ extern int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); /* used by module unregistering */ -extern int __unregister_ftrace_event(struct trace_event *event); +extern int __unregister_trace_event(struct trace_event *event); extern struct rw_semaphore trace_event_sem; #define SEQ_PUT_FIELD(s, x) \ diff --git a/kernel/kernel/trace/trace_printk.c b/kernel/kernel/trace/trace_printk.c index 36c1455b7..060df67db 100644 --- a/kernel/kernel/trace/trace_printk.c +++ b/kernel/kernel/trace/trace_printk.c @@ -178,6 +178,12 @@ static inline void format_mod_start(void) { } static inline void format_mod_stop(void) { } #endif /* CONFIG_MODULES */ +static bool __read_mostly trace_printk_enabled = true; + +void trace_printk_control(bool enabled) +{ + trace_printk_enabled = enabled; +} __initdata_or_module static struct notifier_block module_trace_bprintk_format_nb = { @@ -192,7 +198,7 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...) if (unlikely(!fmt)) return 0; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!trace_printk_enabled) return 0; va_start(ap, fmt); @@ -207,7 +213,7 @@ int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap) if (unlikely(!fmt)) return 0; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!trace_printk_enabled) return 0; return trace_vbprintk(ip, fmt, ap); @@ -219,7 +225,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...) int ret; va_list ap; - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!trace_printk_enabled) return 0; va_start(ap, fmt); @@ -231,7 +237,7 @@ EXPORT_SYMBOL_GPL(__trace_printk); int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) { - if (!(trace_flags & TRACE_ITER_PRINTK)) + if (!trace_printk_enabled) return 0; return trace_vprintk(ip, fmt, ap); @@ -267,6 +273,7 @@ static const char **find_next(void *v, loff_t *pos) if (*pos < last_index + start_index) return __start___tracepoint_str + (*pos - last_index); + start_index += last_index; return find_next_mod_format(start_index, v, fmt, pos); } diff --git a/kernel/kernel/trace/trace_probe.h b/kernel/kernel/trace/trace_probe.h index ab283e146..f6398db09 100644 --- a/kernel/kernel/trace/trace_probe.h +++ b/kernel/kernel/trace/trace_probe.h @@ -272,8 +272,8 @@ struct probe_arg { struct trace_probe { unsigned int flags; /* For TP_FLAG_* */ - struct ftrace_event_class class; - struct ftrace_event_call call; + struct trace_event_class class; + struct trace_event_call call; struct list_head files; ssize_t size; /* trace entry size */ unsigned int nr_args; @@ -281,7 +281,7 @@ struct trace_probe { }; struct event_file_link { - struct ftrace_event_file *file; + struct trace_event_file *file; struct list_head list; }; @@ -302,19 +302,19 @@ static nokprobe_inline void call_fetch(struct fetch_param *fprm, } /* Check the name is good for event/group/fields */ -static inline int is_good_name(const char *name) +static inline bool is_good_name(const char *name) { if (!isalpha(*name) && *name != '_') - return 0; + return false; while (*++name != '\0') { if (!isalpha(*name) && !isdigit(*name) && *name != '_') - return 0; + return false; } - return 1; + return true; } static inline struct event_file_link * -find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) +find_event_file_link(struct trace_probe *tp, struct trace_event_file *file) { struct event_file_link *link; diff --git a/kernel/kernel/trace/trace_sched_switch.c b/kernel/kernel/trace/trace_sched_switch.c index 419ca37e7..4c896a010 100644 --- a/kernel/kernel/trace/trace_sched_switch.c +++ b/kernel/kernel/trace/trace_sched_switch.c @@ -16,7 +16,8 @@ static int sched_ref; static DEFINE_MUTEX(sched_register_mutex); static void -probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) +probe_sched_switch(void *ignore, bool preempt, + struct task_struct *prev, struct task_struct *next) { if (unlikely(!sched_ref)) return; @@ -26,7 +27,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n } static void -probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) +probe_sched_wakeup(void *ignore, struct task_struct *wakee) { if (unlikely(!sched_ref)) return; diff --git a/kernel/kernel/trace/trace_sched_wakeup.c b/kernel/kernel/trace/trace_sched_wakeup.c index d6e100372..9d4399b55 100644 --- a/kernel/kernel/trace/trace_sched_wakeup.c +++ b/kernel/kernel/trace/trace_sched_wakeup.c @@ -34,31 +34,28 @@ static arch_spinlock_t wakeup_lock = static void wakeup_reset(struct trace_array *tr); static void __wakeup_reset(struct trace_array *tr); -static int wakeup_graph_entry(struct ftrace_graph_ent *trace); -static void wakeup_graph_return(struct ftrace_graph_ret *trace); static int save_flags; -static bool function_enabled; - -#define TRACE_DISPLAY_GRAPH 1 -static struct tracer_opt trace_opts[] = { #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* display latency trace as call graph */ - { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) }, +static int wakeup_display_graph(struct trace_array *tr, int set); +# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH) +#else +static inline int wakeup_display_graph(struct trace_array *tr, int set) +{ + return 0; +} +# define is_graph(tr) false #endif - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - .val = 0, - .opts = trace_opts, -}; -#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH) #ifdef CONFIG_FUNCTION_TRACER +static int wakeup_graph_entry(struct ftrace_graph_ent *trace); +static void wakeup_graph_return(struct ftrace_graph_ret *trace); + +static bool function_enabled; + /* * Prologue for the wakeup function tracers. * @@ -128,14 +125,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, atomic_dec(&data->disabled); preempt_enable_notrace(); } -#endif /* CONFIG_FUNCTION_TRACER */ static int register_wakeup_function(struct trace_array *tr, int graph, int set) { int ret; /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */ - if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION))) + if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION))) return 0; if (graph) @@ -163,20 +159,40 @@ static void unregister_wakeup_function(struct trace_array *tr, int graph) function_enabled = false; } -static void wakeup_function_set(struct trace_array *tr, int set) +static int wakeup_function_set(struct trace_array *tr, u32 mask, int set) { + if (!(mask & TRACE_ITER_FUNCTION)) + return 0; + if (set) - register_wakeup_function(tr, is_graph(), 1); + register_wakeup_function(tr, is_graph(tr), 1); else - unregister_wakeup_function(tr, is_graph()); + unregister_wakeup_function(tr, is_graph(tr)); + return 1; +} +#else +static int register_wakeup_function(struct trace_array *tr, int graph, int set) +{ + return 0; +} +static void unregister_wakeup_function(struct trace_array *tr, int graph) { } +static int wakeup_function_set(struct trace_array *tr, u32 mask, int set) +{ + return 0; } +#endif /* CONFIG_FUNCTION_TRACER */ static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) { struct tracer *tracer = tr->current_trace; - if (mask & TRACE_ITER_FUNCTION) - wakeup_function_set(tr, set); + if (wakeup_function_set(tr, mask, set)) + return 0; + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (mask & TRACE_ITER_DISPLAY_GRAPH) + return wakeup_display_graph(tr, set); +#endif return trace_keep_overwrite(tracer, mask, set); } @@ -203,14 +219,9 @@ static void stop_func_tracer(struct trace_array *tr, int graph) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static int -wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) +static int wakeup_display_graph(struct trace_array *tr, int set) { - - if (!(bit & TRACE_DISPLAY_GRAPH)) - return -EINVAL; - - if (!(is_graph() ^ set)) + if (!(is_graph(tr) ^ set)) return 0; stop_func_tracer(tr, !set); @@ -259,7 +270,7 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) static void wakeup_trace_open(struct trace_iterator *iter) { - if (is_graph()) + if (is_graph(iter->tr)) graph_trace_open(iter); } @@ -279,7 +290,7 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter) * In graph mode call the graph tracer output function, * otherwise go with the TRACE_FN event handler */ - if (is_graph()) + if (is_graph(iter->tr)) return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS); return TRACE_TYPE_UNHANDLED; @@ -287,7 +298,7 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter) static void wakeup_print_header(struct seq_file *s) { - if (is_graph()) + if (is_graph(wakeup_trace)) print_graph_headers_flags(s, GRAPH_TRACER_FLAGS); else trace_default_header(s); @@ -298,7 +309,7 @@ __trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { - if (is_graph()) + if (is_graph(tr)) trace_graph_function(tr, ip, parent_ip, flags, pc); else trace_function(tr, ip, parent_ip, flags, pc); @@ -306,27 +317,20 @@ __trace_function(struct trace_array *tr, #else #define __trace_function trace_function -static int -wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) -{ - return -EINVAL; -} - -static int wakeup_graph_entry(struct ftrace_graph_ent *trace) -{ - return -1; -} - static enum print_line_t wakeup_print_line(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } -static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } static void wakeup_trace_open(struct trace_iterator *iter) { } static void wakeup_trace_close(struct trace_iterator *iter) { } #ifdef CONFIG_FUNCTION_TRACER +static int wakeup_graph_entry(struct ftrace_graph_ent *trace) +{ + return -1; +} +static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } static void wakeup_print_header(struct seq_file *s) { trace_default_header(s); @@ -342,16 +346,16 @@ static void wakeup_print_header(struct seq_file *s) /* * Should this new latency be reported/recorded? */ -static int report_latency(struct trace_array *tr, cycle_t delta) +static bool report_latency(struct trace_array *tr, cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) - return 0; + return false; } else { if (delta <= tr->max_latency) - return 0; + return false; } - return 1; + return true; } static void @@ -369,7 +373,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_context_switch; + struct trace_event_call *call = &event_context_switch; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -388,7 +392,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, flags, pc); } static void @@ -397,7 +401,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_wakeup; + struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; struct ring_buffer *buffer = tr->trace_buffer.buffer; @@ -416,11 +420,11 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(tr, buffer, event, flags, pc); } static void notrace -probe_wakeup_sched_switch(void *ignore, +probe_wakeup_sched_switch(void *ignore, bool preempt, struct task_struct *prev, struct task_struct *next) { struct trace_array_cpu *data; @@ -514,7 +518,7 @@ static void wakeup_reset(struct trace_array *tr) } static void -probe_wakeup(void *ignore, struct task_struct *p, int success) +probe_wakeup(void *ignore, struct task_struct *p) { struct trace_array_cpu *data; int cpu = smp_processor_id(); @@ -635,7 +639,7 @@ static void start_wakeup_tracer(struct trace_array *tr) */ smp_wmb(); - if (start_func_tracer(tr, is_graph())) + if (start_func_tracer(tr, is_graph(tr))) printk(KERN_ERR "failed to start wakeup tracer\n"); return; @@ -648,7 +652,7 @@ fail_deprobe: static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; - stop_func_tracer(tr, is_graph()); + stop_func_tracer(tr, is_graph(tr)); unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); unregister_trace_sched_wakeup_new(probe_wakeup, NULL); unregister_trace_sched_wakeup(probe_wakeup, NULL); @@ -659,7 +663,7 @@ static bool wakeup_busy; static int __wakeup_tracer_init(struct trace_array *tr) { - save_flags = trace_flags; + save_flags = tr->trace_flags; /* non overwrite screws up the latency tracers */ set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); @@ -740,8 +744,6 @@ static struct tracer wakeup_tracer __read_mostly = .print_max = true, .print_header = wakeup_print_header, .print_line = wakeup_print_line, - .flags = &tracer_flags, - .set_flag = wakeup_set_flag, .flag_changed = wakeup_flag_changed, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, @@ -762,8 +764,6 @@ static struct tracer wakeup_rt_tracer __read_mostly = .print_max = true, .print_header = wakeup_print_header, .print_line = wakeup_print_line, - .flags = &tracer_flags, - .set_flag = wakeup_set_flag, .flag_changed = wakeup_flag_changed, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, @@ -784,8 +784,6 @@ static struct tracer wakeup_dl_tracer __read_mostly = .print_max = true, .print_header = wakeup_print_header, .print_line = wakeup_print_line, - .flags = &tracer_flags, - .set_flag = wakeup_set_flag, .flag_changed = wakeup_flag_changed, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, diff --git a/kernel/kernel/trace/trace_stack.c b/kernel/kernel/trace/trace_stack.c index 3f3449624..202df6cff 100644 --- a/kernel/kernel/trace/trace_stack.c +++ b/kernel/kernel/trace/trace_stack.c @@ -16,30 +16,22 @@ #include "trace.h" -#define STACK_TRACE_ENTRIES 500 - -#ifdef CC_USING_FENTRY -# define fentry 1 -#else -# define fentry 0 -#endif - static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; -static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; +unsigned stack_trace_index[STACK_TRACE_ENTRIES]; /* * Reserve one entry for the passed in ip. This will allow * us to remove most or all of the stack size overhead * added by the stack tracer itself. */ -static struct stack_trace max_stack_trace = { +struct stack_trace stack_trace_max = { .max_entries = STACK_TRACE_ENTRIES - 1, - .entries = &stack_dump_trace[1], + .entries = &stack_dump_trace[0], }; -static unsigned long max_stack_size; -static arch_spinlock_t max_stack_lock = +unsigned long stack_trace_max_size; +arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static DEFINE_PER_CPU(int, trace_active); @@ -48,83 +40,102 @@ static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; -static inline void print_max_stack(void) +void stack_trace_print(void) { long i; int size; pr_emerg(" Depth Size Location (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries - 1); + stack_trace_max.nr_entries); - for (i = 0; i < max_stack_trace.nr_entries; i++) { + for (i = 0; i < stack_trace_max.nr_entries; i++) { if (stack_dump_trace[i] == ULONG_MAX) break; - if (i+1 == max_stack_trace.nr_entries || + if (i+1 == stack_trace_max.nr_entries || stack_dump_trace[i+1] == ULONG_MAX) - size = stack_dump_index[i]; + size = stack_trace_index[i]; else - size = stack_dump_index[i] - stack_dump_index[i+1]; + size = stack_trace_index[i] - stack_trace_index[i+1]; - pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i], + pr_emerg("%3ld) %8d %5d %pS\n", i, stack_trace_index[i], size, (void *)stack_dump_trace[i]); } } -static inline void +/* + * When arch-specific code overides this function, the following + * data should be filled up, assuming stack_trace_max_lock is held to + * prevent concurrent updates. + * stack_trace_index[] + * stack_trace_max + * stack_trace_max_size + */ +void __weak check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; static int tracer_frame; int frame_size = ACCESS_ONCE(tracer_frame); - int i; + int i, x; this_size = ((unsigned long)stack) & (THREAD_SIZE-1); this_size = THREAD_SIZE - this_size; /* Remove the frame of the tracer */ this_size -= frame_size; - if (this_size <= max_stack_size) + if (this_size <= stack_trace_max_size) return; /* we do not handle interrupt stacks yet */ if (!object_is_on_stack(stack)) return; + /* Can't do this from NMI context (can cause deadlocks) */ + if (in_nmi()) + return; + local_irq_save(flags); - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); + + /* + * RCU may not be watching, make it see us. + * The stack trace code uses rcu_sched. + */ + rcu_irq_enter(); /* In case another CPU set the tracer_frame on us */ if (unlikely(!frame_size)) this_size -= tracer_frame; /* a race could have already updated it */ - if (this_size <= max_stack_size) + if (this_size <= stack_trace_max_size) goto out; - max_stack_size = this_size; + stack_trace_max_size = this_size; - max_stack_trace.nr_entries = 0; + stack_trace_max.nr_entries = 0; + stack_trace_max.skip = 3; - if (using_ftrace_ops_list_func()) - max_stack_trace.skip = 4; - else - max_stack_trace.skip = 3; + save_stack_trace(&stack_trace_max); - save_stack_trace(&max_stack_trace); + /* Skip over the overhead of the stack tracer itself */ + for (i = 0; i < stack_trace_max.nr_entries; i++) { + if (stack_dump_trace[i] == ip) + break; + } /* - * Add the passed in ip from the function tracer. - * Searching for this on the stack will skip over - * most of the overhead from the stack tracer itself. + * Some archs may not have the passed in ip in the dump. + * If that happens, we need to show everything. */ - stack_dump_trace[0] = ip; - max_stack_trace.nr_entries++; + if (i == stack_trace_max.nr_entries) + i = 0; /* * Now find where in the stack these are. */ - i = 0; + x = 0; start = stack; top = (unsigned long *) (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE); @@ -136,15 +147,18 @@ check_stack(unsigned long ip, unsigned long *stack) * loop will only happen once. This code only takes place * on a new max, so it is far from a fast path. */ - while (i < max_stack_trace.nr_entries) { + while (i < stack_trace_max.nr_entries) { int found = 0; - stack_dump_index[i] = this_size; + stack_trace_index[x] = this_size; p = start; - for (; p < top && i < max_stack_trace.nr_entries; p++) { + for (; p < top && i < stack_trace_max.nr_entries; p++) { + if (stack_dump_trace[i] == ULONG_MAX) + break; if (*p == stack_dump_trace[i]) { - this_size = stack_dump_index[i++] = + stack_dump_trace[x] = stack_dump_trace[i++]; + this_size = stack_trace_index[x++] = (top - p) * sizeof(unsigned long); found = 1; /* Start the search from here */ @@ -156,10 +170,10 @@ check_stack(unsigned long ip, unsigned long *stack) * out what that is, then figure it out * now. */ - if (unlikely(!tracer_frame) && i == 1) { + if (unlikely(!tracer_frame)) { tracer_frame = (p - stack) * sizeof(unsigned long); - max_stack_size -= tracer_frame; + stack_trace_max_size -= tracer_frame; } } } @@ -168,13 +182,18 @@ check_stack(unsigned long ip, unsigned long *stack) i++; } + stack_trace_max.nr_entries = x; + for (; x < i; x++) + stack_dump_trace[x] = ULONG_MAX; + if (task_stack_end_corrupted(current)) { - print_max_stack(); + stack_trace_print(); BUG(); } out: - arch_spin_unlock(&max_stack_lock); + rcu_irq_exit(); + arch_spin_unlock(&stack_trace_max_lock); local_irq_restore(flags); } @@ -192,24 +211,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, if (per_cpu(trace_active, cpu)++ != 0) goto out; - /* - * When fentry is used, the traced function does not get - * its stack frame set up, and we lose the parent. - * The ip is pretty useless because the function tracer - * was called before that function set up its stack frame. - * In this case, we use the parent ip. - * - * By adding the return address of either the parent ip - * or the current ip we can disregard most of the stack usage - * caused by the stack tracer itself. - * - * The function tracer always reports the address of where the - * mcount call was, but the stack will hold the return address. - */ - if (fentry) - ip = parent_ip; - else - ip += MCOUNT_INSN_SIZE; + ip += MCOUNT_INSN_SIZE; check_stack(ip, &stack); @@ -262,9 +264,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, cpu = smp_processor_id(); per_cpu(trace_active, cpu)++; - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); *ptr = val; - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); per_cpu(trace_active, cpu)--; local_irq_restore(flags); @@ -284,7 +286,7 @@ __next(struct seq_file *m, loff_t *pos) { long n = *pos - 1; - if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX) + if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX) return NULL; m->private = (void *)n; @@ -307,7 +309,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) cpu = smp_processor_id(); per_cpu(trace_active, cpu)++; - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); if (*pos == 0) return SEQ_START_TOKEN; @@ -319,7 +321,7 @@ static void t_stop(struct seq_file *m, void *p) { int cpu; - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); cpu = smp_processor_id(); per_cpu(trace_active, cpu)--; @@ -354,9 +356,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries - 1); + stack_trace_max.nr_entries); - if (!stack_tracer_enabled && !max_stack_size) + if (!stack_tracer_enabled && !stack_trace_max_size) print_disabled(m); return 0; @@ -364,17 +366,17 @@ static int t_show(struct seq_file *m, void *v) i = *(long *)v; - if (i >= max_stack_trace.nr_entries || + if (i >= stack_trace_max.nr_entries || stack_dump_trace[i] == ULONG_MAX) return 0; - if (i+1 == max_stack_trace.nr_entries || + if (i+1 == stack_trace_max.nr_entries || stack_dump_trace[i+1] == ULONG_MAX) - size = stack_dump_index[i]; + size = stack_trace_index[i]; else - size = stack_dump_index[i] - stack_dump_index[i+1]; + size = stack_trace_index[i] - stack_trace_index[i+1]; - seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size); + seq_printf(m, "%3ld) %8d %5d ", i, stack_trace_index[i], size); trace_lookup_stack(m, i); @@ -464,7 +466,7 @@ static __init int stack_trace_init(void) return 0; trace_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); + &stack_trace_max_size, &stack_max_size_fops); trace_create_file("stack_trace", 0444, d_tracer, NULL, &stack_trace_fops); diff --git a/kernel/kernel/trace/trace_syscalls.c b/kernel/kernel/trace/trace_syscalls.c index f97f6e3a6..0655afbea 100644 --- a/kernel/kernel/trace/trace_syscalls.c +++ b/kernel/kernel/trace/trace_syscalls.c @@ -13,13 +13,13 @@ static DEFINE_MUTEX(syscall_trace_lock); -static int syscall_enter_register(struct ftrace_event_call *event, +static int syscall_enter_register(struct trace_event_call *event, enum trace_reg type, void *data); -static int syscall_exit_register(struct ftrace_event_call *event, +static int syscall_exit_register(struct trace_event_call *event, enum trace_reg type, void *data); static struct list_head * -syscall_get_enter_fields(struct ftrace_event_call *call) +syscall_get_enter_fields(struct trace_event_call *call) { struct syscall_metadata *entry = call->data; @@ -110,6 +110,7 @@ static enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags, struct trace_event *event) { + struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; struct trace_entry *ent = iter->ent; struct syscall_trace_enter *trace; @@ -136,7 +137,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags, goto end; /* parameter types */ - if (trace_flags & TRACE_ITER_VERBOSE) + if (tr->trace_flags & TRACE_ITER_VERBOSE) trace_seq_printf(s, "%s ", entry->types[i]); /* parameter values */ @@ -219,7 +220,7 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) return pos; } -static int __init set_syscall_print_fmt(struct ftrace_event_call *call) +static int __init set_syscall_print_fmt(struct trace_event_call *call) { char *print_fmt; int len; @@ -244,7 +245,7 @@ static int __init set_syscall_print_fmt(struct ftrace_event_call *call) return 0; } -static void __init free_syscall_print_fmt(struct ftrace_event_call *call) +static void __init free_syscall_print_fmt(struct trace_event_call *call) { struct syscall_metadata *entry = call->data; @@ -252,7 +253,7 @@ static void __init free_syscall_print_fmt(struct ftrace_event_call *call) kfree(call->print_fmt); } -static int __init syscall_enter_define_fields(struct ftrace_event_call *call) +static int __init syscall_enter_define_fields(struct trace_event_call *call) { struct syscall_trace_enter trace; struct syscall_metadata *meta = call->data; @@ -275,7 +276,7 @@ static int __init syscall_enter_define_fields(struct ftrace_event_call *call) return ret; } -static int __init syscall_exit_define_fields(struct ftrace_event_call *call) +static int __init syscall_exit_define_fields(struct trace_event_call *call) { struct syscall_trace_exit trace; int ret; @@ -293,7 +294,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call) static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) { struct trace_array *tr = data; - struct ftrace_event_file *ftrace_file; + struct trace_event_file *trace_file; struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; @@ -308,11 +309,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) return; /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ - ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); - if (!ftrace_file) + trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); + if (!trace_file) return; - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -334,14 +335,14 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, + event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) { struct trace_array *tr = data; - struct ftrace_event_file *ftrace_file; + struct trace_event_file *trace_file; struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; @@ -355,11 +356,11 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) return; /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ - ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); - if (!ftrace_file) + trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); + if (!trace_file) return; - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -380,12 +381,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, + event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); } -static int reg_event_syscall_enter(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static int reg_event_syscall_enter(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int ret = 0; @@ -405,8 +406,8 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file, return ret; } -static void unreg_event_syscall_enter(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static void unreg_event_syscall_enter(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int num; @@ -422,8 +423,8 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file, mutex_unlock(&syscall_trace_lock); } -static int reg_event_syscall_exit(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static int reg_event_syscall_exit(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int ret = 0; @@ -443,8 +444,8 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file, return ret; } -static void unreg_event_syscall_exit(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static void unreg_event_syscall_exit(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int num; @@ -460,7 +461,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file, mutex_unlock(&syscall_trace_lock); } -static int __init init_syscall_trace(struct ftrace_event_call *call) +static int __init init_syscall_trace(struct trace_event_call *call) { int id; int num; @@ -493,7 +494,7 @@ struct trace_event_functions exit_syscall_print_funcs = { .trace = print_syscall_exit, }; -struct ftrace_event_class __refdata event_class_syscall_enter = { +struct trace_event_class __refdata event_class_syscall_enter = { .system = "syscalls", .reg = syscall_enter_register, .define_fields = syscall_enter_define_fields, @@ -501,7 +502,7 @@ struct ftrace_event_class __refdata event_class_syscall_enter = { .raw_init = init_syscall_trace, }; -struct ftrace_event_class __refdata event_class_syscall_exit = { +struct trace_event_class __refdata event_class_syscall_exit = { .system = "syscalls", .reg = syscall_exit_register, .define_fields = syscall_exit_define_fields, @@ -584,7 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -static int perf_sysenter_enable(struct ftrace_event_call *call) +static int perf_sysenter_enable(struct trace_event_call *call) { int ret = 0; int num; @@ -605,7 +606,7 @@ static int perf_sysenter_enable(struct ftrace_event_call *call) return ret; } -static void perf_sysenter_disable(struct ftrace_event_call *call) +static void perf_sysenter_disable(struct trace_event_call *call) { int num; @@ -656,7 +657,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -static int perf_sysexit_enable(struct ftrace_event_call *call) +static int perf_sysexit_enable(struct trace_event_call *call) { int ret = 0; int num; @@ -677,7 +678,7 @@ static int perf_sysexit_enable(struct ftrace_event_call *call) return ret; } -static void perf_sysexit_disable(struct ftrace_event_call *call) +static void perf_sysexit_disable(struct trace_event_call *call) { int num; @@ -693,10 +694,10 @@ static void perf_sysexit_disable(struct ftrace_event_call *call) #endif /* CONFIG_PERF_EVENTS */ -static int syscall_enter_register(struct ftrace_event_call *event, +static int syscall_enter_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: @@ -721,10 +722,10 @@ static int syscall_enter_register(struct ftrace_event_call *event, return 0; } -static int syscall_exit_register(struct ftrace_event_call *event, +static int syscall_exit_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: diff --git a/kernel/kernel/trace/trace_uprobe.c b/kernel/kernel/trace/trace_uprobe.c index 6dd022c7b..d2f6d0be3 100644 --- a/kernel/kernel/trace/trace_uprobe.c +++ b/kernel/kernel/trace/trace_uprobe.c @@ -293,7 +293,7 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou struct trace_uprobe *tu; list_for_each_entry(tu, &uprobe_list, list) - if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 && + if (strcmp(trace_event_name(&tu->tp.call), event) == 0 && strcmp(tu->tp.call.class->system, group) == 0) return tu; @@ -323,7 +323,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&uprobe_lock); /* register as an event */ - old_tu = find_probe_event(ftrace_event_name(&tu->tp.call), + old_tu = find_probe_event(trace_event_name(&tu->tp.call), tu->tp.call.class->system); if (old_tu) { /* delete old event */ @@ -600,8 +600,23 @@ static int probes_seq_show(struct seq_file *m, void *v) int i; seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, - ftrace_event_name(&tu->tp.call)); - seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); + trace_event_name(&tu->tp.call)); + seq_printf(m, " %s:", tu->filename); + + /* Don't print "0x (null)" when offset is 0 */ + if (tu->offset) { + seq_printf(m, "0x%p", (void *)tu->offset); + } else { + switch (sizeof(void *)) { + case 4: + seq_printf(m, "0x00000000"); + break; + case 8: + default: + seq_printf(m, "0x0000000000000000"); + break; + } + } for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); @@ -651,7 +666,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) struct trace_uprobe *tu = v; seq_printf(m, " %s %-44s %15lu\n", tu->filename, - ftrace_event_name(&tu->tp.call), tu->nhit); + trace_event_name(&tu->tp.call), tu->nhit); return 0; } @@ -770,26 +785,26 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize, - struct ftrace_event_file *ftrace_file) + struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; void *data; int size, esize; - struct ftrace_event_call *call = &tu->tp.call; + struct trace_event_call *call = &tu->tp.call; - WARN_ON(call != ftrace_file->event_call); + WARN_ON(call != trace_file->event_call); if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) return; - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, call->event.type, size, 0, 0); if (!event) return; @@ -806,7 +821,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); } /* uprobe handler */ @@ -853,12 +868,12 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", - ftrace_event_name(&tu->tp.call), + trace_event_name(&tu->tp.call), entry->vaddr[1], entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, true); } else { trace_seq_printf(s, "%s: (0x%lx)", - ftrace_event_name(&tu->tp.call), + trace_event_name(&tu->tp.call), entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, false); } @@ -881,7 +896,7 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, struct mm_struct *mm); static int -probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, +probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, filter_func_t filter) { bool enabled = trace_probe_is_enabled(&tu->tp); @@ -938,7 +953,7 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, } static void -probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file) +probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) { if (!trace_probe_is_enabled(&tu->tp)) return; @@ -967,7 +982,7 @@ probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file) uprobe_buffer_disable(); } -static int uprobe_event_define_fields(struct ftrace_event_call *event_call) +static int uprobe_event_define_fields(struct trace_event_call *event_call) { int ret, i, size; struct uprobe_trace_entry_head field; @@ -1093,13 +1108,17 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize) { - struct ftrace_event_call *call = &tu->tp.call; + struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; + struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; @@ -1159,11 +1178,11 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, #endif /* CONFIG_PERF_EVENTS */ static int -trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, +trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { struct trace_uprobe *tu = event->data; - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: @@ -1272,10 +1291,10 @@ static struct trace_event_functions uprobe_funcs = { static int register_uprobe_event(struct trace_uprobe *tu) { - struct ftrace_event_call *call = &tu->tp.call; + struct trace_event_call *call = &tu->tp.call; int ret; - /* Initialize ftrace_event_call */ + /* Initialize trace_event_call */ INIT_LIST_HEAD(&call->class->fields); call->event.funcs = &uprobe_funcs; call->class->define_fields = uprobe_event_define_fields; @@ -1283,21 +1302,22 @@ static int register_uprobe_event(struct trace_uprobe *tu) if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) return -ENOMEM; - ret = register_ftrace_event(&call->event); + ret = register_trace_event(&call->event); if (!ret) { kfree(call->print_fmt); return -ENODEV; } + call->flags = TRACE_EVENT_FL_UPROBE; call->class->reg = trace_uprobe_register; call->data = tu; ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register uprobe event: %s\n", - ftrace_event_name(call)); + trace_event_name(call)); kfree(call->print_fmt); - unregister_ftrace_event(&call->event); + unregister_trace_event(&call->event); } return ret; |