From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/tools/perf/util/intel-pt.c | 2164 +++++++++++++++++++++++++++++++++++++ 1 file changed, 2164 insertions(+) create mode 100644 kernel/tools/perf/util/intel-pt.c (limited to 'kernel/tools/perf/util/intel-pt.c') diff --git a/kernel/tools/perf/util/intel-pt.c b/kernel/tools/perf/util/intel-pt.c new file mode 100644 index 000000000..97f963a3d --- /dev/null +++ b/kernel/tools/perf/util/intel-pt.c @@ -0,0 +1,2164 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include + +#include "../perf.h" +#include "session.h" +#include "machine.h" +#include "sort.h" +#include "tool.h" +#include "event.h" +#include "evlist.h" +#include "evsel.h" +#include "map.h" +#include "color.h" +#include "util.h" +#include "thread.h" +#include "thread-stack.h" +#include "symbol.h" +#include "callchain.h" +#include "dso.h" +#include "debug.h" +#include "auxtrace.h" +#include "tsc.h" +#include "intel-pt.h" + +#include "intel-pt-decoder/intel-pt-log.h" +#include "intel-pt-decoder/intel-pt-decoder.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" +#include "intel-pt-decoder/intel-pt-pkt-decoder.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct intel_pt { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + struct perf_evsel *switch_evsel; + struct thread *unknown_thread; + bool timeless_decoding; + bool sampling_mode; + bool snapshot_mode; + bool per_cpu_mmaps; + bool have_tsc; + bool data_queued; + bool est_tsc; + bool sync_switch; + bool mispred_all; + int have_sched_switch; + u32 pmu_type; + u64 kernel_start; + u64 switch_ip; + u64 ptss_ip; + + struct perf_tsc_conversion tc; + bool cap_user_time_zero; + + struct itrace_synth_opts synth_opts; + + bool sample_instructions; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; + + bool sample_branches; + u32 branches_filter; + u64 branches_sample_type; + u64 branches_id; + + bool sample_transactions; + u64 transactions_sample_type; + u64 transactions_id; + + bool synth_needs_swap; + + u64 tsc_bit; + u64 mtc_bit; + u64 mtc_freq_bits; + u32 tsc_ctc_ratio_n; + u32 tsc_ctc_ratio_d; + u64 cyc_bit; + u64 noretcomp_bit; + unsigned max_non_turbo_ratio; +}; + +enum switch_state { + INTEL_PT_SS_NOT_TRACING, + INTEL_PT_SS_UNKNOWN, + INTEL_PT_SS_TRACING, + INTEL_PT_SS_EXPECTING_SWITCH_EVENT, + INTEL_PT_SS_EXPECTING_SWITCH_IP, +}; + +struct intel_pt_queue { + struct intel_pt *pt; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + void *decoder; + const struct intel_pt_state *state; + struct ip_callchain *chain; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; + union perf_event *event_buf; + bool on_heap; + bool stop; + bool step_through_buffers; + bool use_buffer_pid_tid; + pid_t pid, tid; + int cpu; + int switch_state; + pid_t next_tid; + struct thread *thread; + bool exclude_kernel; + bool have_sample; + u64 time; + u64 timestamp; + u32 flags; + u16 insn_len; + u64 last_insn_cnt; +}; + +static void intel_pt_dump(struct intel_pt *pt __maybe_unused, + unsigned char *buf, size_t len) +{ + struct intel_pt_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[INTEL_PT_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... Intel Processor Trace data: size %zu bytes\n", + len); + + while (len) { + ret = intel_pt_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = intel_pt_pkt_desc(&packet, desc, + INTEL_PT_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, + size_t len) +{ + printf(".\n"); + intel_pt_dump(pt, buf, len); +} + +static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, + struct auxtrace_buffer *b) +{ + void *start; + + start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, + pt->have_tsc); + if (!start) + return -EINVAL; + b->use_size = b->data + b->size - start; + b->use_data = start; + return 0; +} + +static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, + struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer) +{ + if (queue->cpu == -1 && buffer->cpu != -1) + ptq->cpu = buffer->cpu; + + ptq->pid = buffer->pid; + ptq->tid = buffer->tid; + + intel_pt_log("queue %u cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + + thread__zput(ptq->thread); + + if (ptq->tid != -1) { + if (ptq->pid != -1) + ptq->thread = machine__findnew_thread(ptq->pt->machine, + ptq->pid, + ptq->tid); + else + ptq->thread = machine__find_thread(ptq->pt->machine, -1, + ptq->tid); + } +} + +/* This function assumes data is processed sequentially only */ +static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) +{ + struct intel_pt_queue *ptq = data; + struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; + struct auxtrace_queue *queue; + + if (ptq->stop) { + b->len = 0; + return 0; + } + + queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; + + buffer = auxtrace_buffer__next(queue, buffer); + if (!buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + b->len = 0; + return 0; + } + + ptq->buffer = buffer; + + if (!buffer->data) { + int fd = perf_data_file__fd(ptq->pt->session->file); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && + intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) + return -ENOMEM; + + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + if (buffer->use_data) { + b->len = buffer->use_size; + b->buf = buffer->use_data; + } else { + b->len = buffer->size; + b->buf = buffer->data; + } + b->ref_timestamp = buffer->reference; + + if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && + !buffer->consecutive)) { + b->consecutive = false; + b->trace_nr = buffer->buffer_nr + 1; + } else { + b->consecutive = true; + } + + if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || + ptq->tid != buffer->tid)) + intel_pt_use_buffer_pid_tid(ptq, queue, buffer); + + if (ptq->step_through_buffers) + ptq->stop = true; + + if (!b->len) + return intel_pt_get_trace(b, data); + + return 0; +} + +struct intel_pt_cache_entry { + struct auxtrace_cache_entry entry; + u64 insn_cnt; + u64 byte_cnt; + enum intel_pt_insn_op op; + enum intel_pt_insn_branch branch; + int length; + int32_t rel; +}; + +static int intel_pt_config_div(const char *var, const char *value, void *data) +{ + int *d = data; + long val; + + if (!strcmp(var, "intel-pt.cache-divisor")) { + val = strtol(value, NULL, 0); + if (val > 0 && val <= INT_MAX) + *d = val; + } + + return 0; +} + +static int intel_pt_cache_divisor(void) +{ + static int d; + + if (d) + return d; + + perf_config(intel_pt_config_div, &d); + + if (!d) + d = 64; + + return d; +} + +static unsigned int intel_pt_cache_size(struct dso *dso, + struct machine *machine) +{ + off_t size; + + size = dso__data_size(dso, machine); + size /= intel_pt_cache_divisor(); + if (size < 1000) + return 10; + if (size > (1 << 21)) + return 21; + return 32 - __builtin_clz(size); +} + +static struct auxtrace_cache *intel_pt_cache(struct dso *dso, + struct machine *machine) +{ + struct auxtrace_cache *c; + unsigned int bits; + + if (dso->auxtrace_cache) + return dso->auxtrace_cache; + + bits = intel_pt_cache_size(dso, machine); + + /* Ignoring cache creation failure */ + c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); + + dso->auxtrace_cache = c; + + return c; +} + +static int intel_pt_cache_add(struct dso *dso, struct machine *machine, + u64 offset, u64 insn_cnt, u64 byte_cnt, + struct intel_pt_insn *intel_pt_insn) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + struct intel_pt_cache_entry *e; + int err; + + if (!c) + return -ENOMEM; + + e = auxtrace_cache__alloc_entry(c); + if (!e) + return -ENOMEM; + + e->insn_cnt = insn_cnt; + e->byte_cnt = byte_cnt; + e->op = intel_pt_insn->op; + e->branch = intel_pt_insn->branch; + e->length = intel_pt_insn->length; + e->rel = intel_pt_insn->rel; + + err = auxtrace_cache__add(c, offset, &e->entry); + if (err) + auxtrace_cache__free_entry(c, e); + + return err; +} + +static struct intel_pt_cache_entry * +intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return NULL; + + return auxtrace_cache__lookup(dso->auxtrace_cache, offset); +} + +static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, + uint64_t *insn_cnt_ptr, uint64_t *ip, + uint64_t to_ip, uint64_t max_insn_cnt, + void *data) +{ + struct intel_pt_queue *ptq = data; + struct machine *machine = ptq->pt->machine; + struct thread *thread; + struct addr_location al; + unsigned char buf[1024]; + size_t bufsz; + ssize_t len; + int x86_64; + u8 cpumode; + u64 offset, start_offset, start_ip; + u64 insn_cnt = 0; + bool one_map = true; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + bufsz = intel_pt_insn_max_size(); + + if (*ip >= ptq->pt->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = ptq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = ptq->pt->unknown_thread; + } + + while (1) { + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); + if (!al.map || !al.map->dso) + return -EINVAL; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, + DSO_DATA_STATUS_SEEN_ITRACE)) + return -ENOENT; + + offset = al.map->map_ip(al.map, *ip); + + if (!to_ip && one_map) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (e && + (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { + *insn_cnt_ptr = e->insn_cnt; + *ip += e->byte_cnt; + intel_pt_insn->op = e->op; + intel_pt_insn->branch = e->branch; + intel_pt_insn->length = e->length; + intel_pt_insn->rel = e->rel; + intel_pt_log_insn_no_data(intel_pt_insn, *ip); + return 0; + } + } + + start_offset = offset; + start_ip = *ip; + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map, machine->symbol_filter); + + x86_64 = al.map->dso->is_64_bit; + + while (1) { + len = dso__data_read_offset(al.map->dso, machine, + offset, buf, bufsz); + if (len <= 0) + return -EINVAL; + + if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) + return -EINVAL; + + intel_pt_log_insn(intel_pt_insn, *ip); + + insn_cnt += 1; + + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + goto out; + + if (max_insn_cnt && insn_cnt >= max_insn_cnt) + goto out_no_cache; + + *ip += intel_pt_insn->length; + + if (to_ip && *ip == to_ip) + goto out_no_cache; + + if (*ip >= al.map->end) + break; + + offset += intel_pt_insn->length; + } + one_map = false; + } +out: + *insn_cnt_ptr = insn_cnt; + + if (!one_map) + goto out_no_cache; + + /* + * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate + * entries. + */ + if (to_ip) { + struct intel_pt_cache_entry *e; + + e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); + if (e) + return 0; + } + + /* Ignore cache errors */ + intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, + *ip - start_ip, intel_pt_insn); + + return 0; + +out_no_cache: + *insn_cnt_ptr = insn_cnt; + return 0; +} + +static bool intel_pt_get_config(struct intel_pt *pt, + struct perf_event_attr *attr, u64 *config) +{ + if (attr->type == pt->pmu_type) { + if (config) + *config = attr->config; + return true; + } + + return false; +} + +static bool intel_pt_exclude_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return false; + } + return true; +} + +static bool intel_pt_return_compression(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + u64 config; + + if (!pt->noretcomp_bit) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config) && + (config & pt->noretcomp_bit)) + return false; + } + return true; +} + +static unsigned int intel_pt_mtc_period(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + unsigned int shift; + u64 config; + + if (!pt->mtc_freq_bits) + return 0; + + for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) + config >>= 1; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) + return (config & pt->mtc_freq_bits) >> shift; + } + return 0; +} + +static bool intel_pt_timeless_decoding(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool timeless_decoding = true; + u64 config; + + if (!pt->tsc_bit || !pt->cap_user_time_zero) + return true; + + evlist__for_each(pt->session->evlist, evsel) { + if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) + return true; + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + timeless_decoding = false; + else + return true; + } + } + return timeless_decoding; +} + +static bool intel_pt_tracing_kernel(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, NULL) && + !evsel->attr.exclude_kernel) + return true; + } + return false; +} + +static bool intel_pt_have_tsc(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + bool have_tsc = false; + u64 config; + + if (!pt->tsc_bit) + return false; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) { + if (config & pt->tsc_bit) + have_tsc = true; + else + return false; + } + } + return have_tsc; +} + +static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) +{ + u64 quot, rem; + + quot = ns / pt->tc.time_mult; + rem = ns % pt->tc.time_mult; + return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / + pt->tc.time_mult; +} + +static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, + unsigned int queue_nr) +{ + struct intel_pt_params params = { .get_trace = 0, }; + struct intel_pt_queue *ptq; + + ptq = zalloc(sizeof(struct intel_pt_queue)); + if (!ptq) + return NULL; + + if (pt->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + + sz += pt->synth_opts.callchain_sz * sizeof(u64); + ptq->chain = zalloc(sz); + if (!ptq->chain) + goto out_free; + } + + if (pt->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += pt->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + ptq->last_branch = zalloc(sz); + if (!ptq->last_branch) + goto out_free; + ptq->last_branch_rb = zalloc(sz); + if (!ptq->last_branch_rb) + goto out_free; + } + + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!ptq->event_buf) + goto out_free; + + ptq->pt = pt; + ptq->queue_nr = queue_nr; + ptq->exclude_kernel = intel_pt_exclude_kernel(pt); + ptq->pid = -1; + ptq->tid = -1; + ptq->cpu = -1; + ptq->next_tid = -1; + + params.get_trace = intel_pt_get_trace; + params.walk_insn = intel_pt_walk_next_insn; + params.data = ptq; + params.return_compression = intel_pt_return_compression(pt); + params.max_non_turbo_ratio = pt->max_non_turbo_ratio; + params.mtc_period = intel_pt_mtc_period(pt); + params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; + params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + + if (pt->synth_opts.instructions) { + if (pt->synth_opts.period) { + switch (pt->synth_opts.period_type) { + case PERF_ITRACE_PERIOD_INSTRUCTIONS: + params.period_type = + INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_TICKS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = pt->synth_opts.period; + break; + case PERF_ITRACE_PERIOD_NANOSECS: + params.period_type = INTEL_PT_PERIOD_TICKS; + params.period = intel_pt_ns_to_ticks(pt, + pt->synth_opts.period); + break; + default: + break; + } + } + + if (!params.period) { + params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; + params.period = 1; + } + } + + ptq->decoder = intel_pt_decoder_new(¶ms); + if (!ptq->decoder) + goto out_free; + + return ptq; + +out_free: + zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); + zfree(&ptq->chain); + free(ptq); + return NULL; +} + +static void intel_pt_free_queue(void *priv) +{ + struct intel_pt_queue *ptq = priv; + + if (!ptq) + return; + thread__zput(ptq->thread); + intel_pt_decoder_free(ptq->decoder); + zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); + zfree(&ptq->chain); + free(ptq); +} + +static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, + struct auxtrace_queue *queue) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (queue->tid == -1 || pt->have_sched_switch) { + ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + thread__zput(ptq->thread); + } + + if (!ptq->thread && ptq->tid != -1) + ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); + + if (ptq->thread) { + ptq->pid = ptq->thread->pid_; + if (queue->cpu == -1) + ptq->cpu = ptq->thread->cpu; + } +} + +static void intel_pt_sample_flags(struct intel_pt_queue *ptq) +{ + if (ptq->state->flags & INTEL_PT_ABORT_TX) { + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; + } else if (ptq->state->flags & INTEL_PT_ASYNC) { + if (ptq->state->to_ip) + ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + ptq->insn_len = 0; + } else { + if (ptq->state->from_ip) + ptq->flags = intel_pt_insn_type(ptq->state->insn_op); + else + ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->flags & INTEL_PT_IN_TX) + ptq->flags |= PERF_IP_FLAG_IN_TX; + ptq->insn_len = ptq->state->insn_len; + } +} + +static int intel_pt_setup_queue(struct intel_pt *pt, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct intel_pt_queue *ptq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (!ptq) { + ptq = intel_pt_alloc_queue(pt, queue_nr); + if (!ptq) + return -ENOMEM; + queue->priv = ptq; + + if (queue->cpu != -1) + ptq->cpu = queue->cpu; + ptq->tid = queue->tid; + + if (pt->sampling_mode) { + if (pt->timeless_decoding) + ptq->step_through_buffers = true; + if (pt->timeless_decoding || !pt->have_sched_switch) + ptq->use_buffer_pid_tid = true; + } + } + + if (!ptq->on_heap && + (!pt->sync_switch || + ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { + const struct intel_pt_state *state; + int ret; + + if (pt->timeless_decoding) + return 0; + + intel_pt_log("queue %u getting timestamp\n", queue_nr); + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) { + intel_pt_log("queue %u has no timestamp\n", + queue_nr); + return 0; + } + continue; + } + if (state->timestamp) + break; + } + + ptq->timestamp = state->timestamp; + intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", + queue_nr, ptq->timestamp); + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); + if (ret) + return ret; + ptq->on_heap = true; + } + + return 0; +} + +static int intel_pt_setup_queues(struct intel_pt *pt) +{ + unsigned int i; + int ret; + + for (i = 0; i < pt->queues.nr_queues; i++) { + ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); + if (ret) + return ret; + } + return 0; +} + +static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) +{ + struct branch_stack *bs_src = ptq->last_branch_rb; + struct branch_stack *bs_dst = ptq->last_branch; + size_t nr = 0; + + bs_dst->nr = bs_src->nr; + + if (!bs_src->nr) + return; + + nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[ptq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * ptq->last_branch_pos); + } +} + +static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) +{ + ptq->last_branch_pos = 0; + ptq->last_branch_rb->nr = 0; +} + +static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct branch_stack *bs = ptq->last_branch_rb; + struct branch_entry *be; + + if (!ptq->last_branch_pos) + ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; + + ptq->last_branch_pos -= 1; + + be = &bs->entries[ptq->last_branch_pos]; + be->from = state->from_ip; + be->to = state->to_ip; + be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); + be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ptq->pt->mispred_all; + + if (bs->nr < ptq->pt->synth_opts.last_branch_sz) + bs->nr += 1; +} + +static int intel_pt_inject_event(union perf_event *event, + struct perf_sample *sample, u64 type, + bool swapped) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample, swapped); +} + +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; + + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->branches_id; + sample.stream_id = ptq->pt->branches_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + /* + * perf report cannot handle events without a branch stack when using + * SORT_MODE__BRANCH so make a dummy one. + */ + if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->branches_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", + ret); + + return ret; +} + +static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->instructions_id; + sample.stream_id = ptq->pt->instructions_id; + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->instructions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", + ret); + + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); + + return ret; +} + +static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) +{ + int ret; + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + if (!pt->timeless_decoding) + sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); + + sample.ip = ptq->state->from_ip; + sample.pid = ptq->pid; + sample.tid = ptq->tid; + sample.addr = ptq->state->to_ip; + sample.id = ptq->pt->transactions_id; + sample.stream_id = ptq->pt->transactions_id; + sample.period = 1; + sample.cpu = ptq->cpu; + sample.flags = ptq->flags; + sample.insn_len = ptq->insn_len; + + if (pt->synth_opts.callchain) { + thread_stack__sample(ptq->thread, ptq->chain, + pt->synth_opts.callchain_sz, sample.ip); + sample.callchain = ptq->chain; + } + + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + + if (pt->synth_opts.inject) { + ret = intel_pt_inject_event(event, &sample, + pt->transactions_sample_type, + pt->synth_needs_swap); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(pt->session, event, &sample); + if (ret) + pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", + ret); + + if (pt->synth_opts.callchain) + intel_pt_reset_last_branch_rb(ptq); + + return ret; +} + +static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, + pid_t pid, pid_t tid, u64 ip) +{ + union perf_event event; + char msg[MAX_AUXTRACE_ERROR_MSG]; + int err; + + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); + + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg); + + err = perf_session__deliver_synth_event(pt->session, &event, NULL); + if (err) + pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", + err); + + return err; +} + +static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) +{ + struct auxtrace_queue *queue; + pid_t tid = ptq->next_tid; + int err; + + if (tid == -1) + return 0; + + intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); + + err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); + + queue = &pt->queues.queue_array[ptq->queue_nr]; + intel_pt_set_pid_tid_cpu(pt, queue); + + ptq->next_tid = -1; + + return err; +} + +static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) +{ + struct intel_pt *pt = ptq->pt; + + return ip == pt->switch_ip && + (ptq->flags & PERF_IP_FLAG_BRANCH) && + !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); +} + +static int intel_pt_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!ptq->have_sample) + return 0; + + ptq->have_sample = false; + + if (pt->sample_instructions && + (state->type & INTEL_PT_INSTRUCTION)) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + + if (pt->sample_transactions && + (state->type & INTEL_PT_TRANSACTION)) { + err = intel_pt_synth_transaction_sample(ptq); + if (err) + return err; + } + + if (!(state->type & INTEL_PT_BRANCH)) + return 0; + + if (pt->synth_opts.callchain) + thread_stack__event(ptq->thread, ptq->flags, state->from_ip, + state->to_ip, ptq->insn_len, + state->trace_nr); + else + thread_stack__set_trace_nr(ptq->thread, state->trace_nr); + + if (pt->sample_branches) { + err = intel_pt_synth_branch_sample(ptq); + if (err) + return err; + } + + if (pt->synth_opts.last_branch) + intel_pt_update_last_branch_rb(ptq); + + if (!pt->sync_switch) + return 0; + + if (intel_pt_is_switch_ip(ptq, state->to_ip)) { + switch (ptq->switch_state) { + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + err = intel_pt_next_tid(pt, ptq); + if (err) + return err; + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + default: + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; + return 1; + } + } else if (!state->to_ip) { + ptq->switch_state = INTEL_PT_SS_NOT_TRACING; + } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { + ptq->switch_state = INTEL_PT_SS_UNKNOWN; + } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && + state->to_ip == pt->ptss_ip && + (ptq->flags & PERF_IP_FLAG_CALL)) { + ptq->switch_state = INTEL_PT_SS_TRACING; + } + + return 0; +} + +static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) +{ + struct machine *machine = pt->machine; + struct map *map; + struct symbol *sym, *start; + u64 ip, switch_ip = 0; + const char *ptss; + + if (ptss_ip) + *ptss_ip = 0; + + map = machine__kernel_map(machine); + if (!map) + return 0; + + if (map__load(map, machine->symbol_filter)) + return 0; + + start = dso__first_symbol(map->dso, MAP__FUNCTION); + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (sym->binding == STB_GLOBAL && + !strcmp(sym->name, "__switch_to")) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + switch_ip = ip; + break; + } + } + } + + if (!switch_ip || !ptss_ip) + return 0; + + if (pt->have_sched_switch == 1) + ptss = "perf_trace_sched_switch"; + else + ptss = "__perf_event_task_sched_out"; + + for (sym = start; sym; sym = dso__next_symbol(sym)) { + if (!strcmp(sym->name, ptss)) { + ip = map->unmap_ip(map, sym->start); + if (ip >= map->start && ip < map->end) { + *ptss_ip = ip; + break; + } + } + } + + return switch_ip; +} + +static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) +{ + const struct intel_pt_state *state = ptq->state; + struct intel_pt *pt = ptq->pt; + int err; + + if (!pt->kernel_start) { + pt->kernel_start = machine__kernel_start(pt->machine); + if (pt->per_cpu_mmaps && + (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && + !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && + !pt->sampling_mode) { + pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); + if (pt->switch_ip) { + intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", + pt->switch_ip, pt->ptss_ip); + pt->sync_switch = true; + } + } + } + + intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", + ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); + while (1) { + err = intel_pt_sample(ptq); + if (err) + return err; + + state = intel_pt_decode(ptq->decoder); + if (state->err) { + if (state->err == INTEL_PT_ERR_NODATA) + return 1; + if (pt->sync_switch && + state->from_ip >= pt->kernel_start) { + pt->sync_switch = false; + intel_pt_next_tid(pt, ptq); + } + if (pt->synth_opts.errors) { + err = intel_pt_synth_error(pt, state->err, + ptq->cpu, ptq->pid, + ptq->tid, + state->from_ip); + if (err) + return err; + } + continue; + } + + ptq->state = state; + ptq->have_sample = true; + intel_pt_sample_flags(ptq); + + /* Use estimated TSC upon return to user space */ + if (pt->est_tsc && + (state->from_ip >= pt->kernel_start || !state->from_ip) && + state->to_ip && state->to_ip < pt->kernel_start) { + intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + /* Use estimated TSC in unknown switch state */ + } else if (pt->sync_switch && + ptq->switch_state == INTEL_PT_SS_UNKNOWN && + intel_pt_is_switch_ip(ptq, state->to_ip) && + ptq->next_tid == -1) { + intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", + state->timestamp, state->est_timestamp); + ptq->timestamp = state->est_timestamp; + } else if (state->timestamp > ptq->timestamp) { + ptq->timestamp = state->timestamp; + } + + if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { + *timestamp = ptq->timestamp; + return 0; + } + } + return 0; +} + +static inline int intel_pt_update_queues(struct intel_pt *pt) +{ + if (pt->queues.new_data) { + pt->queues.new_data = false; + return intel_pt_setup_queues(pt); + } + return 0; +} + +static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + + if (!pt->heap.heap_cnt) + return 0; + + if (pt->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = pt->heap.heap_array[0].queue_nr; + queue = &pt->queues.queue_array[queue_nr]; + ptq = queue->priv; + + intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", + queue_nr, pt->heap.heap_array[0].ordinal, + timestamp); + + auxtrace_heap__pop(&pt->heap); + + if (pt->heap.heap_cnt) { + ts = pt->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + intel_pt_set_pid_tid_cpu(pt, queue); + + ret = intel_pt_run_decoder(ptq, &ts); + + if (ret < 0) { + auxtrace_heap__add(&pt->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + ptq->on_heap = false; + } + } + + return 0; +} + +static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, + u64 time_) +{ + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + u64 ts = 0; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && (tid == -1 || ptq->tid == tid)) { + ptq->time = time_; + intel_pt_set_pid_tid_cpu(pt, queue); + intel_pt_run_decoder(ptq, &ts); + } + } + return 0; +} + +static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) +{ + return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, + sample->pid, sample->tid, 0); +} + +static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) +{ + unsigned i, j; + + if (cpu < 0 || !pt->queues.nr_queues) + return NULL; + + if ((unsigned)cpu >= pt->queues.nr_queues) + i = pt->queues.nr_queues - 1; + else + i = cpu; + + if (pt->queues.queue_array[i].cpu == cpu) + return pt->queues.queue_array[i].priv; + + for (j = 0; i > 0; j++) { + if (pt->queues.queue_array[--i].cpu == cpu) + return pt->queues.queue_array[i].priv; + } + + for (; j < pt->queues.nr_queues; j++) { + if (pt->queues.queue_array[j].cpu == cpu) + return pt->queues.queue_array[j].priv; + } + + return NULL; +} + +static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, + u64 timestamp) +{ + struct intel_pt_queue *ptq; + int err; + + if (!pt->sync_switch) + return 1; + + ptq = intel_pt_cpu_to_ptq(pt, cpu); + if (!ptq) + return 1; + + switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: + ptq->next_tid = -1; + break; + case INTEL_PT_SS_UNKNOWN: + case INTEL_PT_SS_TRACING: + ptq->next_tid = tid; + ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; + return 0; + case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: + if (!ptq->on_heap) { + ptq->timestamp = perf_time_to_tsc(timestamp, + &pt->tc); + err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, + ptq->timestamp); + if (err) + return err; + ptq->on_heap = true; + } + ptq->switch_state = INTEL_PT_SS_TRACING; + break; + case INTEL_PT_SS_EXPECTING_SWITCH_IP: + ptq->next_tid = tid; + intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); + break; + default: + break; + } + + return 1; +} + +static int intel_pt_process_switch(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct perf_evsel *evsel; + pid_t tid; + int cpu, ret; + + evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); + if (evsel != pt->switch_evsel) + return 0; + + tid = perf_evsel__intval(evsel, sample, "next_pid"); + cpu = sample->cpu; + + intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); + if (ret <= 0) + return ret; + + return machine__set_current_tid(pt->machine, cpu, -1, tid); +} + +static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, + struct perf_sample *sample) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + pid_t pid, tid; + int cpu, ret; + + cpu = sample->cpu; + + if (pt->have_sched_switch == 3) { + if (!out) + return 0; + if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { + pr_err("Expecting CPU-wide context switch event\n"); + return -EINVAL; + } + pid = event->context_switch.next_prev_pid; + tid = event->context_switch.next_prev_tid; + } else { + if (out) + return 0; + pid = sample->pid; + tid = sample->tid; + } + + if (tid == -1) { + pr_err("context_switch event has no tid\n"); + return -EINVAL; + } + + intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); + if (ret <= 0) + return ret; + + return machine__set_current_tid(pt->machine, cpu, pid, tid); +} + +static int intel_pt_process_itrace_start(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + if (!pt->per_cpu_mmaps) + return 0; + + intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + sample->cpu, event->itrace_start.pid, + event->itrace_start.tid, sample->time, + perf_time_to_tsc(sample->time, &pt->tc)); + + return machine__set_current_tid(pt->machine, sample->cpu, + event->itrace_start.pid, + event->itrace_start.tid); +} + +static int intel_pt_process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + u64 timestamp; + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("Intel Processor Trace requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && sample->time != (u64)-1) + timestamp = perf_time_to_tsc(sample->time, &pt->tc); + else + timestamp = 0; + + if (timestamp || pt->timeless_decoding) { + err = intel_pt_update_queues(pt); + if (err) + return err; + } + + if (pt->timeless_decoding) { + if (event->header.type == PERF_RECORD_EXIT) { + err = intel_pt_process_timeless_queues(pt, + event->fork.tid, + sample->time); + } + } else if (timestamp) { + err = intel_pt_process_queues(pt, timestamp); + } + if (err) + return err; + + if (event->header.type == PERF_RECORD_AUX && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && + pt->synth_opts.errors) { + err = intel_pt_lost(pt, sample); + if (err) + return err; + } + + if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) + err = intel_pt_process_switch(pt, sample); + else if (event->header.type == PERF_RECORD_ITRACE_START) + err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_SWITCH || + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + err = intel_pt_context_switch(pt, event, sample); + + intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", + perf_event__name(event->header.type), event->header.type, + sample->cpu, sample->time, timestamp); + + return err; +} + +static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + int ret; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = intel_pt_update_queues(pt); + if (ret < 0) + return ret; + + if (pt->timeless_decoding) + return intel_pt_process_timeless_queues(pt, -1, + MAX_TIMESTAMP - 1); + + return intel_pt_process_queues(pt, MAX_TIMESTAMP); +} + +static void intel_pt_free_events(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + struct auxtrace_queues *queues = &pt->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + intel_pt_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + intel_pt_log_disable(); + auxtrace_queues__free(queues); +} + +static void intel_pt_free(struct perf_session *session) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + auxtrace_heap__free(&pt->heap); + intel_pt_free_events(session); + session->auxtrace = NULL; + thread__delete(pt->unknown_thread); + free(pt); +} + +static int intel_pt_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + if (pt->sampling_mode) + return 0; + + if (!pt->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + int err; + + if (perf_data_file__is_pipe(session->file)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&pt->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + intel_pt_dump_event(pt, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; +} + +struct intel_pt_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int intel_pt_event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct intel_pt_synth *intel_pt_synth = + container_of(tool, struct intel_pt_synth, dummy_tool); + + return perf_session__deliver_synth_event(intel_pt_synth->session, event, + NULL); +} + +static int intel_pt_synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct intel_pt_synth intel_pt_synth; + + memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); + intel_pt_synth.session = session; + + return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, + &id, intel_pt_event_synth); +} + +static int intel_pt_synth_events(struct intel_pt *pt, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == pt->pmu_type && evsel->ids) { + found = true; + break; + } + } + + if (!found) { + pr_debug("There are no selected events with Intel Processor Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (pt->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + if (!pt->per_cpu_mmaps) + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + if (!id) + id = 1; + + if (pt->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) + attr.sample_period = + intel_pt_ns_to_ticks(pt, pt->synth_opts.period); + else + attr.sample_period = pt->synth_opts.period; + pt->instructions_sample_period = attr.sample_period; + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'instructions' event type\n", + __func__); + return err; + } + pt->sample_instructions = true; + pt->instructions_sample_type = attr.sample_type; + pt->instructions_id = id; + id += 1; + } + + if (pt->synth_opts.transactions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = 1; + if (pt->synth_opts.callchain) + attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'transactions' event type\n", + __func__); + return err; + } + pt->sample_transactions = true; + pt->transactions_id = id; + id += 1; + evlist__for_each(evlist, evsel) { + if (evsel->id && evsel->id[0] == pt->transactions_id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup("transactions"); + break; + } + } + } + + if (pt->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'branches' event type\n", + __func__); + return err; + } + pt->sample_branches = true; + pt->branches_sample_type = attr.sample_type; + pt->branches_id = id; + } + + pt->synth_needs_swap = evsel->needs_swap; + + return 0; +} + +static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_reverse(evlist, evsel) { + const char *name = perf_evsel__name(evsel); + + if (!strcmp(name, "sched:sched_switch")) + return evsel; + } + + return NULL; +} + +static bool intel_pt_find_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.context_switch) + return true; + } + + return false; +} + +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt *pt = data; + + if (!strcmp(var, "intel-pt.mispred-all")) + pt->mispred_all = perf_config_bool(var, value); + + return 0; +} + +static const char * const intel_pt_info_fmts[] = { + [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", + [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", + [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", + [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", + [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", + [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", + [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", +}; + +static void intel_pt_print_info(u64 *arr, int start, int finish) +{ + int i; + + if (!dump_trace) + return; + + for (i = start; i <= finish; i++) + fprintf(stdout, intel_pt_info_fmts[i], arr[i]); +} + +int intel_pt_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; + struct intel_pt *pt; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + pt = zalloc(sizeof(struct intel_pt)); + if (!pt) + return -ENOMEM; + + perf_config(intel_pt_perf_config, pt); + + err = auxtrace_queues__init(&pt->queues); + if (err) + goto err_free; + + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + + pt->session = session; + pt->machine = &session->machines.host; /* No kvm support */ + pt->auxtrace_type = auxtrace_info->type; + pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE]; + pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT]; + pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT]; + pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO]; + pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO]; + pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT]; + pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT]; + pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH]; + pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE]; + pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, + INTEL_PT_PER_CPU_MMAPS); + + if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + + (sizeof(u64) * INTEL_PT_CYC_BIT)) { + pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; + pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; + pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; + pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; + pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, + INTEL_PT_CYC_BIT); + } + + pt->timeless_decoding = intel_pt_timeless_decoding(pt); + pt->have_tsc = intel_pt_have_tsc(pt); + pt->sampling_mode = false; + pt->est_tsc = !pt->timeless_decoding; + + pt->unknown_thread = thread__new(999999999, 999999999); + if (!pt->unknown_thread) { + err = -ENOMEM; + goto err_free_queues; + } + err = thread__set_comm(pt->unknown_thread, "unknown", 0); + if (err) + goto err_delete_thread; + if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { + err = -ENOMEM; + goto err_delete_thread; + } + + pt->auxtrace.process_event = intel_pt_process_event; + pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; + pt->auxtrace.flush_events = intel_pt_flush; + pt->auxtrace.free_events = intel_pt_free_events; + pt->auxtrace.free = intel_pt_free; + session->auxtrace = &pt->auxtrace; + + if (dump_trace) + return 0; + + if (pt->have_sched_switch == 1) { + pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); + if (!pt->switch_evsel) { + pr_err("%s: missing sched_switch event\n", __func__); + goto err_delete_thread; + } + } else if (pt->have_sched_switch == 2 && + !intel_pt_find_switch(session->evlist)) { + pr_err("%s: missing context_switch attribute flag\n", __func__); + goto err_delete_thread; + } + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&pt->synth_opts); + if (use_browser != -1) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + } + } + + if (pt->synth_opts.log) + intel_pt_log_enable(); + + /* Maximum non-turbo ratio is TSC freq / 100 MHz */ + if (pt->tc.time_mult) { + u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000); + + pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000; + intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); + intel_pt_log("Maximum non-turbo ratio %u\n", + pt->max_non_turbo_ratio); + } + + if (pt->synth_opts.calls) + pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_TRACE_END; + if (pt->synth_opts.returns) + pt->branches_filter |= PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_TRACE_BEGIN; + + if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + symbol_conf.use_callchain = false; + pt->synth_opts.callchain = false; + } + } + + err = intel_pt_synth_events(pt, session); + if (err) + goto err_delete_thread; + + err = auxtrace_queues__process_index(&pt->queues, session); + if (err) + goto err_delete_thread; + + if (pt->queues.populated) + pt->data_queued = true; + + if (pt->timeless_decoding) + pr_debug2("Intel PT decoding without timestamps\n"); + + return 0; + +err_delete_thread: + thread__delete(pt->unknown_thread); +err_free_queues: + intel_pt_log_disable(); + auxtrace_queues__free(&pt->queues); + session->auxtrace = NULL; +err_free: + free(pt); + return err; +} -- cgit 1.2.3-korg