From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/tools/perf/arch/x86/util/Build | 6 + kernel/tools/perf/arch/x86/util/auxtrace.c | 83 ++ kernel/tools/perf/arch/x86/util/dwarf-regs.c | 122 ++- kernel/tools/perf/arch/x86/util/intel-bts.c | 458 +++++++++++ kernel/tools/perf/arch/x86/util/intel-pt.c | 1046 ++++++++++++++++++++++++++ kernel/tools/perf/arch/x86/util/perf_regs.c | 28 + kernel/tools/perf/arch/x86/util/pmu.c | 18 + 7 files changed, 1727 insertions(+), 34 deletions(-) create mode 100644 kernel/tools/perf/arch/x86/util/auxtrace.c create mode 100644 kernel/tools/perf/arch/x86/util/intel-bts.c create mode 100644 kernel/tools/perf/arch/x86/util/intel-pt.c create mode 100644 kernel/tools/perf/arch/x86/util/perf_regs.c create mode 100644 kernel/tools/perf/arch/x86/util/pmu.c (limited to 'kernel/tools/perf/arch/x86/util') diff --git a/kernel/tools/perf/arch/x86/util/Build b/kernel/tools/perf/arch/x86/util/Build index cfbccc4e3..ff63649fa 100644 --- a/kernel/tools/perf/arch/x86/util/Build +++ b/kernel/tools/perf/arch/x86/util/Build @@ -1,8 +1,14 @@ libperf-y += header.o libperf-y += tsc.o +libperf-y += pmu.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +libperf-$(CONFIG_AUXTRACE) += auxtrace.o 
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o +libperf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/kernel/tools/perf/arch/x86/util/auxtrace.c b/kernel/tools/perf/arch/x86/util/auxtrace.c new file mode 100644 index 000000000..7a7805583 --- /dev/null +++ b/kernel/tools/perf/arch/x86/util/auxtrace.c @@ -0,0 +1,83 @@ +/* + * auxtrace.c: AUX area tracing support + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include + +#include "../../util/header.h" +#include "../../util/debug.h" +#include "../../util/pmu.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-pt.h" +#include "../../util/intel-bts.h" +#include "../../util/evlist.h" + +static +struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist, + int *err) +{ + struct perf_pmu *intel_pt_pmu; + struct perf_pmu *intel_bts_pmu; + struct perf_evsel *evsel; + bool found_pt = false; + bool found_bts = false; + + intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + + if (evlist) { + evlist__for_each(evlist, evsel) { + if (intel_pt_pmu && + evsel->attr.type == intel_pt_pmu->type) + found_pt = true; + if (intel_bts_pmu && + evsel->attr.type == intel_bts_pmu->type) + found_bts = true; + } + } + + if (found_pt && found_bts) { + pr_err("intel_pt and intel_bts may not be used together\n"); + *err = -EINVAL; + return NULL; + } + + if (found_pt) + return intel_pt_recording_init(err); + + if (found_bts) + return intel_bts_recording_init(err); + + return NULL; +} + +struct 
auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, + int *err) +{ + char buffer[64]; + int ret; + + *err = 0; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (ret) { + *err = ret; + return NULL; + } + + if (!strncmp(buffer, "GenuineIntel,", 13)) + return auxtrace_record__init_intel(evlist, err); + + return NULL; +} diff --git a/kernel/tools/perf/arch/x86/util/dwarf-regs.c b/kernel/tools/perf/arch/x86/util/dwarf-regs.c index be22dd463..9223c164e 100644 --- a/kernel/tools/perf/arch/x86/util/dwarf-regs.c +++ b/kernel/tools/perf/arch/x86/util/dwarf-regs.c @@ -21,55 +21,109 @@ */ #include +#include /* for EINVAL */ +#include /* for strcmp */ +#include /* for struct pt_regs */ +#include /* for offsetof */ #include /* - * Generic dwarf analysis helpers + * See arch/x86/kernel/ptrace.c. + * Different from it: + * + * - Since struct pt_regs is defined differently for user and kernel, + * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct + * field name of user's pt_regs), we make REG_OFFSET_NAME to accept + * both string name and reg field name. + * + * - Since accessing x86_32's pt_regs from x86_64 building is difficult + * and vice versa, we simply fill offset with -1, so + * get_arch_regstr() still works but regs_query_register_offset() + * returns error. + * The only inconvenience caused by it now is that we are not allowed + * to generate BPF prologue for a x86_64 kernel if perf is built for + * x86_32. This is really a rare usecase. + * + * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use + * the order defined by dwarf.
*/ -#define X86_32_MAX_REGS 8 -const char *x86_32_regs_table[X86_32_MAX_REGS] = { - "%ax", - "%cx", - "%dx", - "%bx", - "$stack", /* Stack address instead of %sp */ - "%bp", - "%si", - "%di", +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +#ifdef __x86_64__ +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} +#else +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +#endif + +static const struct pt_regs_offset x86_32_regoffset_table[] = { + REG_OFFSET_NAME_32("%ax", eax), + REG_OFFSET_NAME_32("%cx", ecx), + REG_OFFSET_NAME_32("%dx", edx), + REG_OFFSET_NAME_32("%bx", ebx), + REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ + REG_OFFSET_NAME_32("%bp", ebp), + REG_OFFSET_NAME_32("%si", esi), + REG_OFFSET_NAME_32("%di", edi), + REG_OFFSET_END, }; -#define X86_64_MAX_REGS 16 -const char *x86_64_regs_table[X86_64_MAX_REGS] = { - "%ax", - "%dx", - "%cx", - "%bx", - "%si", - "%di", - "%bp", - "%sp", - "%r8", - "%r9", - "%r10", - "%r11", - "%r12", - "%r13", - "%r14", - "%r15", +static const struct pt_regs_offset x86_64_regoffset_table[] = { + REG_OFFSET_NAME_64("%ax", rax), + REG_OFFSET_NAME_64("%dx", rdx), + REG_OFFSET_NAME_64("%cx", rcx), + REG_OFFSET_NAME_64("%bx", rbx), + REG_OFFSET_NAME_64("%si", rsi), + REG_OFFSET_NAME_64("%di", rdi), + REG_OFFSET_NAME_64("%bp", rbp), + REG_OFFSET_NAME_64("%sp", rsp), + REG_OFFSET_NAME_64("%r8", r8), + REG_OFFSET_NAME_64("%r9", r9), + REG_OFFSET_NAME_64("%r10", r10), + REG_OFFSET_NAME_64("%r11", r11), + REG_OFFSET_NAME_64("%r12", r12), + REG_OFFSET_NAME_64("%r13", r13), + REG_OFFSET_NAME_64("%r14", r14), + REG_OFFSET_NAME_64("%r15", r15), + REG_OFFSET_END, }; /* TODO: switching by dwarf address size */ #ifdef __x86_64__ -#define ARCH_MAX_REGS 
X86_64_MAX_REGS -#define arch_regs_table x86_64_regs_table +#define regoffset_table x86_64_regoffset_table #else -#define ARCH_MAX_REGS X86_32_MAX_REGS -#define arch_regs_table x86_32_regs_table +#define regoffset_table x86_32_regoffset_table #endif +/* Minus 1 for the ending REG_OFFSET_END */ +#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) + /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; + return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; +} + +/* Reuse code from arch/x86/kernel/ptrace.c */ +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL. + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; } diff --git a/kernel/tools/perf/arch/x86/util/intel-bts.c b/kernel/tools/perf/arch/x86/util/intel-bts.c new file mode 100644 index 000000000..9b94ce520 --- /dev/null +++ b/kernel/tools/perf/arch/x86/util/intel-bts.c @@ -0,0 +1,458 @@ +/* + * intel-bts.c: Intel BTS (Branch Trace Store) support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details.
+ * + */ + +#include +#include +#include +#include + +#include "../../util/cpumap.h" +#include "../../util/evsel.h" +#include "../../util/evlist.h" +#include "../../util/session.h" +#include "../../util/util.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/tsc.h" +#include "../../util/auxtrace.h" +#include "../../util/intel-bts.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4) + +#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60) + +struct intel_bts_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_bts_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_bts_pmu; + struct perf_evlist *evlist; + bool snapshot_mode; + size_t snapshot_size; + int snapshot_ref_cnt; + struct intel_bts_snapshot_ref *snapshot_refs; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_BTS_AUXTRACE_PRIV_SIZE; +} + +static int intel_bts_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false; + int err; + + if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel BTS: TSC not available\n"); + } + + 
auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS; + auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type; + auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode; + + return 0; +} + +static int intel_bts_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_evsel *evsel, *intel_bts_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + + btsr->evlist = evlist; + btsr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_bts_pmu->type) { + if (intel_bts_evsel) { + pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_bts_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + if (opts->full_auxtrace && !cpu_map__empty(cpus)) { + pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); + return -EINVAL; + } + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == 
UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel BTS snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + if (intel_bts_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. 
+		 */
+		perf_evlist__to_front(evlist, intel_bts_evsel);
+		/*
+		 * In the case of per-cpu mmaps, we need the CPU on the
+		 * AUX event.
+		 */
+		if (!cpu_map__empty(cpus))
+			perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+	}
+
+	/* Add dummy event to keep tracking */
+	if (opts->full_auxtrace) {
+		struct perf_evsel *tracking_evsel;
+		int err;
+
+		err = parse_events(evlist, "dummy:u", NULL);
+		if (err)
+			return err;
+
+		tracking_evsel = perf_evlist__last(evlist);
+
+		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+		tracking_evsel->attr.freq = 0;
+		tracking_evsel->attr.sample_period = 1;
+	}
+
+	return 0;
+}
+
+static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
+					    struct record_opts *opts,
+					    const char *str)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	unsigned long long snapshot_size = 0;
+	char *endptr;
+
+	if (str) {
+		snapshot_size = strtoull(str, &endptr, 0);
+		if (*endptr || snapshot_size > SIZE_MAX)
+			return -1;
+	}
+
+	opts->auxtrace_snapshot_mode = true;
+	opts->auxtrace_snapshot_size = snapshot_size;
+
+	btsr->snapshot_size = snapshot_size;
+
+	return 0;
+}
+
+static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return rdtsc();
+}
+
+/*
+ * Grow btsr->snapshot_refs so that index @idx becomes valid.
+ *
+ * The capacity starts at 16 entries and is doubled until it exceeds @idx.
+ * Existing entries (including their ref_buf pointers) are copied into the
+ * new zero-filled array, after which the old array itself is released.
+ *
+ * Returns 0 on success, or -ENOMEM if the new array cannot be allocated, in
+ * which case btsr->snapshot_refs and btsr->snapshot_ref_cnt are unchanged.
+ */
+static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
+					 int idx)
+{
+	const size_t sz = sizeof(struct intel_bts_snapshot_ref);
+	int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
+	struct intel_bts_snapshot_ref *refs;
+
+	if (!new_cnt)
+		new_cnt = 16;
+
+	while (new_cnt <= idx)
+		new_cnt *= 2;
+
+	refs = calloc(new_cnt, sz);
+	if (!refs)
+		return -ENOMEM;
+
+	/* First allocation has nothing to copy; do not hand memcpy() a NULL source */
+	if (cnt)
+		memcpy(refs, btsr->snapshot_refs, cnt * sz);
+
+	/* Entries now live in the new array, so drop the old one instead of leaking it */
+	free(btsr->snapshot_refs);
+	btsr->snapshot_refs = refs;
+	btsr->snapshot_ref_cnt = new_cnt;
+
+	return 0;
+}
+
+static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
+{
+	int i;
+
+	for (i = 0; i < btsr->snapshot_ref_cnt; i++)
+		zfree(&btsr->snapshot_refs[i].ref_buf);
+	zfree(&btsr->snapshot_refs);
+}
+ +static void intel_bts_recording_free(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + + intel_bts_free_snapshot_refs(btsr); + free(btsr); +} + +static int intel_bts_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__disable_event(btsr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_bts_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__enable_event(btsr->evlist, evsel); + } + return -EINVAL; +} + +static bool intel_bts_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + if (idx >= btsr->snapshot_ref_cnt) { + err = intel_bts_alloc_snapshot_refs(btsr, idx); + if (err) + goto out_err; + } + + wrapped = btsr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) { + btsr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. 
Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(btsr->evlist, evsel) { + if (evsel->attr.type == btsr->intel_bts_pmu->type) + return perf_evlist__enable_event_idx(btsr->evlist, + evsel, idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_bts_recording_init(int *err) +{ + struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + struct intel_bts_recording *btsr; + + if (!intel_bts_pmu) + return NULL; + + btsr = zalloc(sizeof(struct intel_bts_recording)); + if (!btsr) { + *err = -ENOMEM; + return NULL; + } + + btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.recording_options = intel_bts_recording_options; + btsr->itr.info_priv_size = intel_bts_info_priv_size; + btsr->itr.info_fill = intel_bts_info_fill; + btsr->itr.free = intel_bts_recording_free; + btsr->itr.snapshot_start = intel_bts_snapshot_start; + btsr->itr.snapshot_finish = intel_bts_snapshot_finish; + btsr->itr.find_snapshot = intel_bts_find_snapshot; + btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; + btsr->itr.reference = intel_bts_reference; + btsr->itr.read_finish = intel_bts_read_finish; + btsr->itr.alignment = sizeof(struct branch); + return &btsr->itr; +} diff --git a/kernel/tools/perf/arch/x86/util/intel-pt.c 
b/kernel/tools/perf/arch/x86/util/intel-pt.c new file mode 100644 index 000000000..b02af064f --- /dev/null +++ b/kernel/tools/perf/arch/x86/util/intel-pt.c @@ -0,0 +1,1046 @@ +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "../../perf.h" +#include "../../util/session.h" +#include "../../util/event.h" +#include "../../util/evlist.h" +#include "../../util/evsel.h" +#include "../../util/cpumap.h" +#include "../../util/parse-options.h" +#include "../../util/parse-events.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/auxtrace.h" +#include "../../util/tsc.h" +#include "../../util/intel-pt.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) + +#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) + +#define INTEL_PT_PSB_PERIOD_NEAR 256 + +struct intel_pt_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_pt_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_pt_pmu; + int have_sched_switch; + struct perf_evlist *evlist; + bool snapshot_mode; + bool snapshot_init_done; + size_t snapshot_size; + size_t snapshot_ref_buf_size; + int snapshot_ref_cnt; + struct intel_pt_snapshot_ref *snapshot_refs; +}; + +static int intel_pt_parse_terms_with_default(struct list_head *formats, + const char 
*str, + u64 *config) +{ + struct list_head *terms; + struct perf_event_attr attr = { .size = 0, }; + int err; + + terms = malloc(sizeof(struct list_head)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + err = parse_events_terms(terms, str); + if (err) + goto out_free; + + attr.config = *config; + err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); + if (err) + goto out_free; + + *config = attr.config; +out_free: + parse_events__free_terms(terms); + return err; +} + +static int intel_pt_parse_terms(struct list_head *formats, const char *str, + u64 *config) +{ + *config = 0; + return intel_pt_parse_terms_with_default(formats, str, config); +} + +static u64 intel_pt_masked_bits(u64 mask, u64 bits) +{ + const u64 top_bit = 1ULL << 63; + u64 res = 0; + int i; + + for (i = 0; i < 64; i++) { + if (mask & top_bit) { + res <<= 1; + if (bits & top_bit) + res |= 1; + } + mask <<= 1; + bits <<= 1; + } + + return res; +} + +static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, + struct perf_evlist *evlist, u64 *res) +{ + struct perf_evsel *evsel; + u64 mask; + + *res = 0; + + mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); + if (!mask) + return -EINVAL; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + *res = intel_pt_masked_bits(mask, evsel->attr.config); + return 0; + } + } + + return -EINVAL; +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, + struct perf_evlist *evlist) +{ + u64 val; + int err, topa_multiple_entries; + size_t psb_period; + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", + "%d", &topa_multiple_entries) != 1) + topa_multiple_entries = 0; + + /* + * Use caps/topa_multiple_entries to indicate early hardware that had + * extra frequent PSBs. 
+ */ + if (!topa_multiple_entries) { + psb_period = 256; + goto out; + } + + err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); + if (err) + val = 0; + + psb_period = 1 << (val + 11); +out: + pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); + return psb_period; +} + +static int intel_pt_pick_bit(int bits, int target) +{ + int pos, pick = -1; + + for (pos = 0; bits; bits >>= 1, pos++) { + if (bits & 1) { + if (pos <= target || pick < 0) + pick = pos; + if (pos >= target) + break; + } + } + + return pick; +} + +static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) +{ + char buf[256]; + int mtc, mtc_periods = 0, mtc_period; + int psb_cyc, psb_periods, psb_period; + int pos = 0; + u64 config; + + pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", + &mtc) != 1) + mtc = 1; + + if (mtc) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", + &mtc_periods) != 1) + mtc_periods = 0; + if (mtc_periods) { + mtc_period = intel_pt_pick_bit(mtc_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",mtc,mtc_period=%d", mtc_period); + } + } + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", + &psb_cyc) != 1) + psb_cyc = 1; + + if (psb_cyc && mtc_periods) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", + &psb_periods) != 1) + psb_periods = 0; + if (psb_periods) { + psb_period = intel_pt_pick_bit(psb_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",psb_period=%d", psb_period); + } + } + + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); + + intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); + + return config; +} + +static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + 
+ if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + ptr->snapshot_size = snapshot_size; + + return 0; +} + +struct perf_event_attr * +intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) + return NULL; + + attr->config = intel_pt_default_config(intel_pt_pmu); + + intel_pt_pmu->selectable = true; + + return attr; +} + +static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +{ + return INTEL_PT_AUXTRACE_PRIV_SIZE; +} + +static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + *n = ebx; + *d = eax; +} + +static int intel_pt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false, per_cpu_mmaps; + u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; + u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; + int err; + + if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", + &noretcomp_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); + mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, + "mtc_period"); + intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); + + intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + pc = 
session->evlist->mmap[0].base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel Processor Trace: TSC not available\n"); + } + + per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus); + + auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; + auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; + auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; + auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; + auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; + auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; + auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; + auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; + auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; + auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; + auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; + + return 0; +} + +static int intel_pt_track_switches(struct perf_evlist *evlist) +{ + const char *sched_switch = "sched:sched_switch"; + struct perf_evsel *evsel; + int err; + + if (!perf_evlist__can_select_event(evlist, sched_switch)) + return -EPERM; + + err = parse_events(evlist, sched_switch, NULL); + if (err) { + pr_debug2("%s: failed to parse %s, error %d\n", + __func__, sched_switch, err); + return err; + } + + evsel = perf_evlist__last(evlist); + + perf_evsel__set_sample_bit(evsel, CPU); + perf_evsel__set_sample_bit(evsel, TIME); + + evsel->system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; + + return 0; +} + +static void 
intel_pt_valid_str(char *str, size_t len, u64 valid) +{ + unsigned int val, last = 0, state = 1; + int p = 0; + + str[0] = '\0'; + + for (val = 0; val <= 64; val++, valid >>= 1) { + if (valid & 1) { + last = val; + switch (state) { + case 0: + p += scnprintf(str + p, len - p, ","); + /* Fall through */ + case 1: + p += scnprintf(str + p, len - p, "%u", val); + state = 2; + break; + case 2: + state = 3; + break; + case 3: + state = 4; + break; + default: + break; + } + } else { + switch (state) { + case 3: + p += scnprintf(str + p, len - p, ",%u", last); + state = 0; + break; + case 4: + p += scnprintf(str + p, len - p, "-%u", last); + state = 0; + break; + default: + break; + } + if (state != 1) + state = 0; + } + } +} + +static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, + const char *caps, const char *name, + const char *supported, u64 config) +{ + char valid_str[256]; + unsigned int shift; + unsigned long long valid; + u64 bits; + int ok; + + if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) + valid = 0; + + if (supported && + perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) + valid = 0; + + valid |= 1; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); + + config &= bits; + + for (shift = 0; bits && !(bits & 1); shift++) + bits >>= 1; + + config >>= shift; + + if (config > 63) + goto out_err; + + if (valid & (1 << config)) + return 0; +out_err: + intel_pt_valid_str(valid_str, sizeof(valid_str), valid); + pr_err("Invalid %s for %s. 
Valid values are: %s\n", + name, INTEL_PT_PMU_NAME, valid_str); + return -EINVAL; +} + +static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, + struct perf_evsel *evsel) +{ + int err; + + if (!evsel) + return 0; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", + "cyc_thresh", "caps/psb_cyc", + evsel->attr.config); + if (err) + return err; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", + "mtc_period", "caps/mtc", + evsel->attr.config); + if (err) + return err; + + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", + "psb_period", "caps/psb_cyc", + evsel->attr.config); +} + +static int intel_pt_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + bool have_timing_info; + struct perf_evsel *evsel, *intel_pt_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + u64 tsc_bit; + int err; + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + if (intel_pt_evsel) { + pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + intel_pt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); + return -EINVAL; + } + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); + if (err) + return err; + + /* Set 
default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel PT snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + if (psb_period && + opts->auxtrace_snapshot_size <= psb_period + + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", + opts->auxtrace_snapshot_size, psb_period); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate 
auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); + + if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit)) + have_timing_info = true; + else + have_timing_info = false; + + /* + * Per-cpu recording needs sched_switch events to distinguish different + * threads. + */ + if (have_timing_info && !cpu_map__empty(cpus)) { + if (perf_can_record_switch_events()) { + bool cpu_wide = !target__none(&opts->target) && + !target__has_task(&opts->target); + + if (!cpu_wide && perf_can_record_cpu_wide()) { + struct perf_evsel *switch_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + switch_evsel = perf_evlist__last(evlist); + + switch_evsel->attr.freq = 0; + switch_evsel->attr.sample_period = 1; + switch_evsel->attr.context_switch = 1; + + switch_evsel->system_wide = true; + switch_evsel->no_aux_samples = true; + switch_evsel->immediate = true; + + perf_evsel__set_sample_bit(switch_evsel, TID); + perf_evsel__set_sample_bit(switch_evsel, TIME); + perf_evsel__set_sample_bit(switch_evsel, CPU); + + opts->record_switch_events = false; + ptr->have_sched_switch = 3; + } else { + opts->record_switch_events = true; + if (cpu_wide) + ptr->have_sched_switch = 3; + else + ptr->have_sched_switch = 2; + } + } else { + err = intel_pt_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + ptr->have_sched_switch = 1; + } + } + + if (intel_pt_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. 
+ */ + perf_evlist__to_front(evlist, intel_pt_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!cpu_map__empty(cpus)) { + perf_evsel__set_sample_bit(tracking_evsel, TIME); + /* And the CPU for switch events */ + perf_evsel__set_sample_bit(tracking_evsel, CPU); + } + } + + /* + * Warn the user when we do not have enough information to decode i.e. + * per-cpu with no sched_switch (except workload-only). + */ + if (!ptr->have_sched_switch && !cpu_map__empty(cpus) && + !target__none(&opts->target)) + ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); + + return 0; +} + +static int intel_pt_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__disable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event(ptr->evlist, evsel); + } + return -EINVAL; +} + +static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) +{ + 
const size_t sz = sizeof(struct intel_pt_snapshot_ref); + int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_pt_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, ptr->snapshot_refs, cnt * sz); + + ptr->snapshot_refs = refs; + ptr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) +{ + int i; + + for (i = 0; i < ptr->snapshot_ref_cnt; i++) + zfree(&ptr->snapshot_refs[i].ref_buf); + zfree(&ptr->snapshot_refs); +} + +static void intel_pt_recording_free(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + + intel_pt_free_snapshot_refs(ptr); + free(ptr); +} + +static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, + size_t snapshot_buf_size) +{ + size_t ref_buf_size = ptr->snapshot_ref_buf_size; + void *ref_buf; + + ref_buf = zalloc(ref_buf_size); + if (!ref_buf) + return -ENOMEM; + + ptr->snapshot_refs[idx].ref_buf = ref_buf; + ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; + + return 0; +} + +static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + const size_t max_size = 256 * 1024; + size_t buf_size = 0, psb_period; + + if (ptr->snapshot_size <= 64 * 1024) + return 0; + + psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); + if (psb_period) + buf_size = psb_period * 2; + + if (!buf_size || buf_size > max_size) + buf_size = max_size; + + if (buf_size >= snapshot_buf_size) + return 0; + + if (buf_size >= ptr->snapshot_size / 2) + return 0; + + return buf_size; +} + +static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + if (ptr->snapshot_init_done) + return 0; + + ptr->snapshot_init_done = true; + + ptr->snapshot_ref_buf_size = 
intel_pt_snapshot_ref_buf_size(ptr, + snapshot_buf_size); + + return 0; +} + +/** + * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. + * @buf1: first buffer + * @compare_size: number of bytes to compare + * @buf2: second buffer (a circular buffer) + * @offs2: offset in second buffer + * @buf2_size: size of second buffer + * + * The comparison allows for the possibility that the bytes to compare in the + * circular buffer are not contiguous. It is assumed that @compare_size <= + * @buf2_size. This function returns %false if the bytes are identical, %true + * otherwise. + */ +static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, + void *buf2, size_t offs2, size_t buf2_size) +{ + size_t end2 = offs2 + compare_size, part_size; + + if (end2 <= buf2_size) + return memcmp(buf1, buf2 + offs2, compare_size); + + part_size = end2 - buf2_size; + if (memcmp(buf1, buf2 + offs2, part_size)) + return true; + + compare_size -= part_size; + + return memcmp(buf1 + part_size, buf2, compare_size); +} + +static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, + size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + size_t ref_end = ref_offset + ref_size; + + if (ref_end > buf_size) { + if (head > ref_offset || head < ref_end - buf_size) + return true; + } else if (head > ref_offset && head < ref_end) { + return true; + } + + return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, + buf_size); +} + +static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + if (head >= ref_size) { + memcpy(ref_buf, data + head - ref_size, ref_size); + } else { + memcpy(ref_buf, data, head); + ref_size -= head; + memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); + } +} + +static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 head) +{ + struct intel_pt_snapshot_ref *ref = 
&ptr->snapshot_refs[idx]; + bool wrapped; + + wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, + ptr->snapshot_ref_buf_size, mm->len, + data, head); + + intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, + data, head); + + return wrapped; +} + +static bool intel_pt_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + err = intel_pt_snapshot_init(ptr, mm->len); + if (err) + goto out_err; + + if (idx >= ptr->snapshot_ref_cnt) { + err = intel_pt_alloc_snapshot_refs(ptr, idx); + if (err) + goto out_err; + } + + if (ptr->snapshot_ref_buf_size) { + if (!ptr->snapshot_refs[idx].ref_buf) { + err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); + if (err) + goto out_err; + } + wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); + } else { + wrapped = ptr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { + ptr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. 
+ */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->intel_pt_pmu->type) + return perf_evlist__enable_event_idx(ptr->evlist, evsel, + idx); + } + return -EINVAL; +} + +struct auxtrace_record *intel_pt_recording_init(int *err) +{ + struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + struct intel_pt_recording *ptr; + + if (!intel_pt_pmu) + return NULL; + + ptr = zalloc(sizeof(struct intel_pt_recording)); + if (!ptr) { + *err = -ENOMEM; + return NULL; + } + + ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.recording_options = intel_pt_recording_options; + ptr->itr.info_priv_size = intel_pt_info_priv_size; + ptr->itr.info_fill = intel_pt_info_fill; + ptr->itr.free = intel_pt_recording_free; + ptr->itr.snapshot_start = intel_pt_snapshot_start; + ptr->itr.snapshot_finish = intel_pt_snapshot_finish; + ptr->itr.find_snapshot = intel_pt_find_snapshot; + ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; + ptr->itr.reference = intel_pt_reference; + ptr->itr.read_finish = intel_pt_read_finish; + return &ptr->itr; +} diff --git a/kernel/tools/perf/arch/x86/util/perf_regs.c b/kernel/tools/perf/arch/x86/util/perf_regs.c new file mode 100644 index 000000000..c5db14f36 --- /dev/null +++ 
b/kernel/tools/perf/arch/x86/util/perf_regs.c @@ -0,0 +1,28 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(AX, PERF_REG_X86_AX), + SMPL_REG(BX, PERF_REG_X86_BX), + SMPL_REG(CX, PERF_REG_X86_CX), + SMPL_REG(DX, PERF_REG_X86_DX), + SMPL_REG(SI, PERF_REG_X86_SI), + SMPL_REG(DI, PERF_REG_X86_DI), + SMPL_REG(BP, PERF_REG_X86_BP), + SMPL_REG(SP, PERF_REG_X86_SP), + SMPL_REG(IP, PERF_REG_X86_IP), + SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), + SMPL_REG(CS, PERF_REG_X86_CS), + SMPL_REG(SS, PERF_REG_X86_SS), +#ifdef HAVE_ARCH_X86_64_SUPPORT + SMPL_REG(R8, PERF_REG_X86_R8), + SMPL_REG(R9, PERF_REG_X86_R9), + SMPL_REG(R10, PERF_REG_X86_R10), + SMPL_REG(R11, PERF_REG_X86_R11), + SMPL_REG(R12, PERF_REG_X86_R12), + SMPL_REG(R13, PERF_REG_X86_R13), + SMPL_REG(R14, PERF_REG_X86_R14), + SMPL_REG(R15, PERF_REG_X86_R15), +#endif + SMPL_REG_END +}; diff --git a/kernel/tools/perf/arch/x86/util/pmu.c b/kernel/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000..79fe07158 --- /dev/null +++ b/kernel/tools/perf/arch/x86/util/pmu.c @@ -0,0 +1,18 @@ +#include + +#include + +#include "../../util/intel-pt.h" +#include "../../util/intel-bts.h" +#include "../../util/pmu.h" + +struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) + return intel_pt_pmu_default_config(pmu); + if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) + pmu->selectable = true; +#endif + return NULL; +} -- cgit 1.2.3-korg