summaryrefslogtreecommitdiffstats
path: root/kernel/arch/x86/oprofile
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/arch/x86/oprofile')
-rw-r--r--kernel/arch/x86/oprofile/Makefile11
-rw-r--r--kernel/arch/x86/oprofile/backtrace.c127
-rw-r--r--kernel/arch/x86/oprofile/init.c38
-rw-r--r--kernel/arch/x86/oprofile/nmi_int.c802
-rw-r--r--kernel/arch/x86/oprofile/op_counter.h30
-rw-r--r--kernel/arch/x86/oprofile/op_model_amd.c543
-rw-r--r--kernel/arch/x86/oprofile/op_model_p4.c723
-rw-r--r--kernel/arch/x86/oprofile/op_model_ppro.c245
-rw-r--r--kernel/arch/x86/oprofile/op_x86_model.h90
9 files changed, 2609 insertions, 0 deletions
diff --git a/kernel/arch/x86/oprofile/Makefile b/kernel/arch/x86/oprofile/Makefile
new file mode 100644
index 000000000..1599f568f
--- /dev/null
+++ b/kernel/arch/x86/oprofile/Makefile
@@ -0,0 +1,11 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o nmi_timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
+ op_model_ppro.o op_model_p4.o
diff --git a/kernel/arch/x86/oprofile/backtrace.c b/kernel/arch/x86/oprofile/backtrace.c
new file mode 100644
index 000000000..4e664bdb5
--- /dev/null
+++ b/kernel/arch/x86/oprofile/backtrace.c
@@ -0,0 +1,127 @@
+/**
+ * @file backtrace.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author David Smith
+ */
+
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+static int backtrace_stack(void *data, char *name)
+{
+ /* Yes, we want all stacks */
+ return 0;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+ unsigned int *depth = data;
+
+ if ((*depth)--)
+ oprofile_add_trace(addr);
+}
+
+static struct stacktrace_ops backtrace_ops = {
+ .stack = backtrace_stack,
+ .address = backtrace_address,
+ .walk_stack = print_context_stack,
+};
+
+#ifdef CONFIG_COMPAT
+static struct stack_frame_ia32 *
+dump_user_backtrace_32(struct stack_frame_ia32 *head)
+{
+ /* Also check accessibility of one struct frame_head beyond: */
+ struct stack_frame_ia32 bufhead[2];
+ struct stack_frame_ia32 *fp;
+ unsigned long bytes;
+
+ bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+ if (bytes != 0)
+ return NULL;
+
+ fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
+
+ oprofile_add_trace(bufhead[0].return_address);
+
+ /* frame pointers should strictly progress back up the stack
+ * (towards higher addresses) */
+ if (head >= fp)
+ return NULL;
+
+ return fp;
+}
+
+static inline int
+x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
+{
+ struct stack_frame_ia32 *head;
+
+ /* User process is IA32 */
+ if (!current || !test_thread_flag(TIF_IA32))
+ return 0;
+
+ head = (struct stack_frame_ia32 *) regs->bp;
+ while (depth-- && head)
+ head = dump_user_backtrace_32(head);
+
+ return 1;
+}
+
+#else
+static inline int
+x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
+{
+ return 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
+{
+ /* Also check accessibility of one struct frame_head beyond: */
+ struct stack_frame bufhead[2];
+ unsigned long bytes;
+
+ bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+ if (bytes != 0)
+ return NULL;
+
+ oprofile_add_trace(bufhead[0].return_address);
+
+ /* frame pointers should strictly progress back up the stack
+ * (towards higher addresses) */
+ if (head >= bufhead[0].next_frame)
+ return NULL;
+
+ return bufhead[0].next_frame;
+}
+
+void
+x86_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+ struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
+
+ if (!user_mode(regs)) {
+ unsigned long stack = kernel_stack_pointer(regs);
+ if (depth)
+ dump_trace(NULL, regs, (unsigned long *)stack, 0,
+ &backtrace_ops, &depth);
+ return;
+ }
+
+ if (x86_backtrace_32(regs, depth))
+ return;
+
+ while (depth-- && head)
+ head = dump_user_backtrace(head);
+}
diff --git a/kernel/arch/x86/oprofile/init.c b/kernel/arch/x86/oprofile/init.c
new file mode 100644
index 000000000..9e138d00a
--- /dev/null
+++ b/kernel/arch/x86/oprofile/init.c
@@ -0,0 +1,38 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+/*
+ * We support CPUs that have performance counters like the Pentium Pro
+ * with the NMI mode driver.
+ */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+extern int op_nmi_init(struct oprofile_operations *ops);
+extern void op_nmi_exit(void);
+#else
+static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
+static void op_nmi_exit(void) { }
+#endif
+
+extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+ ops->backtrace = x86_backtrace;
+ return op_nmi_init(ops);
+}
+
+void oprofile_arch_exit(void)
+{
+ op_nmi_exit();
+}
diff --git a/kernel/arch/x86/oprofile/nmi_int.c b/kernel/arch/x86/oprofile/nmi_int.c
new file mode 100644
index 000000000..1d2e6392f
--- /dev/null
+++ b/kernel/arch/x86/oprofile/nmi_int.c
@@ -0,0 +1,802 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002-2009 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ * @author Robert Richter <robert.richter@amd.com>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/syscore_ops.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+#include <linux/kdebug.h>
+#include <linux/cpu.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+
+#include "op_counter.h"
+#include "op_x86_model.h"
+
+static struct op_x86_model_spec *model;
+static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
+static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
+
+/* must be protected with get_online_cpus()/put_online_cpus(): */
+static int nmi_enabled;
+static int ctr_running;
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+/* common functions */
+
+u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+ struct op_counter_config *counter_config)
+{
+ u64 val = 0;
+ u16 event = (u16)counter_config->event;
+
+ val |= ARCH_PERFMON_EVENTSEL_INT;
+ val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
+ val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
+ val |= (counter_config->unit_mask & 0xFF) << 8;
+ counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV |
+ ARCH_PERFMON_EVENTSEL_EDGE |
+ ARCH_PERFMON_EVENTSEL_CMASK);
+ val |= counter_config->extra;
+ event &= model->event_mask ? model->event_mask : 0xFF;
+ val |= event & 0xFF;
+ val |= (u64)(event & 0x0F00) << 24;
+
+ return val;
+}
+
+
+static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs)
+{
+ if (ctr_running)
+ model->check_ctrs(regs, this_cpu_ptr(&cpu_msrs));
+ else if (!nmi_enabled)
+ return NMI_DONE;
+ else
+ model->stop(this_cpu_ptr(&cpu_msrs));
+ return NMI_HANDLED;
+}
+
+static void nmi_cpu_save_registers(struct op_msrs *msrs)
+{
+ struct op_msr *counters = msrs->counters;
+ struct op_msr *controls = msrs->controls;
+ unsigned int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ if (counters[i].addr)
+ rdmsrl(counters[i].addr, counters[i].saved);
+ }
+
+ for (i = 0; i < model->num_controls; ++i) {
+ if (controls[i].addr)
+ rdmsrl(controls[i].addr, controls[i].saved);
+ }
+}
+
+static void nmi_cpu_start(void *dummy)
+{
+ struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs);
+ if (!msrs->controls)
+ WARN_ON_ONCE(1);
+ else
+ model->start(msrs);
+}
+
+static int nmi_start(void)
+{
+ get_online_cpus();
+ ctr_running = 1;
+ /* make ctr_running visible to the nmi handler: */
+ smp_mb();
+ on_each_cpu(nmi_cpu_start, NULL, 1);
+ put_online_cpus();
+ return 0;
+}
+
+static void nmi_cpu_stop(void *dummy)
+{
+ struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs);
+ if (!msrs->controls)
+ WARN_ON_ONCE(1);
+ else
+ model->stop(msrs);
+}
+
+static void nmi_stop(void)
+{
+ get_online_cpus();
+ on_each_cpu(nmi_cpu_stop, NULL, 1);
+ ctr_running = 0;
+ put_online_cpus();
+}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static DEFINE_PER_CPU(int, switch_index);
+
+static inline int has_mux(void)
+{
+ return !!model->switch_ctrl;
+}
+
+inline int op_x86_phys_to_virt(int phys)
+{
+ return __this_cpu_read(switch_index) + phys;
+}
+
+inline int op_x86_virt_to_phys(int virt)
+{
+ return virt % model->num_counters;
+}
+
+static void nmi_shutdown_mux(void)
+{
+ int i;
+
+ if (!has_mux())
+ return;
+
+ for_each_possible_cpu(i) {
+ kfree(per_cpu(cpu_msrs, i).multiplex);
+ per_cpu(cpu_msrs, i).multiplex = NULL;
+ per_cpu(switch_index, i) = 0;
+ }
+}
+
+static int nmi_setup_mux(void)
+{
+ size_t multiplex_size =
+ sizeof(struct op_msr) * model->num_virt_counters;
+ int i;
+
+ if (!has_mux())
+ return 1;
+
+ for_each_possible_cpu(i) {
+ per_cpu(cpu_msrs, i).multiplex =
+ kzalloc(multiplex_size, GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).multiplex)
+ return 0;
+ }
+
+ return 1;
+}
+
+static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
+{
+ int i;
+ struct op_msr *multiplex = msrs->multiplex;
+
+ if (!has_mux())
+ return;
+
+ for (i = 0; i < model->num_virt_counters; ++i) {
+ if (counter_config[i].enabled) {
+ multiplex[i].saved = -(u64)counter_config[i].count;
+ } else {
+ multiplex[i].saved = 0;
+ }
+ }
+
+ per_cpu(switch_index, cpu) = 0;
+}
+
+static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
+{
+ struct op_msr *counters = msrs->counters;
+ struct op_msr *multiplex = msrs->multiplex;
+ int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (counters[i].addr)
+ rdmsrl(counters[i].addr, multiplex[virt].saved);
+ }
+}
+
+static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
+{
+ struct op_msr *counters = msrs->counters;
+ struct op_msr *multiplex = msrs->multiplex;
+ int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (counters[i].addr)
+ wrmsrl(counters[i].addr, multiplex[virt].saved);
+ }
+}
+
+static void nmi_cpu_switch(void *dummy)
+{
+ int cpu = smp_processor_id();
+ int si = per_cpu(switch_index, cpu);
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
+
+ nmi_cpu_stop(NULL);
+ nmi_cpu_save_mpx_registers(msrs);
+
+ /* move to next set */
+ si += model->num_counters;
+ if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
+ per_cpu(switch_index, cpu) = 0;
+ else
+ per_cpu(switch_index, cpu) = si;
+
+ model->switch_ctrl(model, msrs);
+ nmi_cpu_restore_mpx_registers(msrs);
+
+ nmi_cpu_start(NULL);
+}
+
+
+/*
+ * Quick check to see if multiplexing is necessary.
+ * The check should be sufficient since counters are used
+ * in ordre.
+ */
+static int nmi_multiplex_on(void)
+{
+ return counter_config[model->num_counters].count ? 0 : -EINVAL;
+}
+
+static int nmi_switch_event(void)
+{
+ if (!has_mux())
+ return -ENOSYS; /* not implemented */
+ if (nmi_multiplex_on() < 0)
+ return -EINVAL; /* not necessary */
+
+ get_online_cpus();
+ if (ctr_running)
+ on_each_cpu(nmi_cpu_switch, NULL, 1);
+ put_online_cpus();
+
+ return 0;
+}
+
+static inline void mux_init(struct oprofile_operations *ops)
+{
+ if (has_mux())
+ ops->switch_events = nmi_switch_event;
+}
+
+static void mux_clone(int cpu)
+{
+ if (!has_mux())
+ return;
+
+ memcpy(per_cpu(cpu_msrs, cpu).multiplex,
+ per_cpu(cpu_msrs, 0).multiplex,
+ sizeof(struct op_msr) * model->num_virt_counters);
+}
+
+#else
+
+inline int op_x86_phys_to_virt(int phys) { return phys; }
+inline int op_x86_virt_to_phys(int virt) { return virt; }
+static inline void nmi_shutdown_mux(void) { }
+static inline int nmi_setup_mux(void) { return 1; }
+static inline void
+nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }
+static inline void mux_init(struct oprofile_operations *ops) { }
+static void mux_clone(int cpu) { }
+
+#endif
+
+static void free_msrs(void)
+{
+ int i;
+ for_each_possible_cpu(i) {
+ kfree(per_cpu(cpu_msrs, i).counters);
+ per_cpu(cpu_msrs, i).counters = NULL;
+ kfree(per_cpu(cpu_msrs, i).controls);
+ per_cpu(cpu_msrs, i).controls = NULL;
+ }
+ nmi_shutdown_mux();
+}
+
+static int allocate_msrs(void)
+{
+ size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+ size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+ int i;
+ for_each_possible_cpu(i) {
+ per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
+ GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).counters)
+ goto fail;
+ per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
+ GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).controls)
+ goto fail;
+ }
+
+ if (!nmi_setup_mux())
+ goto fail;
+
+ return 1;
+
+fail:
+ free_msrs();
+ return 0;
+}
+
+static void nmi_cpu_setup(void *dummy)
+{
+ int cpu = smp_processor_id();
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
+ nmi_cpu_save_registers(msrs);
+ raw_spin_lock(&oprofilefs_lock);
+ model->setup_ctrs(model, msrs);
+ nmi_cpu_setup_mux(cpu, msrs);
+ raw_spin_unlock(&oprofilefs_lock);
+ per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+static void nmi_cpu_restore_registers(struct op_msrs *msrs)
+{
+ struct op_msr *counters = msrs->counters;
+ struct op_msr *controls = msrs->controls;
+ unsigned int i;
+
+ for (i = 0; i < model->num_controls; ++i) {
+ if (controls[i].addr)
+ wrmsrl(controls[i].addr, controls[i].saved);
+ }
+
+ for (i = 0; i < model->num_counters; ++i) {
+ if (counters[i].addr)
+ wrmsrl(counters[i].addr, counters[i].saved);
+ }
+}
+
+static void nmi_cpu_shutdown(void *dummy)
+{
+ unsigned int v;
+ int cpu = smp_processor_id();
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
+
+ /* restoring APIC_LVTPC can trigger an apic error because the delivery
+ * mode and vector nr combination can be illegal. That's by design: on
+ * power on apic lvt contain a zero vector nr which are legal only for
+ * NMI delivery mode. So inhibit apic err before restoring lvtpc
+ */
+ v = apic_read(APIC_LVTERR);
+ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
+ apic_write(APIC_LVTERR, v);
+ nmi_cpu_restore_registers(msrs);
+}
+
+static void nmi_cpu_up(void *dummy)
+{
+ if (nmi_enabled)
+ nmi_cpu_setup(dummy);
+ if (ctr_running)
+ nmi_cpu_start(dummy);
+}
+
+static void nmi_cpu_down(void *dummy)
+{
+ if (ctr_running)
+ nmi_cpu_stop(dummy);
+ if (nmi_enabled)
+ nmi_cpu_shutdown(dummy);
+}
+
+static int nmi_create_files(struct dentry *root)
+{
+ unsigned int i;
+
+ for (i = 0; i < model->num_virt_counters; ++i) {
+ struct dentry *dir;
+ char buf[4];
+
+ /* quick little hack to _not_ expose a counter if it is not
+ * available for use. This should protect userspace app.
+ * NOTE: assumes 1:1 mapping here (that counters are organized
+ * sequentially in their struct assignment).
+ */
+ if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
+ continue;
+
+ snprintf(buf, sizeof(buf), "%d", i);
+ dir = oprofilefs_mkdir(root, buf);
+ oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled);
+ oprofilefs_create_ulong(dir, "event", &counter_config[i].event);
+ oprofilefs_create_ulong(dir, "count", &counter_config[i].count);
+ oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask);
+ oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel);
+ oprofilefs_create_ulong(dir, "user", &counter_config[i].user);
+ oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra);
+ }
+
+ return 0;
+}
+
+static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
+ void *data)
+{
+ int cpu = (unsigned long)data;
+ switch (action) {
+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ smp_call_function_single(cpu, nmi_cpu_up, NULL, 0);
+ break;
+ case CPU_DOWN_PREPARE:
+ smp_call_function_single(cpu, nmi_cpu_down, NULL, 1);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block oprofile_cpu_nb = {
+ .notifier_call = oprofile_cpu_notifier
+};
+
+static int nmi_setup(void)
+{
+ int err = 0;
+ int cpu;
+
+ if (!allocate_msrs())
+ return -ENOMEM;
+
+ /* We need to serialize save and setup for HT because the subset
+ * of msrs are distinct for save and setup operations
+ */
+
+ /* Assume saved/restored counters are the same on all CPUs */
+ err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
+ if (err)
+ goto fail;
+
+ for_each_possible_cpu(cpu) {
+ if (!cpu)
+ continue;
+
+ memcpy(per_cpu(cpu_msrs, cpu).counters,
+ per_cpu(cpu_msrs, 0).counters,
+ sizeof(struct op_msr) * model->num_counters);
+
+ memcpy(per_cpu(cpu_msrs, cpu).controls,
+ per_cpu(cpu_msrs, 0).controls,
+ sizeof(struct op_msr) * model->num_controls);
+
+ mux_clone(cpu);
+ }
+
+ nmi_enabled = 0;
+ ctr_running = 0;
+ /* make variables visible to the nmi handler: */
+ smp_mb();
+ err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify,
+ 0, "oprofile");
+ if (err)
+ goto fail;
+
+ cpu_notifier_register_begin();
+
+ /* Use get/put_online_cpus() to protect 'nmi_enabled' */
+ get_online_cpus();
+ nmi_enabled = 1;
+ /* make nmi_enabled visible to the nmi handler: */
+ smp_mb();
+ on_each_cpu(nmi_cpu_setup, NULL, 1);
+ __register_cpu_notifier(&oprofile_cpu_nb);
+ put_online_cpus();
+
+ cpu_notifier_register_done();
+
+ return 0;
+fail:
+ free_msrs();
+ return err;
+}
+
+static void nmi_shutdown(void)
+{
+ struct op_msrs *msrs;
+
+ cpu_notifier_register_begin();
+
+ /* Use get/put_online_cpus() to protect 'nmi_enabled' & 'ctr_running' */
+ get_online_cpus();
+ on_each_cpu(nmi_cpu_shutdown, NULL, 1);
+ nmi_enabled = 0;
+ ctr_running = 0;
+ __unregister_cpu_notifier(&oprofile_cpu_nb);
+ put_online_cpus();
+
+ cpu_notifier_register_done();
+
+ /* make variables visible to the nmi handler: */
+ smp_mb();
+ unregister_nmi_handler(NMI_LOCAL, "oprofile");
+ msrs = &get_cpu_var(cpu_msrs);
+ model->shutdown(msrs);
+ free_msrs();
+ put_cpu_var(cpu_msrs);
+}
+
+#ifdef CONFIG_PM
+
+static int nmi_suspend(void)
+{
+ /* Only one CPU left, just stop that one */
+ if (nmi_enabled == 1)
+ nmi_cpu_stop(NULL);
+ return 0;
+}
+
+static void nmi_resume(void)
+{
+ if (nmi_enabled == 1)
+ nmi_cpu_start(NULL);
+}
+
+static struct syscore_ops oprofile_syscore_ops = {
+ .resume = nmi_resume,
+ .suspend = nmi_suspend,
+};
+
+static void __init init_suspend_resume(void)
+{
+ register_syscore_ops(&oprofile_syscore_ops);
+}
+
+static void exit_suspend_resume(void)
+{
+ unregister_syscore_ops(&oprofile_syscore_ops);
+}
+
+#else
+
+static inline void init_suspend_resume(void) { }
+static inline void exit_suspend_resume(void) { }
+
+#endif /* CONFIG_PM */
+
+static int __init p4_init(char **cpu_type)
+{
+ __u8 cpu_model = boot_cpu_data.x86_model;
+
+ if (cpu_model > 6 || cpu_model == 5)
+ return 0;
+
+#ifndef CONFIG_SMP
+ *cpu_type = "i386/p4";
+ model = &op_p4_spec;
+ return 1;
+#else
+ switch (smp_num_siblings) {
+ case 1:
+ *cpu_type = "i386/p4";
+ model = &op_p4_spec;
+ return 1;
+
+ case 2:
+ *cpu_type = "i386/p4-ht";
+ model = &op_p4_ht2_spec;
+ return 1;
+ }
+#endif
+
+ printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
+ printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
+ return 0;
+}
+
+enum __force_cpu_type {
+ reserved = 0, /* do not force */
+ timer,
+ arch_perfmon,
+};
+
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+ if (!strcmp(str, "timer")) {
+ force_cpu_type = timer;
+ printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
+ } else if (!strcmp(str, "arch_perfmon")) {
+ force_cpu_type = arch_perfmon;
+ printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+ } else {
+ force_cpu_type = 0;
+ }
+
+ return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+
+static int __init ppro_init(char **cpu_type)
+{
+ __u8 cpu_model = boot_cpu_data.x86_model;
+ struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
+
+ if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
+ return 0;
+
+ /*
+ * Documentation on identifying Intel processors by CPU family
+ * and model can be found in the Intel Software Developer's
+ * Manuals (SDM):
+ *
+ * http://www.intel.com/products/processor/manuals/
+ *
+ * As of May 2010 the documentation for this was in the:
+ * "Intel 64 and IA-32 Architectures Software Developer's
+ * Manual Volume 3B: System Programming Guide", "Table B-1
+ * CPUID Signature Values of DisplayFamily_DisplayModel".
+ */
+ switch (cpu_model) {
+ case 0 ... 2:
+ *cpu_type = "i386/ppro";
+ break;
+ case 3 ... 5:
+ *cpu_type = "i386/pii";
+ break;
+ case 6 ... 8:
+ case 10 ... 11:
+ *cpu_type = "i386/piii";
+ break;
+ case 9:
+ case 13:
+ *cpu_type = "i386/p6_mobile";
+ break;
+ case 14:
+ *cpu_type = "i386/core";
+ break;
+ case 0x0f:
+ case 0x16:
+ case 0x17:
+ case 0x1d:
+ *cpu_type = "i386/core_2";
+ break;
+ case 0x1a:
+ case 0x1e:
+ case 0x2e:
+ spec = &op_arch_perfmon_spec;
+ *cpu_type = "i386/core_i7";
+ break;
+ case 0x1c:
+ *cpu_type = "i386/atom";
+ break;
+ default:
+ /* Unknown */
+ return 0;
+ }
+
+ model = spec;
+ return 1;
+}
+
+int __init op_nmi_init(struct oprofile_operations *ops)
+{
+ __u8 vendor = boot_cpu_data.x86_vendor;
+ __u8 family = boot_cpu_data.x86;
+ char *cpu_type = NULL;
+ int ret = 0;
+
+ if (!cpu_has_apic)
+ return -ENODEV;
+
+ if (force_cpu_type == timer)
+ return -ENODEV;
+
+ switch (vendor) {
+ case X86_VENDOR_AMD:
+ /* Needs to be at least an Athlon (or hammer in 32bit mode) */
+
+ switch (family) {
+ case 6:
+ cpu_type = "i386/athlon";
+ break;
+ case 0xf:
+ /*
+ * Actually it could be i386/hammer too, but
+ * give user space an consistent name.
+ */
+ cpu_type = "x86-64/hammer";
+ break;
+ case 0x10:
+ cpu_type = "x86-64/family10";
+ break;
+ case 0x11:
+ cpu_type = "x86-64/family11h";
+ break;
+ case 0x12:
+ cpu_type = "x86-64/family12h";
+ break;
+ case 0x14:
+ cpu_type = "x86-64/family14h";
+ break;
+ case 0x15:
+ cpu_type = "x86-64/family15h";
+ break;
+ default:
+ return -ENODEV;
+ }
+ model = &op_amd_spec;
+ break;
+
+ case X86_VENDOR_INTEL:
+ switch (family) {
+ /* Pentium IV */
+ case 0xf:
+ p4_init(&cpu_type);
+ break;
+
+ /* A P6-class processor */
+ case 6:
+ ppro_init(&cpu_type);
+ break;
+
+ default:
+ break;
+ }
+
+ if (cpu_type)
+ break;
+
+ if (!cpu_has_arch_perfmon)
+ return -ENODEV;
+
+ /* use arch perfmon as fallback */
+ cpu_type = "i386/arch_perfmon";
+ model = &op_arch_perfmon_spec;
+ break;
+
+ default:
+ return -ENODEV;
+ }
+
+ /* default values, can be overwritten by model */
+ ops->create_files = nmi_create_files;
+ ops->setup = nmi_setup;
+ ops->shutdown = nmi_shutdown;
+ ops->start = nmi_start;
+ ops->stop = nmi_stop;
+ ops->cpu_type = cpu_type;
+
+ if (model->init)
+ ret = model->init(ops);
+ if (ret)
+ return ret;
+
+ if (!model->num_virt_counters)
+ model->num_virt_counters = model->num_counters;
+
+ mux_init(ops);
+
+ init_suspend_resume();
+
+ printk(KERN_INFO "oprofile: using NMI interrupt.\n");
+ return 0;
+}
+
+void op_nmi_exit(void)
+{
+ exit_suspend_resume();
+}
diff --git a/kernel/arch/x86/oprofile/op_counter.h b/kernel/arch/x86/oprofile/op_counter.h
new file mode 100644
index 000000000..0b7b7b179
--- /dev/null
+++ b/kernel/arch/x86/oprofile/op_counter.h
@@ -0,0 +1,30 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 32
+
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+ unsigned long count;
+ unsigned long enabled;
+ unsigned long event;
+ unsigned long kernel;
+ unsigned long user;
+ unsigned long unit_mask;
+ unsigned long extra;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff --git a/kernel/arch/x86/oprofile/op_model_amd.c b/kernel/arch/x86/oprofile/op_model_amd.c
new file mode 100644
index 000000000..50d86c0e9
--- /dev/null
+++ b/kernel/arch/x86/oprofile/op_model_amd.c
@@ -0,0 +1,543 @@
+/*
+ * @file op_model_amd.c
+ * athlon / K7 / K8 / Family 10h model-specific MSR operations
+ *
+ * @remark Copyright 2002-2009 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ * @author Robert Richter <robert.richter@amd.com>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/percpu.h>
+
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+#define NUM_VIRT_COUNTERS 32
+#else
+#define NUM_VIRT_COUNTERS 0
+#endif
+
+#define OP_EVENT_MASK 0x0FFF
+#define OP_CTR_OVERFLOW (1ULL<<31)
+
+#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
+
+static int num_counters;
+static unsigned long reset_value[OP_MAX_COUNTER];
+
+#define IBS_FETCH_SIZE 6
+#define IBS_OP_SIZE 12
+
+static u32 ibs_caps;
+
+struct ibs_config {
+ unsigned long op_enabled;
+ unsigned long fetch_enabled;
+ unsigned long max_cnt_fetch;
+ unsigned long max_cnt_op;
+ unsigned long rand_en;
+ unsigned long dispatched_ops;
+ unsigned long branch_target;
+};
+
+struct ibs_state {
+ u64 ibs_op_ctl;
+ int branch_target;
+ unsigned long sample_size;
+};
+
+static struct ibs_config ibs_config;
+static struct ibs_state ibs_state;
+
+/*
+ * IBS randomization macros
+ */
+#define IBS_RANDOM_BITS 12
+#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)
+#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))
+
+/*
+ * 16-bit Linear Feedback Shift Register (LFSR)
+ *
+ * 16 14 13 11
+ * Feedback polynomial = X + X + X + X + 1
+ */
+static unsigned int lfsr_random(void)
+{
+ static unsigned int lfsr_value = 0xF00D;
+ unsigned int bit;
+
+ /* Compute next bit to shift in */
+ bit = ((lfsr_value >> 0) ^
+ (lfsr_value >> 2) ^
+ (lfsr_value >> 3) ^
+ (lfsr_value >> 5)) & 0x0001;
+
+ /* Advance to next register value */
+ lfsr_value = (lfsr_value >> 1) | (bit << 15);
+
+ return lfsr_value;
+}
+
+/*
+ * IBS software randomization
+ *
+ * The IBS periodic op counter is randomized in software. The lower 12
+ * bits of the 20 bit counter are randomized. IbsOpCurCnt is
+ * initialized with a 12 bit random value.
+ */
+static inline u64 op_amd_randomize_ibs_op(u64 val)
+{
+ unsigned int random = lfsr_random();
+
+ if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
+ /*
+ * Work around if the hw can not write to IbsOpCurCnt
+ *
+ * Randomize the lower 8 bits of the 16 bit
+ * IbsOpMaxCnt [15:0] value in the range of -128 to
+ * +127 by adding/subtracting an offset to the
+ * maximum count (IbsOpMaxCnt).
+ *
+ * To avoid over or underflows and protect upper bits
+ * starting at bit 16, the initial value for
+ * IbsOpMaxCnt must fit in the range from 0x0081 to
+ * 0xff80.
+ */
+ val += (s8)(random >> 4);
+ else
+ val |= (u64)(random & IBS_RANDOM_MASK) << 32;
+
+ return val;
+}
+
+static inline void
+op_amd_handle_ibs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ u64 val, ctl;
+ struct op_entry entry;
+
+ if (!ibs_caps)
+ return;
+
+ if (ibs_config.fetch_enabled) {
+ rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+ if (ctl & IBS_FETCH_VAL) {
+ rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
+ oprofile_write_reserve(&entry, regs, val,
+ IBS_FETCH_CODE, IBS_FETCH_SIZE);
+ oprofile_add_data64(&entry, val);
+ oprofile_add_data64(&entry, ctl);
+ rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
+ oprofile_add_data64(&entry, val);
+ oprofile_write_commit(&entry);
+
+ /* reenable the IRQ */
+ ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
+ ctl |= IBS_FETCH_ENABLE;
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+ }
+ }
+
+ if (ibs_config.op_enabled) {
+ rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
+ if (ctl & IBS_OP_VAL) {
+ rdmsrl(MSR_AMD64_IBSOPRIP, val);
+ oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
+ ibs_state.sample_size);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSOPDATA, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSOPDATA2, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSOPDATA3, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSDCLINAD, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
+ oprofile_add_data64(&entry, val);
+ if (ibs_state.branch_target) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, val);
+ oprofile_add_data(&entry, (unsigned long)val);
+ }
+ oprofile_write_commit(&entry);
+
+ /* reenable the IRQ */
+ ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
+ wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
+ }
+ }
+}
+
+static inline void op_amd_start_ibs(void)
+{
+ u64 val;
+
+ if (!ibs_caps)
+ return;
+
+ memset(&ibs_state, 0, sizeof(ibs_state));
+
+ /*
+ * Note: Since the max count settings may out of range we
+ * write back the actual used values so that userland can read
+ * it.
+ */
+
+ if (ibs_config.fetch_enabled) {
+ val = ibs_config.max_cnt_fetch >> 4;
+ val = min(val, IBS_FETCH_MAX_CNT);
+ ibs_config.max_cnt_fetch = val << 4;
+ val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
+ val |= IBS_FETCH_ENABLE;
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
+ }
+
+ if (ibs_config.op_enabled) {
+ val = ibs_config.max_cnt_op >> 4;
+ if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
+ /*
+ * IbsOpCurCnt not supported. See
+ * op_amd_randomize_ibs_op() for details.
+ */
+ val = clamp(val, 0x0081ULL, 0xFF80ULL);
+ ibs_config.max_cnt_op = val << 4;
+ } else {
+ /*
+ * The start value is randomized with a
+ * positive offset, we need to compensate it
+ * with the half of the randomized range. Also
+ * avoid underflows.
+ */
+ val += IBS_RANDOM_MAXCNT_OFFSET;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ val = min(val, IBS_OP_MAX_CNT_EXT);
+ else
+ val = min(val, IBS_OP_MAX_CNT);
+ ibs_config.max_cnt_op =
+ (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
+ }
+ val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
+ val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+ val |= IBS_OP_ENABLE;
+ ibs_state.ibs_op_ctl = val;
+ ibs_state.sample_size = IBS_OP_SIZE;
+ if (ibs_config.branch_target) {
+ ibs_state.branch_target = 1;
+ ibs_state.sample_size++;
+ }
+ val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
+ wrmsrl(MSR_AMD64_IBSOPCTL, val);
+ }
+}
+
+static void op_amd_stop_ibs(void)
+{
+ if (!ibs_caps)
+ return;
+
+ if (ibs_config.fetch_enabled)
+ /* clear max count and enable */
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
+
+ if (ibs_config.op_enabled)
+ /* clear max count and enable */
+ wrmsrl(MSR_AMD64_IBSOPCTL, 0);
+}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ /* enable active counters */
+ for (i = 0; i < num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (!reset_value[virt])
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[virt]);
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+}
+
+#endif
+
+/* functions for op_amd_spec */
+
+static void op_amd_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!msrs->counters[i].addr)
+ continue;
+ release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+}
+
+static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
+{
+ int i;
+
+ for (i = 0; i < num_counters; i++) {
+ if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+ goto fail;
+ if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
+ release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ goto fail;
+ }
+ /* both registers must be reserved */
+ if (num_counters == AMD64_NUM_COUNTERS_CORE) {
+ msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
+ msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
+ } else {
+ msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+ msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+ }
+ continue;
+ fail:
+ if (!counter_config[i].enabled)
+ continue;
+ op_x86_warn_reserved(i);
+ op_amd_shutdown(msrs);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ /* setup reset_value */
+ for (i = 0; i < OP_MAX_COUNTER; ++i) {
+ if (counter_config[i].enabled
+ && msrs->counters[op_x86_virt_to_phys(i)].addr)
+ reset_value[i] = counter_config[i].count;
+ else
+ reset_value[i] = 0;
+ }
+
+ /* clear all counters */
+ for (i = 0; i < num_counters; ++i) {
+ if (!msrs->controls[i].addr)
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+ op_x86_warn_in_use(i);
+ val &= model->reserved;
+ wrmsrl(msrs->controls[i].addr, val);
+ /*
+ * avoid a false detection of ctr overflows in NMI
+ * handler
+ */
+ wrmsrl(msrs->counters[i].addr, -1LL);
+ }
+
+ /* enable active counters */
+ for (i = 0; i < num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (!reset_value[virt])
+ continue;
+
+ /* setup counter registers */
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
+
+ /* setup control registers */
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[virt]);
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+}
+
+static int op_amd_check_ctrs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (!reset_value[virt])
+ continue;
+ rdmsrl(msrs->counters[i].addr, val);
+ /* bit is clear if overflowed: */
+ if (val & OP_CTR_OVERFLOW)
+ continue;
+ oprofile_add_sample(regs, virt);
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
+ }
+
+ op_amd_handle_ibs(regs, msrs);
+
+ /* See op_model_ppro.c */
+ return 1;
+}
+
+static void op_amd_start(struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[op_x86_phys_to_virt(i)])
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+
+ op_amd_start_ibs();
+}
+
+static void op_amd_stop(struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ /*
+ * Subtle: stop on all counters to avoid race with setting our
+ * pm callback
+ */
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[op_x86_phys_to_virt(i)])
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+
+ op_amd_stop_ibs();
+}
+
+/*
+ * check and reserve APIC extended interrupt LVT offset for IBS if
+ * available
+ */
+
+static void init_ibs(void)
+{
+ ibs_caps = get_ibs_caps();
+
+ if (!ibs_caps)
+ return;
+
+ printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
+}
+
+static int (*create_arch_files)(struct dentry *root);
+
+static int setup_ibs_files(struct dentry *root)
+{
+ struct dentry *dir;
+ int ret = 0;
+
+ /* architecture specific files */
+ if (create_arch_files)
+ ret = create_arch_files(root);
+
+ if (ret)
+ return ret;
+
+ if (!ibs_caps)
+ return ret;
+
+ /* model specific files */
+
+ /* setup some reasonable defaults */
+ memset(&ibs_config, 0, sizeof(ibs_config));
+ ibs_config.max_cnt_fetch = 250000;
+ ibs_config.max_cnt_op = 250000;
+
+ if (ibs_caps & IBS_CAPS_FETCHSAM) {
+ dir = oprofilefs_mkdir(root, "ibs_fetch");
+ oprofilefs_create_ulong(dir, "enable",
+ &ibs_config.fetch_enabled);
+ oprofilefs_create_ulong(dir, "max_count",
+ &ibs_config.max_cnt_fetch);
+ oprofilefs_create_ulong(dir, "rand_enable",
+ &ibs_config.rand_en);
+ }
+
+ if (ibs_caps & IBS_CAPS_OPSAM) {
+ dir = oprofilefs_mkdir(root, "ibs_op");
+ oprofilefs_create_ulong(dir, "enable",
+ &ibs_config.op_enabled);
+ oprofilefs_create_ulong(dir, "max_count",
+ &ibs_config.max_cnt_op);
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ oprofilefs_create_ulong(dir, "dispatched_ops",
+ &ibs_config.dispatched_ops);
+ if (ibs_caps & IBS_CAPS_BRNTRGT)
+ oprofilefs_create_ulong(dir, "branch_target",
+ &ibs_config.branch_target);
+ }
+
+ return 0;
+}
+
+struct op_x86_model_spec op_amd_spec;
+
+static int op_amd_init(struct oprofile_operations *ops)
+{
+ init_ibs();
+ create_arch_files = ops->create_files;
+ ops->create_files = setup_ibs_files;
+
+ if (boot_cpu_data.x86 == 0x15) {
+ num_counters = AMD64_NUM_COUNTERS_CORE;
+ } else {
+ num_counters = AMD64_NUM_COUNTERS;
+ }
+
+ op_amd_spec.num_counters = num_counters;
+ op_amd_spec.num_controls = num_counters;
+ op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+
+ return 0;
+}
+
+struct op_x86_model_spec op_amd_spec = {
+ /* num_counters/num_controls filled in at runtime */
+ .reserved = MSR_AMD_EVENTSEL_RESERVED,
+ .event_mask = OP_EVENT_MASK,
+ .init = op_amd_init,
+ .fill_in_addresses = &op_amd_fill_in_addresses,
+ .setup_ctrs = &op_amd_setup_ctrs,
+ .check_ctrs = &op_amd_check_ctrs,
+ .start = &op_amd_start,
+ .stop = &op_amd_stop,
+ .shutdown = &op_amd_shutdown,
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ .switch_ctrl = &op_mux_switch_ctrl,
+#endif
+};
diff --git a/kernel/arch/x86/oprofile/op_model_p4.c b/kernel/arch/x86/oprofile/op_model_p4.c
new file mode 100644
index 000000000..ad1d91f47
--- /dev/null
+++ b/kernel/arch/x86/oprofile/op_model_p4.c
@@ -0,0 +1,723 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ */
+
+#include <linux/oprofile.h>
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+#define OP_CTR_OVERFLOW (1ULL<<31)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+static unsigned int num_controls = NUM_CONTROLS_NON_HT;
+
+/* this has to be checked dynamically since the
+ hyper-threadedness of a chip is discovered at
+ kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+ if (smp_num_siblings == 2) {
+ num_counters = NUM_COUNTERS_HT2;
+ num_controls = NUM_CONTROLS_HT2;
+ }
+#endif
+}
+
+static inline int addr_increment(void)
+{
+#ifdef CONFIG_SMP
+ return smp_num_siblings == 2 ? 2 : 1;
+#else
+ return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+ int virt_counter;
+ int counter_address;
+ int cccr_address;
+};
+
+struct p4_event_binding {
+ int escr_select; /* value to put in CCCR */
+ int event_select; /* value to put in ESCR */
+ struct {
+ int virt_counter; /* for this counter... */
+ int escr_address; /* use this ESCR */
+ } bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+ event/i386.p4*events. */
+
+
+#define CTR_BPU_0 (1 << 0)
+#define CTR_MS_0 (1 << 1)
+#define CTR_FLAME_0 (1 << 2)
+#define CTR_IQ_4 (1 << 3)
+#define CTR_BPU_2 (1 << 4)
+#define CTR_MS_2 (1 << 5)
+#define CTR_FLAME_2 (1 << 6)
+#define CTR_IQ_5 (1 << 7)
+
+static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
+ { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
+ { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
+ { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+ { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
+ { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
+ { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
+ { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+ { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+
+ { /* BRANCH_RETIRED */
+ 0x05, 0x06,
+ { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* MISPRED_BRANCH_RETIRED */
+ 0x04, 0x03,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* TC_DELIVER_MODE */
+ 0x01, 0x01,
+ { { CTR_MS_0, MSR_P4_TC_ESCR0},
+ { CTR_MS_2, MSR_P4_TC_ESCR1} }
+ },
+
+ { /* BPU_FETCH_REQUEST */
+ 0x00, 0x03,
+ { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+ { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+ },
+
+ { /* ITLB_REFERENCE */
+ 0x03, 0x18,
+ { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+ { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+ },
+
+ { /* MEMORY_CANCEL */
+ 0x05, 0x02,
+ { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+ { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+ },
+
+ { /* MEMORY_COMPLETE */
+ 0x02, 0x08,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* LOAD_PORT_REPLAY */
+ 0x02, 0x04,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* STORE_PORT_REPLAY */
+ 0x02, 0x05,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* MOB_LOAD_REPLAY */
+ 0x02, 0x03,
+ { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+ { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+ },
+
+ { /* PAGE_WALK_TYPE */
+ 0x04, 0x01,
+ { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+ { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+ },
+
+ { /* BSQ_CACHE_REFERENCE */
+ 0x07, 0x0c,
+ { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+ { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+ },
+
+ { /* IOQ_ALLOCATION */
+ 0x06, 0x03,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { 0, 0 } }
+ },
+
+ { /* IOQ_ACTIVE_ENTRIES */
+ 0x06, 0x1a,
+ { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+ { 0, 0 } }
+ },
+
+ { /* FSB_DATA_ACTIVITY */
+ 0x06, 0x17,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+ },
+
+ { /* BSQ_ALLOCATION */
+ 0x07, 0x05,
+ { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+ { 0, 0 } }
+ },
+
+ { /* BSQ_ACTIVE_ENTRIES */
+ 0x07, 0x06,
+ { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
+ { 0, 0 } }
+ },
+
+ { /* X87_ASSIST */
+ 0x05, 0x03,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* SSE_INPUT_ASSIST */
+ 0x01, 0x34,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* PACKED_SP_UOP */
+ 0x01, 0x08,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* PACKED_DP_UOP */
+ 0x01, 0x0c,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* SCALAR_SP_UOP */
+ 0x01, 0x0a,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* SCALAR_DP_UOP */
+ 0x01, 0x0e,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* 64BIT_MMX_UOP */
+ 0x01, 0x02,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* 128BIT_MMX_UOP */
+ 0x01, 0x1a,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* X87_FP_UOP */
+ 0x01, 0x04,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* X87_SIMD_MOVES_UOP */
+ 0x01, 0x2e,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* MACHINE_CLEAR */
+ 0x05, 0x02,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* GLOBAL_POWER_EVENTS */
+ 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+ },
+
+ { /* TC_MS_XFER */
+ 0x00, 0x05,
+ { { CTR_MS_0, MSR_P4_MS_ESCR0},
+ { CTR_MS_2, MSR_P4_MS_ESCR1} }
+ },
+
+ { /* UOP_QUEUE_WRITES */
+ 0x00, 0x09,
+ { { CTR_MS_0, MSR_P4_MS_ESCR0},
+ { CTR_MS_2, MSR_P4_MS_ESCR1} }
+ },
+
+ { /* FRONT_END_EVENT */
+ 0x05, 0x08,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* EXECUTION_EVENT */
+ 0x05, 0x0c,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* REPLAY_EVENT */
+ 0x05, 0x09,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* INSTR_RETIRED */
+ 0x04, 0x02,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* UOPS_RETIRED */
+ 0x04, 0x01,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* UOP_TYPE */
+ 0x02, 0x02,
+ { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+ { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+ },
+
+ { /* RETIRED_MISPRED_BRANCH_TYPE */
+ 0x02, 0x05,
+ { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+ { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+ },
+
+ { /* RETIRED_BRANCH_TYPE */
+ 0x02, 0x04,
+ { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+ { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+ }
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+ the code in this module as an extra array offset, to select the "even"
+ or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+ int cpu = smp_processor_id();
+ return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
+#endif
+ return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+ by passing a "virtual" counter numer to this macro,
+ along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+static void p4_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (msrs->counters[i].addr)
+ release_perfctr_nmi(msrs->counters[i].addr);
+ }
+ /*
+ * some of the control registers are specially reserved in
+ * conjunction with the counter registers (hence the starting offset).
+ * This saves a few bits.
+ */
+ for (i = num_counters; i < num_controls; ++i) {
+ if (msrs->controls[i].addr)
+ release_evntsel_nmi(msrs->controls[i].addr);
+ }
+}
+
+static int p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+ unsigned int i;
+ unsigned int addr, cccraddr, stag;
+
+ setup_num_counters();
+ stag = get_stagger();
+
+ /* the counter & cccr registers we pay attention to */
+ for (i = 0; i < num_counters; ++i) {
+ addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
+ cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
+ if (reserve_perfctr_nmi(addr)) {
+ msrs->counters[i].addr = addr;
+ msrs->controls[i].addr = cccraddr;
+ }
+ }
+
+ /* 43 ESCR registers in three or four discontiguous group */
+ for (addr = MSR_P4_BSU_ESCR0 + stag;
+ addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
+ * to avoid special case in nmi_{save|restore}_registers() */
+ if (boot_cpu_data.x86_model >= 0x3) {
+ for (addr = MSR_P4_BSU_ESCR0 + stag;
+ addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+ } else {
+ for (addr = MSR_P4_IQ_ESCR0 + stag;
+ addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+ }
+
+ for (addr = MSR_P4_RAT_ESCR0 + stag;
+ addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ for (addr = MSR_P4_MS_ESCR0 + stag;
+ addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ for (addr = MSR_P4_IX_ESCR0 + stag;
+ addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
+ if (reserve_evntsel_nmi(addr))
+ msrs->controls[i].addr = addr;
+ }
+
+ /* there are 2 remaining non-contiguously located ESCRs */
+
+ if (num_counters == NUM_COUNTERS_NON_HT) {
+ /* standard non-HT CPUs handle both remaining ESCRs*/
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+ } else if (stag == 0) {
+ /* HT CPUs give the first remainder to the even thread, as
+ the 32nd control register */
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+ } else {
+ /* and two copies of the second to the odd thread,
+ for the 22st and 23nd control registers */
+ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ }
+ }
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!counter_config[i].enabled)
+ continue;
+ if (msrs->controls[i].addr)
+ continue;
+ op_x86_warn_reserved(i);
+ p4_shutdown(msrs);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+ int i;
+ int const maxbind = 2;
+ unsigned int cccr = 0;
+ unsigned int escr = 0;
+ unsigned int high = 0;
+ unsigned int counter_bit;
+ struct p4_event_binding *ev = NULL;
+ unsigned int stag;
+
+ stag = get_stagger();
+
+ /* convert from counter *number* to counter *bit* */
+ counter_bit = 1 << VIRT_CTR(stag, ctr);
+
+ /* find our event binding structure. */
+ if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+ printk(KERN_ERR
+ "oprofile: P4 event code 0x%lx out of range\n",
+ counter_config[ctr].event);
+ return;
+ }
+
+ ev = &(p4_events[counter_config[ctr].event - 1]);
+
+ for (i = 0; i < maxbind; i++) {
+ if (ev->bindings[i].virt_counter & counter_bit) {
+
+ /* modify ESCR */
+ rdmsr(ev->bindings[i].escr_address, escr, high);
+ ESCR_CLEAR(escr);
+ if (stag == 0) {
+ ESCR_SET_USR_0(escr, counter_config[ctr].user);
+ ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+ } else {
+ ESCR_SET_USR_1(escr, counter_config[ctr].user);
+ ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+ }
+ ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+ ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+ wrmsr(ev->bindings[i].escr_address, escr, high);
+
+ /* modify CCCR */
+ rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+ cccr, high);
+ CCCR_CLEAR(cccr);
+ CCCR_SET_REQUIRED_BITS(cccr);
+ CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+ if (stag == 0)
+ CCCR_SET_PMI_OVF_0(cccr);
+ else
+ CCCR_SET_PMI_OVF_1(cccr);
+ wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+ cccr, high);
+ return;
+ }
+ }
+
+ printk(KERN_ERR
+ "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+ counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ unsigned int i;
+ unsigned int low, high;
+ unsigned int stag;
+
+ stag = get_stagger();
+
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+ if (!MISC_PMC_ENABLED_P(low)) {
+ printk(KERN_ERR "oprofile: P4 PMC not available\n");
+ return;
+ }
+
+ /* clear the cccrs we will use */
+ for (i = 0; i < num_counters; i++) {
+ if (unlikely(!msrs->controls[i].addr))
+ continue;
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ CCCR_CLEAR(low);
+ CCCR_SET_REQUIRED_BITS(low);
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ }
+
+ /* clear all escrs (including those outside our concern) */
+ for (i = num_counters; i < num_controls; i++) {
+ if (unlikely(!msrs->controls[i].addr))
+ continue;
+ wrmsr(msrs->controls[i].addr, 0, 0);
+ }
+
+ /* setup all counters */
+ for (i = 0; i < num_counters; ++i) {
+ if (counter_config[i].enabled && msrs->controls[i].addr) {
+ reset_value[i] = counter_config[i].count;
+ pmc_setup_one_p4_counter(i);
+ wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
+ -(u64)counter_config[i].count);
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+}
+
+
+static int p4_check_ctrs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ unsigned long ctr, low, high, stag, real;
+ int i;
+
+ stag = get_stagger();
+
+ for (i = 0; i < num_counters; ++i) {
+
+ if (!reset_value[i])
+ continue;
+
+ /*
+ * there is some eccentricity in the hardware which
+ * requires that we perform 2 extra corrections:
+ *
+ * - check both the CCCR:OVF flag for overflow and the
+ * counter high bit for un-flagged overflows.
+ *
+ * - write the counter back twice to ensure it gets
+ * updated properly.
+ *
+ * the former seems to be related to extra NMIs happening
+ * during the current NMI; the latter is reported as errata
+ * N15 in intel doc 249199-029, pentium 4 specification
+ * update, though their suggested work-around does not
+ * appear to solve the problem.
+ */
+
+ real = VIRT_CTR(stag, i);
+
+ rdmsr(p4_counters[real].cccr_address, low, high);
+ rdmsr(p4_counters[real].counter_address, ctr, high);
+ if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
+ oprofile_add_sample(regs, i);
+ wrmsrl(p4_counters[real].counter_address,
+ -(u64)reset_value[i]);
+ CCCR_CLEAR_OVF(low);
+ wrmsr(p4_counters[real].cccr_address, low, high);
+ wrmsrl(p4_counters[real].counter_address,
+ -(u64)reset_value[i]);
+ }
+ }
+
+ /* P4 quirk: you have to re-unmask the apic vector */
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+ /* See op_model_ppro.c */
+ return 1;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+ unsigned int low, high, stag;
+ int i;
+
+ stag = get_stagger();
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ CCCR_SET_ENABLE(low);
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ }
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+ unsigned int low, high, stag;
+ int i;
+
+ stag = get_stagger();
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ CCCR_SET_DISABLE(low);
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+ }
+}
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec op_p4_ht2_spec = {
+ .num_counters = NUM_COUNTERS_HT2,
+ .num_controls = NUM_CONTROLS_HT2,
+ .fill_in_addresses = &p4_fill_in_addresses,
+ .setup_ctrs = &p4_setup_ctrs,
+ .check_ctrs = &p4_check_ctrs,
+ .start = &p4_start,
+ .stop = &p4_stop,
+ .shutdown = &p4_shutdown
+};
+#endif
+
+struct op_x86_model_spec op_p4_spec = {
+ .num_counters = NUM_COUNTERS_NON_HT,
+ .num_controls = NUM_CONTROLS_NON_HT,
+ .fill_in_addresses = &p4_fill_in_addresses,
+ .setup_ctrs = &p4_setup_ctrs,
+ .check_ctrs = &p4_check_ctrs,
+ .start = &p4_start,
+ .stop = &p4_stop,
+ .shutdown = &p4_shutdown
+};
diff --git a/kernel/arch/x86/oprofile/op_model_ppro.c b/kernel/arch/x86/oprofile/op_model_ppro.c
new file mode 100644
index 000000000..d90528ea5
--- /dev/null
+++ b/kernel/arch/x86/oprofile/op_model_ppro.c
@@ -0,0 +1,245 @@
+/*
+ * @file op_model_ppro.h
+ * Family 6 perfmon and architectural perfmon MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2008 Intel Corporation
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ * @author Andi Kleen
+ * @author Robert Richter <robert.richter@amd.com>
+ */
+
+#include <linux/oprofile.h>
+#include <linux/slab.h>
+#include <asm/ptrace.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/nmi.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+static int num_counters = 2;
+static int counter_width = 32;
+
+#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
+
+static u64 reset_value[OP_MAX_COUNTER];
+
+static void ppro_shutdown(struct op_msrs const * const msrs)
+{
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!msrs->counters[i].addr)
+ continue;
+ release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
+ release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
+ }
+}
+
+static int ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+ int i;
+
+ for (i = 0; i < num_counters; i++) {
+ if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
+ goto fail;
+ if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) {
+ release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
+ goto fail;
+ }
+ /* both registers must be reserved */
+ msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+ msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
+ continue;
+ fail:
+ if (!counter_config[i].enabled)
+ continue;
+ op_x86_warn_reserved(i);
+ ppro_shutdown(msrs);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+
+static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ if (cpu_has_arch_perfmon) {
+ union cpuid10_eax eax;
+ eax.full = cpuid_eax(0xa);
+
+ /*
+ * For Core2 (family 6, model 15), don't reset the
+ * counter width:
+ */
+ if (!(eax.split.version_id == 0 &&
+ __this_cpu_read(cpu_info.x86) == 6 &&
+ __this_cpu_read(cpu_info.x86_model) == 15)) {
+
+ if (counter_width < eax.split.bit_width)
+ counter_width = eax.split.bit_width;
+ }
+ }
+
+ /* clear all counters */
+ for (i = 0; i < num_counters; ++i) {
+ if (!msrs->controls[i].addr)
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+ op_x86_warn_in_use(i);
+ val &= model->reserved;
+ wrmsrl(msrs->controls[i].addr, val);
+ /*
+ * avoid a false detection of ctr overflows in NMI *
+ * handler
+ */
+ wrmsrl(msrs->counters[i].addr, -1LL);
+ }
+
+ /* enable active counters */
+ for (i = 0; i < num_counters; ++i) {
+ if (counter_config[i].enabled && msrs->counters[i].addr) {
+ reset_value[i] = counter_config[i].count;
+ wrmsrl(msrs->counters[i].addr, -reset_value[i]);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[i]);
+ wrmsrl(msrs->controls[i].addr, val);
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+}
+
+
+static int ppro_check_ctrs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
+ rdmsrl(msrs->counters[i].addr, val);
+ if (val & (1ULL << (counter_width - 1)))
+ continue;
+ oprofile_add_sample(regs, i);
+ wrmsrl(msrs->counters[i].addr, -reset_value[i]);
+ }
+
+ /* Only P6 based Pentium M need to re-unmask the apic vector but it
+ * doesn't hurt other P6 variant */
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+ /* We can't work out if we really handled an interrupt. We
+ * might have caught a *second* counter just after overflowing
+ * the interrupt for this counter then arrives
+ * and we don't find a counter that's overflowed, so we
+ * would return 0 and get dazed + confused. Instead we always
+ * assume we found an overflow. This sucks.
+ */
+ return 1;
+}
+
+
+static void ppro_start(struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (reset_value[i]) {
+ rdmsrl(msrs->controls[i].addr, val);
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+ }
+}
+
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ for (i = 0; i < num_counters; ++i) {
+ if (!reset_value[i])
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+}
+
+struct op_x86_model_spec op_ppro_spec = {
+ .num_counters = 2,
+ .num_controls = 2,
+ .reserved = MSR_PPRO_EVENTSEL_RESERVED,
+ .fill_in_addresses = &ppro_fill_in_addresses,
+ .setup_ctrs = &ppro_setup_ctrs,
+ .check_ctrs = &ppro_check_ctrs,
+ .start = &ppro_start,
+ .stop = &ppro_stop,
+ .shutdown = &ppro_shutdown
+};
+
+/*
+ * Architectural performance monitoring.
+ *
+ * Newer Intel CPUs (Core1+) have support for architectural
+ * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
+ * The advantage of this is that it can be done without knowing about
+ * the specific CPU.
+ */
+
+static void arch_perfmon_setup_counters(void)
+{
+ union cpuid10_eax eax;
+
+ eax.full = cpuid_eax(0xa);
+
+ /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
+ if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 &&
+ __this_cpu_read(cpu_info.x86_model) == 15) {
+ eax.split.version_id = 2;
+ eax.split.num_counters = 2;
+ eax.split.bit_width = 40;
+ }
+
+ num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER);
+
+ op_arch_perfmon_spec.num_counters = num_counters;
+ op_arch_perfmon_spec.num_controls = num_counters;
+}
+
+static int arch_perfmon_init(struct oprofile_operations *ignore)
+{
+ arch_perfmon_setup_counters();
+ return 0;
+}
+
+struct op_x86_model_spec op_arch_perfmon_spec = {
+ .reserved = MSR_PPRO_EVENTSEL_RESERVED,
+ .init = &arch_perfmon_init,
+ /* num_counters/num_controls filled in at runtime */
+ .fill_in_addresses = &ppro_fill_in_addresses,
+ /* user space does the cpuid check for available events */
+ .setup_ctrs = &ppro_setup_ctrs,
+ .check_ctrs = &ppro_check_ctrs,
+ .start = &ppro_start,
+ .stop = &ppro_stop,
+ .shutdown = &ppro_shutdown
+};
diff --git a/kernel/arch/x86/oprofile/op_x86_model.h b/kernel/arch/x86/oprofile/op_x86_model.h
new file mode 100644
index 000000000..71e8a6733
--- /dev/null
+++ b/kernel/arch/x86/oprofile/op_x86_model.h
@@ -0,0 +1,90 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ * @author Robert Richter <robert.richter@amd.com>
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+#include <asm/types.h>
+#include <asm/perf_event.h>
+
+struct op_msr {
+ unsigned long addr;
+ u64 saved;
+};
+
+struct op_msrs {
+ struct op_msr *counters;
+ struct op_msr *controls;
+ struct op_msr *multiplex;
+};
+
+struct pt_regs;
+
+struct oprofile_operations;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU models' perfctr support.
+ */
+struct op_x86_model_spec {
+ unsigned int num_counters;
+ unsigned int num_controls;
+ unsigned int num_virt_counters;
+ u64 reserved;
+ u16 event_mask;
+ int (*init)(struct oprofile_operations *ops);
+ int (*fill_in_addresses)(struct op_msrs * const msrs);
+ void (*setup_ctrs)(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs);
+ int (*check_ctrs)(struct pt_regs * const regs,
+ struct op_msrs const * const msrs);
+ void (*start)(struct op_msrs const * const msrs);
+ void (*stop)(struct op_msrs const * const msrs);
+ void (*shutdown)(struct op_msrs const * const msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ void (*switch_ctrl)(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs);
+#endif
+};
+
+struct op_counter_config;
+
+static inline void op_x86_warn_in_use(int counter)
+{
+ /*
+ * The warning indicates an already running counter. If
+ * oprofile doesn't collect data, then try using a different
+ * performance counter on your platform to monitor the desired
+ * event. Delete counter #%d from the desired event by editing
+ * the /usr/share/oprofile/%s/<cpu>/events file. If the event
+ * cannot be monitored by any other counter, contact your
+ * hardware or BIOS vendor.
+ */
+ pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
+ counter, smp_processor_id());
+}
+
+static inline void op_x86_warn_reserved(int counter)
+{
+ pr_warning("oprofile: counter #%d is already reserved\n", counter);
+}
+
+extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+ struct op_counter_config *counter_config);
+extern int op_x86_phys_to_virt(int phys);
+extern int op_x86_virt_to_phys(int virt);
+
+extern struct op_x86_model_spec op_ppro_spec;
+extern struct op_x86_model_spec op_p4_spec;
+extern struct op_x86_model_spec op_p4_ht2_spec;
+extern struct op_x86_model_spec op_amd_spec;
+extern struct op_x86_model_spec op_arch_perfmon_spec;
+
+#endif /* OP_X86_MODEL_H */