/*
 * S390 Version
 *   Copyright IBM Corp. 2002, 2011
 *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
 *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
 *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
 *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
 *
 * @remark Copyright 2002-2011 OProfile authors
 */

#include <linux/oprofile.h>
#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <asm/processor.h>
#include <asm/perf_event.h>

#include "../../../drivers/oprofile/oprof.h"

extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);

#include "hwsampler.h"
#include "op_counter.h"

#define DEFAULT_INTERVAL	4127518

#define DEFAULT_SDBT_BLOCKS	1
#define DEFAULT_SDB_BLOCKS	511

static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
static unsigned long oprofile_min_interval;
static unsigned long oprofile_max_interval;

static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;

static int hwsampler_enabled;
static int hwsampler_running;	/* start_mutex must be held to change */
static int hwsampler_available;

static struct oprofile_operations timer_ops;

struct op_counter_config counter_config;

enum __force_cpu_type {
	reserved = 0,		/* do not force */
	timer,
};
static int force_cpu_type;

static int set_cpu_type(const char *str, struct kernel_param *kp)
{
	if (!strcmp(str, "timer")) {
		force_cpu_type = timer;
		printk(KERN_INFO "oprofile: forcing timer to be returned "
		                 "as cpu type\n");
	} else {
		force_cpu_type = 0;
	}

	return 0;
}
module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
		           "(report cpu_type \"timer\"");

static int __oprofile_hwsampler_start(void)
{
	int retval;

	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
	if (retval)
		return retval;

	retval = hwsampler_start_all(oprofile_hw_interval);
	if (retval)
		hwsampler_deallocate();

	return retval;
}

static int oprofile_hwsampler_start(void)
{
	int retval;

	hwsampler_running = hwsampler_enabled;

	if (!hwsampler_running)
		return timer_ops.start();

	retval = perf_reserve_sampling();
	if (retval)
		return retval;

	retval = __oprofile_hwsampler_start();
	if (retval)
		perf_release_sampling();

	return retval;
}

static void oprofile_hwsampler_stop(void)
{
	if (!hwsampler_running) {
		timer_ops.stop();
		return;
	}

	hwsampler_stop_all();
	hwsampler_deallocate();
	perf_release_sampling();
	return;
}

/*
 * File ops used for:
 * /dev/oprofile/0/enabled
 * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
 */

static ssize_t hwsampler_read(struct file *file, char __user *buf,
		size_t count, loff_t *offset)
{
	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
}

static ssize_t hwsampler_write(struct file *file, char const __user *buf,
		size_t count, loff_t *offset)
{
	unsigned long val;
	int retval;

	if (*offset)
		return -EINVAL;

	retval = oprofilefs_ulong_from_user(&val, buf, count);
	if (retval <= 0)
		return retval;

	if (val != 0 && val != 1)
		return -EINVAL;

	if (oprofile_started)
		/*
		 * save to do without locking as we set
		 * hwsampler_running in start() when start_mutex is
		 * held
		 */
		return -EBUSY;

	hwsampler_enabled = val;

	return count;
}

static const struct file_operations hwsampler_fops = {
	.read		= hwsampler_read,
	.write		= hwsampler_write,
};

/*
 * File ops used for:
 * /dev/oprofile/0/count
 * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
 *
 * Make sure that the value is within the hardware range.
 */

static ssize_t hw_interval_read(struct file *file, char __user *buf,
				size_t count, loff_t *offset)
{
	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
					count, offset);
}

static ssize_t hw_interval_write(struct file *file, char const __user *buf,
				 size_t count, loff_t *offset)
{
	unsigned long val;
	int retval;

	if (*offset)
		return -EINVAL;
	retval = oprofilefs_ulong_from_user(&val, buf, count);
	if (retval <= 0)
		return retval;
	if (val < oprofile_min_interval)
		oprofile_hw_interval = oprofile_min_interval;
	else if (val > oprofile_max_interval)
		oprofile_hw_interval = oprofile_max_interval;
	else
		oprofile_hw_interval = val;

	return count;
}

static const struct file_operations hw_interval_fops = {
	.read		= hw_interval_read,
	.write		= hw_interval_write,
};

/*
 * File ops used for:
 * /dev/oprofile/0/event
 * Only a single event with number 0 is supported with this counter.
 *
 * /dev/oprofile/0/unit_mask
 * This is a dummy file needed by the user space tools.
 * No value other than 0 is accepted or returned.
 */

static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
				    size_t count, loff_t *offset)
{
	return oprofilefs_ulong_to_user(0, buf, count, offset);
}

static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
				     size_t count, loff_t *offset)
{
	unsigned long val;
	int retval;

	if (*offset)
		return -EINVAL;

	retval = oprofilefs_ulong_from_user(&val, buf, count);
	if (retval <= 0)
		return retval;
	if (val != 0)
		return -EINVAL;
	return count;
}

static const struct file_operations zero_fops = {
	.read		= hwsampler_zero_read,
	.write		= hwsampler_zero_write,
};

/* /dev/oprofile/0/kernel file ops.  */

static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
				     size_t count, loff_t *offset)
{
	return oprofilefs_ulong_to_user(counter_config.kernel,
					buf, count, offset);
}

static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
				      size_t count, loff_t *offset)
{
	unsigned long val;
	int retval;

	if (*offset)
		return -EINVAL;

	retval = oprofilefs_ulong_from_user(&val, buf, count);
	if (retval <= 0)
		return retval;

	if (val != 0 && val != 1)
		return -EINVAL;

	counter_config.kernel = val;

	return count;
}

static const struct file_operations kernel_fops = {
	.read		= hwsampler_kernel_read,
	.write		= hwsampler_kernel_write,
};

/* /dev/oprofile/0/user file ops. */

static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
				   size_t count, loff_t *offset)
{
	return oprofilefs_ulong_to_user(counter_config.user,
					buf, count, offset);
}

static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
				    size_t count, loff_t *offset)
{
	unsigned long val;
	int retval;

	if (*offset)
		return -EINVAL;

	retval = oprofilefs_ulong_from_user(&val, buf, count);
	if (retval <= 0)
		return retval;

	if (val != 0 && val != 1)
		return -EINVAL;

	counter_config.user = val;

	return count;
}

static const struct file_operations user_fops = {
	.read		= hwsampler_user_read,
	.write		= hwsampler_user_write,
};


/*
 * File ops used for: /dev/oprofile/timer/enabled
 * The value always has to be the inverted value of hwsampler_enabled. So
 * no separate variable is created. That way we do not need locking.
 */

static ssize_t timer_enabled_read(struct file *file, char __user *buf,
				  size_t count, loff_t *offset)
{
	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
}

static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
				   size_t count, loff_t *offset)
{
	unsigned long val;
	int retval;

	if (*offset)
		return -EINVAL;

	retval = oprofilefs_ulong_from_user(&val, buf, count);
	if (retval <= 0)
		return retval;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Timer cannot be disabled without having hardware sampling.  */
	if (val == 0 && !hwsampler_available)
		return -EINVAL;

	if (oprofile_started)
		/*
		 * save to do without locking as we set
		 * hwsampler_running in start() when start_mutex is
		 * held
		 */
		return -EBUSY;

	hwsampler_enabled = !val;

	return count;
}

static const struct file_operations timer_enabled_fops = {
	.read		= timer_enabled_read,
	.write		= timer_enabled_write,
};


static int oprofile_create_hwsampling_files(struct dentry *root)
{
	struct dentry *dir;

	dir = oprofilefs_mkdir(root, "timer");
	if (!dir)
		return -EINVAL;

	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);

	if (!hwsampler_available)
		return 0;

	/* reinitialize default values */
	hwsampler_enabled = 1;
	counter_config.kernel = 1;
	counter_config.user = 1;

	if (!force_cpu_type) {
		/*
		 * Create the counter file system.  A single virtual
		 * counter is created which can be used to
		 * enable/disable hardware sampling dynamically from
		 * user space.  The user space will configure a single
		 * counter with a single event.  The value of 'event'
		 * and 'unit_mask' are not evaluated by the kernel code
		 * and can only be set to 0.
		 */

		dir = oprofilefs_mkdir(root, "0");
		if (!dir)
			return -EINVAL;

		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
		oprofilefs_create_file(dir, "event", &zero_fops);
		oprofilefs_create_file(dir, "count", &hw_interval_fops);
		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
		oprofilefs_create_file(dir, "kernel", &kernel_fops);
		oprofilefs_create_file(dir, "user", &user_fops);
		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
					&oprofile_sdbt_blocks);

	} else {
		/*
		 * Hardware sampling can be used but the cpu_type is
		 * forced to timer in order to deal with legacy user
		 * space tools.  The /dev/oprofile/hwsampling fs is
		 * provided in that case.
		 */
		dir = oprofilefs_mkdir(root, "hwsampling");
		if (!dir)
			return -EINVAL;

		oprofilefs_create_file(dir, "hwsampler",
				       &hwsampler_fops);
		oprofilefs_create_file(dir, "hw_interval",
				       &hw_interval_fops);
		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
					   &oprofile_min_interval);
		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
					   &oprofile_max_interval);
		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
					&oprofile_sdbt_blocks);
	}
	return 0;
}

static int oprofile_hwsampler_init(struct oprofile_operations *ops)
{
	/*
	 * Initialize the timer mode infrastructure as well in order
	 * to be able to switch back dynamically.  oprofile_timer_init
	 * is not supposed to fail.
	 */
	if (oprofile_timer_init(ops))
		BUG();

	memcpy(&timer_ops, ops, sizeof(timer_ops));
	ops->create_files = oprofile_create_hwsampling_files;

	/*
	 * If the user space tools do not support newer cpu types,
	 * the force_cpu_type module parameter
	 * can be used to always return \"timer\" as cpu type.
	 */
	if (force_cpu_type != timer) {
		struct cpuid id;

		get_cpu_id (&id);

		switch (id.machine) {
		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
		default: return -ENODEV;
		}
	}

	if (hwsampler_setup())
		return -ENODEV;

	/*
	 * Query the range for the sampling interval from the
	 * hardware.
	 */
	oprofile_min_interval = hwsampler_query_min_interval();
	if (oprofile_min_interval == 0)
		return -ENODEV;
	oprofile_max_interval = hwsampler_query_max_interval();
	if (oprofile_max_interval == 0)
		return -ENODEV;

	/* The initial value should be sane */
	if (oprofile_hw_interval < oprofile_min_interval)
		oprofile_hw_interval = oprofile_min_interval;
	if (oprofile_hw_interval > oprofile_max_interval)
		oprofile_hw_interval = oprofile_max_interval;

	printk(KERN_INFO "oprofile: System z hardware sampling "
	       "facility found.\n");

	ops->start = oprofile_hwsampler_start;
	ops->stop = oprofile_hwsampler_stop;

	return 0;
}

static void oprofile_hwsampler_exit(void)
{
	hwsampler_shutdown();
}

int __init oprofile_arch_init(struct oprofile_operations *ops)
{
	ops->backtrace = s390_backtrace;

	/*
	 * -ENODEV is not reported to the caller.  The module itself
         * will use the timer mode sampling as fallback and this is
         * always available.
	 */
	hwsampler_available = oprofile_hwsampler_init(ops) == 0;

	return 0;
}

void oprofile_arch_exit(void)
{
	oprofile_hwsampler_exit();
}