diff options
Diffstat (limited to 'kernel/tools/power/cpupower/utils')
30 files changed, 5601 insertions, 0 deletions
diff --git a/kernel/tools/power/cpupower/utils/builtin.h b/kernel/tools/power/cpupower/utils/builtin.h new file mode 100644 index 000000000..2284c8ea4 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/builtin.h @@ -0,0 +1,12 @@ +#ifndef BUILTIN_H +#define BUILTIN_H + +extern int cmd_set(int argc, const char **argv); +extern int cmd_info(int argc, const char **argv); +extern int cmd_freq_set(int argc, const char **argv); +extern int cmd_freq_info(int argc, const char **argv); +extern int cmd_idle_set(int argc, const char **argv); +extern int cmd_idle_info(int argc, const char **argv); +extern int cmd_monitor(int argc, const char **argv); + +#endif diff --git a/kernel/tools/power/cpupower/utils/cpufreq-info.c b/kernel/tools/power/cpupower/utils/cpufreq-info.c new file mode 100644 index 000000000..b4b90a976 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpufreq-info.c @@ -0,0 +1,698 @@ +/* + * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de> + * + * Licensed under the terms of the GNU GPL License version 2. 
+ */
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <getopt.h>
+
+#include "cpufreq.h"
+#include "helpers/helpers.h"
+#include "helpers/bitmask.h"
+
+#define LINE_LEN 10
+
+/*
+ * Count the CPUs listed in /proc/stat.
+ *
+ * Scans the file in chunks of at most LINE_LEN - 1 characters and looks for
+ * entries of the form "cpu<N> ". Returns the highest <N> seen plus one.
+ * Returns 1 when /proc/stat cannot be opened or no such entry is found.
+ */
+static unsigned int count_cpus(void)
+{
+	FILE *fp;
+	char value[LINE_LEN];
+	unsigned int ret = 0;
+	unsigned int cpunr = 0;
+
+	fp = fopen("/proc/stat", "r");
+	if (!fp) {
+		printf(_("Couldn't count the number of CPUs (%s: %s), assuming 1\n"), "/proc/stat", strerror(errno));
+		return 1;
+	}
+
+	/*
+	 * Loop on fgets() itself. Testing feof() and retrying after a failed
+	 * read spins forever when the failure is an I/O error rather than
+	 * end-of-file, because feof() stays false in that case.
+	 */
+	while (fgets(value, LINE_LEN, fp)) {
+		value[LINE_LEN - 1] = '\0';
+		if (strlen(value) < (LINE_LEN - 2))
+			continue;
+		/* "cpu " has no CPU number directly after "cpu": skip it */
+		if (strstr(value, "cpu "))
+			continue;
+		/* cpunr is unsigned, so the conversion has to be %u */
+		if (sscanf(value, "cpu%u ", &cpunr) != 1)
+			continue;
+		if (cpunr > ret)
+			ret = cpunr;
+	}
+	fclose(fp);
+
+	/* cpu count starts from 0, on error return 1 (UP) */
+	return ret + 1;
+}
+
+
+static void proc_cpufreq_output(void)
+{
+	unsigned int cpu, nr_cpus;
+	struct cpufreq_policy *policy;
+	unsigned int min_pctg = 0;
+	unsigned int max_pctg = 0;
+	unsigned long min, max;
+
+	printf(_(" minimum CPU frequency - maximum CPU frequency - governor\n"));
+
+	nr_cpus = count_cpus();
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		policy = cpufreq_get_policy(cpu);
+		if (!policy)
+			continue;
+
+		if (cpufreq_get_hardware_limits(cpu, &min, &max)) {
+			max = 0;
+		} else {
+			min_pctg = (policy->min * 100) / max;
+			max_pctg = (policy->max * 100) / max;
+		}
+		printf("CPU%3d %9lu kHz (%3d %%) - %9lu kHz (%3d %%) - %s\n",
+			cpu , policy->min, max ? min_pctg : 0, policy->max,
+			max ? 
max_pctg : 0, policy->governor);
+
+		cpufreq_put_policy(policy);
+	}
+}
+
+static int no_rounding;
+
+/*
+ * Print a frequency given in kHz, scaled to GHz, MHz or kHz.
+ *
+ * With no_rounding set, every digit is printed (GHz with six decimals, MHz
+ * with three). Otherwise the value is rounded to two decimals (GHz), to a
+ * whole number (above 100 MHz) or to one decimal (MHz). Values of 1000 kHz
+ * or less are printed unscaled in both modes.
+ */
+static void print_speed(unsigned long speed)
+{
+	unsigned long tmp;
+
+	if (no_rounding) {
+		/*
+		 * There is deliberately no separate "above 100 MHz" case
+		 * here: it used to print the raw kHz number with an "MHz"
+		 * label. The three-decimal MHz form below is exact for the
+		 * whole 1 MHz - 1 GHz range.
+		 */
+		if (speed > 1000000)
+			printf("%u.%06u GHz", ((unsigned int) speed/1000000),
+				((unsigned int) speed%1000000));
+		else if (speed > 1000)
+			printf("%u.%03u MHz", ((unsigned int) speed/1000),
+				(unsigned int) (speed%1000));
+		else
+			printf("%lu kHz", speed);
+	} else {
+		if (speed > 1000000) {
+			tmp = speed%10000;
+			if (tmp >= 5000)
+				speed += 10000;
+			printf("%u.%02u GHz", ((unsigned int) speed/1000000),
+				((unsigned int) (speed%1000000)/10000));
+		} else if (speed > 100000) {
+			tmp = speed%1000;
+			if (tmp >= 500)
+				speed += 1000;
+			printf("%u MHz", ((unsigned int) speed/1000));
+		} else if (speed > 1000) {
+			tmp = speed%100;
+			if (tmp >= 50)
+				speed += 100;
+			printf("%u.%01u MHz", ((unsigned int) speed/1000),
+				((unsigned int) (speed%1000)/100));
+		} else {
+			/* previously nothing at all was printed here */
+			printf("%lu kHz", speed);
+		}
+	}
+}
+
+static void print_duration(unsigned long duration)
+{
+	unsigned long tmp;
+
+	if (no_rounding) {
+		if (duration > 1000000)
+			printf("%u.%06u ms", ((unsigned int) duration/1000000),
+				((unsigned int) duration%1000000));
+		else if (duration > 100000)
+			printf("%u us", ((unsigned int) duration/1000));
+		else if (duration > 1000)
+			printf("%u.%03u us", ((unsigned int) duration/1000),
+				((unsigned int) duration%1000));
+		else
+			printf("%lu ns", duration);
+	} else {
+		if (duration > 1000000) {
+			tmp = duration%10000;
+			if (tmp >= 5000)
+				duration += 10000;
+			printf("%u.%02u ms", ((unsigned int) duration/1000000),
+				((unsigned int) (duration%1000000)/10000));
+		} else if (duration > 100000) {
+			tmp = duration%1000;
+			if (tmp >= 500)
+				duration += 1000;
+			printf("%u us", ((unsigned int) duration / 1000));
+		} else if (duration > 1000) {
+			tmp = duration%100;
+			if (tmp >= 50)
+				duration += 100;
+			printf("%u.%01u us", ((unsigned int) 
duration/1000), + ((unsigned int) (duration%1000)/100)); + } else + printf("%lu ns", duration); + } + return; +} + +/* --boost / -b */ + +static int get_boost_mode(unsigned int cpu) +{ + int support, active, b_states = 0, ret, pstate_no, i; + /* ToDo: Make this more global */ + unsigned long pstates[MAX_HW_PSTATES] = {0,}; + + if (cpupower_cpu_info.vendor != X86_VENDOR_AMD && + cpupower_cpu_info.vendor != X86_VENDOR_INTEL) + return 0; + + ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states); + if (ret) { + printf(_("Error while evaluating Boost Capabilities" + " on CPU %d -- are you root?\n"), cpu); + return ret; + } + /* P state changes via MSR are identified via cpuid 80000007 + on Intel and AMD, but we assume boost capable machines can do that + if (cpuid_eax(0x80000000) >= 0x80000007 + && (cpuid_edx(0x80000007) & (1 << 7))) + */ + + printf(_(" boost state support:\n")); + + printf(_(" Supported: %s\n"), support ? _("yes") : _("no")); + printf(_(" Active: %s\n"), active ? _("yes") : _("no")); + + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.family >= 0x10) { + ret = decode_pstates(cpu, cpupower_cpu_info.family, b_states, + pstates, &pstate_no); + if (ret) + return ret; + + printf(_(" Boost States: %d\n"), b_states); + printf(_(" Total States: %d\n"), pstate_no); + for (i = 0; i < pstate_no; i++) { + if (i < b_states) + printf(_(" Pstate-Pb%d: %luMHz (boost state)" + "\n"), i, pstates[i]); + else + printf(_(" Pstate-P%d: %luMHz\n"), + i - b_states, pstates[i]); + } + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_HAS_TURBO_RATIO) { + double bclk; + unsigned long long intel_turbo_ratio = 0; + unsigned int ratio; + + /* Any way to autodetect this ? 
*/ + if (cpupower_cpu_info.caps & CPUPOWER_CAP_IS_SNB) + bclk = 100.00; + else + bclk = 133.33; + intel_turbo_ratio = msr_intel_get_turbo_ratio(cpu); + dprint (" Ratio: 0x%llx - bclk: %f\n", + intel_turbo_ratio, bclk); + + ratio = (intel_turbo_ratio >> 24) & 0xFF; + if (ratio) + printf(_(" %.0f MHz max turbo 4 active cores\n"), + ratio * bclk); + + ratio = (intel_turbo_ratio >> 16) & 0xFF; + if (ratio) + printf(_(" %.0f MHz max turbo 3 active cores\n"), + ratio * bclk); + + ratio = (intel_turbo_ratio >> 8) & 0xFF; + if (ratio) + printf(_(" %.0f MHz max turbo 2 active cores\n"), + ratio * bclk); + + ratio = (intel_turbo_ratio >> 0) & 0xFF; + if (ratio) + printf(_(" %.0f MHz max turbo 1 active cores\n"), + ratio * bclk); + } + return 0; +} + +static void debug_output_one(unsigned int cpu) +{ + char *driver; + struct cpufreq_affected_cpus *cpus; + struct cpufreq_available_frequencies *freqs; + unsigned long min, max, freq_kernel, freq_hardware; + unsigned long total_trans, latency; + unsigned long long total_time; + struct cpufreq_policy *policy; + struct cpufreq_available_governors *governors; + struct cpufreq_stats *stats; + + if (cpufreq_cpu_exists(cpu)) + return; + + freq_kernel = cpufreq_get_freq_kernel(cpu); + freq_hardware = cpufreq_get_freq_hardware(cpu); + + driver = cpufreq_get_driver(cpu); + if (!driver) { + printf(_(" no or unknown cpufreq driver is active on this CPU\n")); + } else { + printf(_(" driver: %s\n"), driver); + cpufreq_put_driver(driver); + } + + cpus = cpufreq_get_related_cpus(cpu); + if (cpus) { + printf(_(" CPUs which run at the same hardware frequency: ")); + while (cpus->next) { + printf("%d ", cpus->cpu); + cpus = cpus->next; + } + printf("%d\n", cpus->cpu); + cpufreq_put_related_cpus(cpus); + } + + cpus = cpufreq_get_affected_cpus(cpu); + if (cpus) { + printf(_(" CPUs which need to have their frequency coordinated by software: ")); + while (cpus->next) { + printf("%d ", cpus->cpu); + cpus = cpus->next; + } + printf("%d\n", cpus->cpu); + 
cpufreq_put_affected_cpus(cpus);
+	}
+
+	latency = cpufreq_get_transition_latency(cpu);
+	if (latency) {
+		printf(_(" maximum transition latency: "));
+		print_duration(latency);
+		printf(".\n");
+	}
+
+	if (!(cpufreq_get_hardware_limits(cpu, &min, &max))) {
+		printf(_(" hardware limits: "));
+		print_speed(min);
+		printf(" - ");
+		print_speed(max);
+		printf("\n");
+	}
+
+	freqs = cpufreq_get_available_frequencies(cpu);
+	if (freqs) {
+		printf(_(" available frequency steps: "));
+		while (freqs->next) {
+			print_speed(freqs->frequency);
+			printf(", ");
+			freqs = freqs->next;
+		}
+		print_speed(freqs->frequency);
+		printf("\n");
+		cpufreq_put_available_frequencies(freqs);
+	}
+
+	governors = cpufreq_get_available_governors(cpu);
+	if (governors) {
+		printf(_(" available cpufreq governors: "));
+		while (governors->next) {
+			printf("%s, ", governors->governor);
+			governors = governors->next;
+		}
+		printf("%s\n", governors->governor);
+		cpufreq_put_available_governors(governors);
+	}
+
+	policy = cpufreq_get_policy(cpu);
+	if (policy) {
+		printf(_(" current policy: frequency should be within "));
+		print_speed(policy->min);
+		printf(_(" and "));
+		print_speed(policy->max);
+
+		printf(".\n ");
+		printf(_("The governor \"%s\" may"
+			" decide which speed to use\n within this range.\n"),
+			policy->governor);
+		cpufreq_put_policy(policy);
+	}
+
+	if (freq_kernel || freq_hardware) {
+		printf(_(" current CPU frequency is "));
+		if (freq_hardware) {
+			print_speed(freq_hardware);
+			printf(_(" (asserted by call to hardware)"));
+		} else
+			print_speed(freq_kernel);
+		printf(".\n");
+	}
+	stats = cpufreq_get_stats(cpu, &total_time);
+	if (stats) {
+		/*
+		 * Walk the list with a separate cursor. Advancing "stats"
+		 * itself left it NULL at the end of the loop, so the release
+		 * call below received NULL and the list was never handed
+		 * back.
+		 */
+		struct cpufreq_stats *cur = stats;
+
+		printf(_(" cpufreq stats: "));
+		while (cur) {
+			print_speed(cur->frequency);
+			printf(":%.2f%%", (100.0 * cur->time_in_state) / total_time);
+			cur = cur->next;
+			if (cur)
+				printf(", ");
+		}
+		cpufreq_put_stats(stats);
+		total_trans = cpufreq_get_transitions(cpu);
+		if (total_trans)
+			printf(" (%lu)\n", total_trans);
+		else
+ 
printf("\n"); + } + get_boost_mode(cpu); + +} + +/* --freq / -f */ + +static int get_freq_kernel(unsigned int cpu, unsigned int human) +{ + unsigned long freq = cpufreq_get_freq_kernel(cpu); + if (!freq) + return -EINVAL; + if (human) { + print_speed(freq); + printf("\n"); + } else + printf("%lu\n", freq); + return 0; +} + + +/* --hwfreq / -w */ + +static int get_freq_hardware(unsigned int cpu, unsigned int human) +{ + unsigned long freq = cpufreq_get_freq_hardware(cpu); + if (!freq) + return -EINVAL; + if (human) { + print_speed(freq); + printf("\n"); + } else + printf("%lu\n", freq); + return 0; +} + +/* --hwlimits / -l */ + +static int get_hardware_limits(unsigned int cpu) +{ + unsigned long min, max; + if (cpufreq_get_hardware_limits(cpu, &min, &max)) + return -EINVAL; + printf("%lu %lu\n", min, max); + return 0; +} + +/* --driver / -d */ + +static int get_driver(unsigned int cpu) +{ + char *driver = cpufreq_get_driver(cpu); + if (!driver) + return -EINVAL; + printf("%s\n", driver); + cpufreq_put_driver(driver); + return 0; +} + +/* --policy / -p */ + +static int get_policy(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_get_policy(cpu); + if (!policy) + return -EINVAL; + printf("%lu %lu %s\n", policy->min, policy->max, policy->governor); + cpufreq_put_policy(policy); + return 0; +} + +/* --governors / -g */ + +static int get_available_governors(unsigned int cpu) +{ + struct cpufreq_available_governors *governors = + cpufreq_get_available_governors(cpu); + if (!governors) + return -EINVAL; + + while (governors->next) { + printf("%s ", governors->governor); + governors = governors->next; + } + printf("%s\n", governors->governor); + cpufreq_put_available_governors(governors); + return 0; +} + + +/* --affected-cpus / -a */ + +static int get_affected_cpus(unsigned int cpu) +{ + struct cpufreq_affected_cpus *cpus = cpufreq_get_affected_cpus(cpu); + if (!cpus) + return -EINVAL; + + while (cpus->next) { + printf("%d ", cpus->cpu); + cpus = cpus->next; + 
}
+	printf("%d\n", cpus->cpu);
+	cpufreq_put_affected_cpus(cpus);
+	return 0;
+}
+
+/* --related-cpus / -r */
+
+static int get_related_cpus(unsigned int cpu)
+{
+	struct cpufreq_affected_cpus *cpus = cpufreq_get_related_cpus(cpu);
+	if (!cpus)
+		return -EINVAL;
+
+	while (cpus->next) {
+		printf("%d ", cpus->cpu);
+		cpus = cpus->next;
+	}
+	printf("%d\n", cpus->cpu);
+	cpufreq_put_related_cpus(cpus);
+	return 0;
+}
+
+/* --stats / -s */
+
+/*
+ * Print the frequency statistics of one CPU on a single line.
+ *
+ * Each entry returned by cpufreq_get_stats() is printed as
+ * "<frequency>:<time>", separated by ", ". With "human" set the frequency
+ * goes through print_speed() and the time is shown as a percentage of the
+ * total time; otherwise both raw values are printed. The transition count
+ * is appended in parentheses when it is non-zero. Always returns 0.
+ */
+static int get_freq_stats(unsigned int cpu, unsigned int human)
+{
+	unsigned long total_trans = cpufreq_get_transitions(cpu);
+	unsigned long long total_time;
+	struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time);
+	struct cpufreq_stats *cur;
+
+	/*
+	 * Iterate with a cursor so that "stats" still refers to the list
+	 * when it is released. The old loop advanced "stats" to NULL first
+	 * and then released NULL.
+	 */
+	for (cur = stats; cur; cur = cur->next) {
+		if (human) {
+			print_speed(cur->frequency);
+			printf(":%.2f%%",
+				(100.0 * cur->time_in_state) / total_time);
+		} else
+			printf("%lu:%llu",
+				cur->frequency, cur->time_in_state);
+		if (cur->next)
+			printf(", ");
+	}
+	if (total_trans)
+		printf(" (%lu)\n", total_trans);
+	else if (stats)
+		/* terminate the line when there is no transition count */
+		printf("\n");
+	cpufreq_put_stats(stats);
+	return 0;
+}
+
+/* --latency / -y */
+
+static int get_latency(unsigned int cpu, unsigned int human)
+{
+	unsigned long latency = cpufreq_get_transition_latency(cpu);
+	if (!latency)
+		return -EINVAL;
+
+	if (human) {
+		print_duration(latency);
+		printf("\n");
+	} else
+		printf("%lu\n", latency);
+	return 0;
+}
+
+static struct option info_opts[] = {
+	{ .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'},
+	{ .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'},
+	{ .name = "freq", .has_arg = no_argument, .flag = NULL, .val = 'f'},
+	{ .name = "hwfreq", .has_arg = no_argument, .flag = NULL, .val = 'w'},
+	{ .name = "hwlimits", .has_arg = no_argument, .flag = NULL, .val = 'l'},
+	{ .name = "driver", .has_arg = no_argument, .flag = NULL, .val = 'd'},
+	{ .name = "policy", .has_arg = no_argument, .flag = NULL, .val = 'p'},
+	{ .name = "governors", .has_arg = no_argument, .flag = NULL, .val = 'g'},
+	{ .name = "related-cpus", .has_arg = 
no_argument, .flag = NULL, .val = 'r'}, + { .name = "affected-cpus",.has_arg = no_argument, .flag = NULL, .val = 'a'}, + { .name = "stats", .has_arg = no_argument, .flag = NULL, .val = 's'}, + { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, + { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, + { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, + { .name = "no-rounding", .has_arg = no_argument, .flag = NULL, .val = 'n'}, + { }, +}; + +int cmd_freq_info(int argc, char **argv) +{ + extern char *optarg; + extern int optind, opterr, optopt; + int ret = 0, cont = 1; + unsigned int cpu = 0; + unsigned int human = 0; + int output_param = 0; + + do { + ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts, + NULL); + switch (ret) { + case '?': + output_param = '?'; + cont = 0; + break; + case -1: + cont = 0; + break; + case 'b': + case 'o': + case 'a': + case 'r': + case 'g': + case 'p': + case 'd': + case 'l': + case 'w': + case 'f': + case 'e': + case 's': + case 'y': + if (output_param) { + output_param = -1; + cont = 0; + break; + } + output_param = ret; + break; + case 'm': + if (human) { + output_param = -1; + cont = 0; + break; + } + human = 1; + break; + case 'n': + no_rounding = 1; + break; + default: + fprintf(stderr, "invalid or unknown argument\n"); + return EXIT_FAILURE; + } + } while (cont); + + switch (output_param) { + case 'o': + if (!bitmask_isallclear(cpus_chosen)) { + printf(_("The argument passed to this tool can't be " + "combined with passing a --cpu argument\n")); + return -EINVAL; + } + break; + case 0: + output_param = 'e'; + } + + ret = 0; + + /* Default is: show output of CPU 0 only */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setbit(cpus_chosen, 0); + + switch (output_param) { + case -1: + printf(_("You can't specify more than one --cpu parameter and/or\n" + "more than one output-specific argument\n")); + return -EINVAL; + case '?': + printf(_("invalid or unknown 
argument\n")); + return -EINVAL; + case 'o': + proc_cpufreq_output(); + return EXIT_SUCCESS; + } + + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (!bitmask_isbitset(cpus_chosen, cpu)) + continue; + if (cpufreq_cpu_exists(cpu)) { + printf(_("couldn't analyze CPU %d as it doesn't seem to be present\n"), cpu); + continue; + } + printf(_("analyzing CPU %d:\n"), cpu); + + switch (output_param) { + case 'b': + get_boost_mode(cpu); + break; + case 'e': + debug_output_one(cpu); + break; + case 'a': + ret = get_affected_cpus(cpu); + break; + case 'r': + ret = get_related_cpus(cpu); + break; + case 'g': + ret = get_available_governors(cpu); + break; + case 'p': + ret = get_policy(cpu); + break; + case 'd': + ret = get_driver(cpu); + break; + case 'l': + ret = get_hardware_limits(cpu); + break; + case 'w': + ret = get_freq_hardware(cpu, human); + break; + case 'f': + ret = get_freq_kernel(cpu, human); + break; + case 's': + ret = get_freq_stats(cpu, human); + break; + case 'y': + ret = get_latency(cpu, human); + break; + } + if (ret) + return ret; + } + return ret; +} diff --git a/kernel/tools/power/cpupower/utils/cpufreq-set.c b/kernel/tools/power/cpupower/utils/cpufreq-set.c new file mode 100644 index 000000000..f656e585e --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpufreq-set.c @@ -0,0 +1,330 @@ +/* + * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de> + * + * Licensed under the terms of the GNU GPL License version 2. 
+ */ + + +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> +#include <string.h> +#include <ctype.h> + +#include <getopt.h> + +#include "cpufreq.h" +#include "helpers/helpers.h" + +#define NORM_FREQ_LEN 32 + +static struct option set_opts[] = { + { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'}, + { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'}, + { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'}, + { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'}, + { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'}, + { }, +}; + +static void print_error(void) +{ + printf(_("Error setting new values. Common errors:\n" + "- Do you have proper administration rights? (super-user?)\n" + "- Is the governor you requested available and modprobed?\n" + "- Trying to set an invalid policy?\n" + "- Trying to set a specific frequency, but userspace governor is not available,\n" + " for example because of hardware which cannot be set to a specific frequency\n" + " or because the userspace governor isn't loaded?\n")); +}; + +struct freq_units { + char *str_unit; + int power_of_ten; +}; + +const struct freq_units def_units[] = { + {"hz", -3}, + {"khz", 0}, /* default */ + {"mhz", 3}, + {"ghz", 6}, + {"thz", 9}, + {NULL, 0} +}; + +static void print_unknown_arg(void) +{ + printf(_("invalid or unknown argument\n")); +} + +static unsigned long string_to_frequency(const char *str) +{ + char normalized[NORM_FREQ_LEN]; + const struct freq_units *unit; + const char *scan; + char *end; + unsigned long freq; + int power = 0, match_count = 0, i, cp, pad; + + while (*str == '0') + str++; + + for (scan = str; isdigit(*scan) || *scan == '.'; scan++) { + if (*scan == '.' && match_count == 0) + match_count = 1; + else if (*scan == '.' 
&& match_count == 1) + return 0; + } + + if (*scan) { + match_count = 0; + for (unit = def_units; unit->str_unit; unit++) { + for (i = 0; + scan[i] && tolower(scan[i]) == unit->str_unit[i]; + ++i) + continue; + if (scan[i]) + continue; + match_count++; + power = unit->power_of_ten; + } + if (match_count != 1) + return 0; + } + + /* count the number of digits to be copied */ + for (cp = 0; isdigit(str[cp]); cp++) + continue; + + if (str[cp] == '.') { + while (power > -1 && isdigit(str[cp+1])) + cp++, power--; + } + if (power >= -1) /* not enough => pad */ + pad = power + 1; + else /* to much => strip */ + pad = 0, cp += power + 1; + /* check bounds */ + if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1) + return 0; + + /* copy digits */ + for (i = 0; i < cp; i++, str++) { + if (*str == '.') + str++; + normalized[i] = *str; + } + /* and pad */ + for (; i < cp + pad; i++) + normalized[i] = '0'; + + /* round up, down ? */ + match_count = (normalized[i-1] >= '5'); + /* and drop the decimal part */ + normalized[i-1] = 0; /* cp > 0 && pad >= 0 ==> i > 0 */ + + /* final conversion (and applying rounding) */ + errno = 0; + freq = strtoul(normalized, &end, 10); + if (errno) + return 0; + else { + if (match_count && freq != ULONG_MAX) + freq++; + return freq; + } +} + +static int do_new_policy(unsigned int cpu, struct cpufreq_policy *new_pol) +{ + struct cpufreq_policy *cur_pol = cpufreq_get_policy(cpu); + int ret; + + if (!cur_pol) { + printf(_("wrong, unknown or unhandled CPU?\n")); + return -EINVAL; + } + + if (!new_pol->min) + new_pol->min = cur_pol->min; + + if (!new_pol->max) + new_pol->max = cur_pol->max; + + if (!new_pol->governor) + new_pol->governor = cur_pol->governor; + + ret = cpufreq_set_policy(cpu, new_pol); + + cpufreq_put_policy(cur_pol); + + return ret; +} + + +static int do_one_cpu(unsigned int cpu, struct cpufreq_policy *new_pol, + unsigned long freq, unsigned int pc) +{ + switch (pc) { + case 0: + return cpufreq_set_frequency(cpu, freq); + + case 1: + /* if 
only one value of a policy is to be changed, we can + * use a "fast path". + */ + if (new_pol->min) + return cpufreq_modify_policy_min(cpu, new_pol->min); + else if (new_pol->max) + return cpufreq_modify_policy_max(cpu, new_pol->max); + else if (new_pol->governor) + return cpufreq_modify_policy_governor(cpu, + new_pol->governor); + + default: + /* slow path */ + return do_new_policy(cpu, new_pol); + } +} + +int cmd_freq_set(int argc, char **argv) +{ + extern char *optarg; + extern int optind, opterr, optopt; + int ret = 0, cont = 1; + int double_parm = 0, related = 0, policychange = 0; + unsigned long freq = 0; + char gov[20]; + unsigned int cpu; + + struct cpufreq_policy new_pol = { + .min = 0, + .max = 0, + .governor = NULL, + }; + + /* parameter parsing */ + do { + ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL); + switch (ret) { + case '?': + print_unknown_arg(); + return -EINVAL; + case -1: + cont = 0; + break; + case 'r': + if (related) + double_parm++; + related++; + break; + case 'd': + if (new_pol.min) + double_parm++; + policychange++; + new_pol.min = string_to_frequency(optarg); + if (new_pol.min == 0) { + print_unknown_arg(); + return -EINVAL; + } + break; + case 'u': + if (new_pol.max) + double_parm++; + policychange++; + new_pol.max = string_to_frequency(optarg); + if (new_pol.max == 0) { + print_unknown_arg(); + return -EINVAL; + } + break; + case 'f': + if (freq) + double_parm++; + freq = string_to_frequency(optarg); + if (freq == 0) { + print_unknown_arg(); + return -EINVAL; + } + break; + case 'g': + if (new_pol.governor) + double_parm++; + policychange++; + if ((strlen(optarg) < 3) || (strlen(optarg) > 18)) { + print_unknown_arg(); + return -EINVAL; + } + if ((sscanf(optarg, "%19s", gov)) != 1) { + print_unknown_arg(); + return -EINVAL; + } + new_pol.governor = gov; + break; + } + } while (cont); + + /* parameter checking */ + if (double_parm) { + printf("the same parameter was passed more than once\n"); + return -EINVAL; + } + + if 
(freq && policychange) { + printf(_("the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n" + "-g/--governor parameters\n")); + return -EINVAL; + } + + if (!freq && !policychange) { + printf(_("At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n" + "-g/--governor must be passed\n")); + return -EINVAL; + } + + /* Default is: set all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + + /* Also set frequency settings for related CPUs if -r is passed */ + if (related) { + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + struct cpufreq_affected_cpus *cpus; + + if (!bitmask_isbitset(cpus_chosen, cpu) || + cpufreq_cpu_exists(cpu)) + continue; + + cpus = cpufreq_get_related_cpus(cpu); + if (!cpus) + break; + while (cpus->next) { + bitmask_setbit(cpus_chosen, cpus->cpu); + cpus = cpus->next; + } + cpufreq_put_related_cpus(cpus); + } + } + + + /* loop over CPUs */ + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (!bitmask_isbitset(cpus_chosen, cpu) || + cpufreq_cpu_exists(cpu)) + continue; + + printf(_("Setting cpu: %d\n"), cpu); + ret = do_one_cpu(cpu, &new_pol, freq, policychange); + if (ret) { + print_error(); + return ret; + } + } + + return 0; +} diff --git a/kernel/tools/power/cpupower/utils/cpuidle-info.c b/kernel/tools/power/cpupower/utils/cpuidle-info.c new file mode 100644 index 000000000..75e66de7e --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpuidle-info.c @@ -0,0 +1,208 @@ +/* + * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de> + * (C) 2010 Thomas Renninger <trenn@suse.de> + * + * Licensed under the terms of the GNU GPL License version 2. 
+ */ + + +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <cpufreq.h> + +#include "helpers/helpers.h" +#include "helpers/sysfs.h" +#include "helpers/bitmask.h" + +#define LINE_LEN 10 + +static void cpuidle_cpu_output(unsigned int cpu, int verbose) +{ + unsigned int idlestates, idlestate; + char *tmp; + + printf(_ ("Analyzing CPU %d:\n"), cpu); + + idlestates = sysfs_get_idlestate_count(cpu); + if (idlestates == 0) { + printf(_("CPU %u: No idle states\n"), cpu); + return; + } + + printf(_("Number of idle states: %d\n"), idlestates); + printf(_("Available idle states:")); + for (idlestate = 0; idlestate < idlestates; idlestate++) { + tmp = sysfs_get_idlestate_name(cpu, idlestate); + if (!tmp) + continue; + printf(" %s", tmp); + free(tmp); + } + printf("\n"); + + if (!verbose) + return; + + for (idlestate = 0; idlestate < idlestates; idlestate++) { + int disabled = sysfs_is_idlestate_disabled(cpu, idlestate); + /* Disabled interface not supported on older kernels */ + if (disabled < 0) + disabled = 0; + tmp = sysfs_get_idlestate_name(cpu, idlestate); + if (!tmp) + continue; + printf("%s%s:\n", tmp, (disabled) ? 
" (DISABLED) " : ""); + free(tmp); + + tmp = sysfs_get_idlestate_desc(cpu, idlestate); + if (!tmp) + continue; + printf(_("Flags/Description: %s\n"), tmp); + free(tmp); + + printf(_("Latency: %lu\n"), + sysfs_get_idlestate_latency(cpu, idlestate)); + printf(_("Usage: %lu\n"), + sysfs_get_idlestate_usage(cpu, idlestate)); + printf(_("Duration: %llu\n"), + sysfs_get_idlestate_time(cpu, idlestate)); + } + printf("\n"); +} + +static void cpuidle_general_output(void) +{ + char *tmp; + + tmp = sysfs_get_cpuidle_driver(); + if (!tmp) { + printf(_("Could not determine cpuidle driver\n")); + return; + } + + printf(_("CPUidle driver: %s\n"), tmp); + free(tmp); + + tmp = sysfs_get_cpuidle_governor(); + if (!tmp) { + printf(_("Could not determine cpuidle governor\n")); + return; + } + + printf(_("CPUidle governor: %s\n"), tmp); + free(tmp); +} + +static void proc_cpuidle_cpu_output(unsigned int cpu) +{ + long max_allowed_cstate = 2000000000; + unsigned int cstate, cstates; + + cstates = sysfs_get_idlestate_count(cpu); + if (cstates == 0) { + printf(_("CPU %u: No C-states info\n"), cpu); + return; + } + + printf(_("active state: C0\n")); + printf(_("max_cstate: C%u\n"), cstates-1); + printf(_("maximum allowed latency: %lu usec\n"), max_allowed_cstate); + printf(_("states:\t\n")); + for (cstate = 1; cstate < cstates; cstate++) { + printf(_(" C%d: " + "type[C%d] "), cstate, cstate); + printf(_("promotion[--] demotion[--] ")); + printf(_("latency[%03lu] "), + sysfs_get_idlestate_latency(cpu, cstate)); + printf(_("usage[%08lu] "), + sysfs_get_idlestate_usage(cpu, cstate)); + printf(_("duration[%020Lu] \n"), + sysfs_get_idlestate_time(cpu, cstate)); + } +} + +static struct option info_opts[] = { + { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'}, + { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, + { }, +}; + +static inline void cpuidle_exit(int fail) +{ + exit(EXIT_FAILURE); +} + +int cmd_idle_info(int argc, char **argv) +{ + extern char 
*optarg; + extern int optind, opterr, optopt; + int ret = 0, cont = 1, output_param = 0, verbose = 1; + unsigned int cpu = 0; + + do { + ret = getopt_long(argc, argv, "os", info_opts, NULL); + if (ret == -1) + break; + switch (ret) { + case '?': + output_param = '?'; + cont = 0; + break; + case 's': + verbose = 0; + break; + case -1: + cont = 0; + break; + case 'o': + if (output_param) { + output_param = -1; + cont = 0; + break; + } + output_param = ret; + break; + } + } while (cont); + + switch (output_param) { + case -1: + printf(_("You can't specify more than one " + "output-specific argument\n")); + cpuidle_exit(EXIT_FAILURE); + case '?': + printf(_("invalid or unknown argument\n")); + cpuidle_exit(EXIT_FAILURE); + } + + /* Default is: show output of CPU 0 only */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setbit(cpus_chosen, 0); + + if (output_param == 0) + cpuidle_general_output(); + + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (!bitmask_isbitset(cpus_chosen, cpu) || + cpufreq_cpu_exists(cpu)) + continue; + + switch (output_param) { + + case 'o': + proc_cpuidle_cpu_output(cpu); + break; + case 0: + printf("\n"); + cpuidle_cpu_output(cpu, verbose); + break; + } + } + return EXIT_SUCCESS; +} diff --git a/kernel/tools/power/cpupower/utils/cpuidle-set.c b/kernel/tools/power/cpupower/utils/cpuidle-set.c new file mode 100644 index 000000000..d45d8d775 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpuidle-set.c @@ -0,0 +1,181 @@ +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> +#include <string.h> +#include <ctype.h> + +#include <getopt.h> + +#include "cpufreq.h" +#include "helpers/helpers.h" +#include "helpers/sysfs.h" + +static struct option info_opts[] = { + { .name = "disable", + .has_arg = required_argument, .flag = NULL, .val = 'd'}, + { .name = "enable", + .has_arg = required_argument, .flag = NULL, .val = 'e'}, + { .name = "disable-by-latency", 
+ .has_arg = required_argument, .flag = NULL, .val = 'D'}, + { .name = "enable-all", + .has_arg = no_argument, .flag = NULL, .val = 'E'}, + { }, +}; + + +int cmd_idle_set(int argc, char **argv) +{ + extern char *optarg; + extern int optind, opterr, optopt; + int ret = 0, cont = 1, param = 0, disabled; + unsigned long long latency = 0, state_latency; + unsigned int cpu = 0, idlestate = 0, idlestates = 0; + char *endptr; + + do { + ret = getopt_long(argc, argv, "d:e:ED:", info_opts, NULL); + if (ret == -1) + break; + switch (ret) { + case '?': + param = '?'; + cont = 0; + break; + case 'd': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + idlestate = atoi(optarg); + break; + case 'e': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + idlestate = atoi(optarg); + break; + case 'D': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + latency = strtoull(optarg, &endptr, 10); + if (*endptr != '\0') { + printf(_("Bad latency value: %s\n"), optarg); + exit(EXIT_FAILURE); + } + break; + case 'E': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + break; + case -1: + cont = 0; + break; + } + } while (cont); + + switch (param) { + case -1: + printf(_("You can't specify more than one " + "output-specific argument\n")); + exit(EXIT_FAILURE); + case '?': + printf(_("invalid or unknown argument\n")); + exit(EXIT_FAILURE); + } + + /* Default is: set all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (!bitmask_isbitset(cpus_chosen, cpu)) + continue; + + if (sysfs_is_cpu_online(cpu) != 1) + continue; + + idlestates = sysfs_get_idlestate_count(cpu); + if (idlestates <= 0) + continue; + + switch (param) { + case 'd': + ret = sysfs_idlestate_disable(cpu, idlestate, 1); + if (ret == 0) + printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + else if (ret == -1) + 
printf(_("Idlestate %u not available on CPU %u\n"), + idlestate, cpu); + else if (ret == -2) + printf(_("Idlestate disabling not supported by kernel\n")); + else + printf(_("Idlestate %u not disabled on CPU %u\n"), + idlestate, cpu); + break; + case 'e': + ret = sysfs_idlestate_disable(cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + else if (ret == -1) + printf(_("Idlestate %u not available on CPU %u\n"), + idlestate, cpu); + else if (ret == -2) + printf(_("Idlestate enabling not supported by kernel\n")); + else + printf(_("Idlestate %u not enabled on CPU %u\n"), + idlestate, cpu); + break; + case 'D': + for (idlestate = 0; idlestate < idlestates; idlestate++) { + disabled = sysfs_is_idlestate_disabled + (cpu, idlestate); + state_latency = sysfs_get_idlestate_latency + (cpu, idlestate); + printf("CPU: %u - idlestate %u - state_latency: %llu - latency: %llu\n", + cpu, idlestate, state_latency, latency); + if (disabled == 1 || latency > state_latency) + continue; + ret = sysfs_idlestate_disable + (cpu, idlestate, 1); + if (ret == 0) + printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + } + break; + case 'E': + for (idlestate = 0; idlestate < idlestates; idlestate++) { + disabled = sysfs_is_idlestate_disabled + (cpu, idlestate); + if (disabled == 1) { + ret = sysfs_idlestate_disable + (cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + } + } + break; + default: + /* Not reachable with proper args checking */ + printf(_("Invalid or unknown argument\n")); + exit(EXIT_FAILURE); + break; + } + } + return EXIT_SUCCESS; +} diff --git a/kernel/tools/power/cpupower/utils/cpupower-info.c b/kernel/tools/power/cpupower/utils/cpupower-info.c new file mode 100644 index 000000000..136d979e9 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpupower-info.c @@ -0,0 +1,103 @@ +/* + * (C) 2011 Thomas Renninger <trenn@suse.de>, Novell Inc. 
+ * + * Licensed under the terms of the GNU GPL License version 2. + */ + + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <getopt.h> + +#include <cpufreq.h> +#include "helpers/helpers.h" +#include "helpers/sysfs.h" + +static struct option set_opts[] = { + { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, + { }, +}; + +static void print_wrong_arg_exit(void) +{ + printf(_("invalid or unknown argument\n")); + exit(EXIT_FAILURE); +} + +int cmd_info(int argc, char **argv) +{ + extern char *optarg; + extern int optind, opterr, optopt; + unsigned int cpu; + + union { + struct { + int perf_bias:1; + }; + int params; + } params = {}; + int ret = 0; + + setlocale(LC_ALL, ""); + textdomain(PACKAGE); + + /* parameter parsing */ + while ((ret = getopt_long(argc, argv, "b", set_opts, NULL)) != -1) { + switch (ret) { + case 'b': + if (params.perf_bias) + print_wrong_arg_exit(); + params.perf_bias = 1; + break; + default: + print_wrong_arg_exit(); + } + }; + + if (!params.params) + params.params = 0x7; + + /* Default is: show output of CPU 0 only */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setbit(cpus_chosen, 0); + + /* Add more per cpu options here */ + if (!params.perf_bias) + return ret; + + if (params.perf_bias) { + if (!run_as_root) { + params.perf_bias = 0; + printf(_("Intel's performance bias setting needs root privileges\n")); + } else if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS)) { + printf(_("System does not support Intel's performance" + " bias setting\n")); + params.perf_bias = 0; + } + } + + /* loop over CPUs */ + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if (!bitmask_isbitset(cpus_chosen, cpu) || + cpufreq_cpu_exists(cpu)) + continue; + + printf(_("analyzing CPU %d:\n"), cpu); + + if (params.perf_bias) { + ret = msr_intel_get_perf_bias(cpu); + if (ret < 0) { + fprintf(stderr, + _("Could not read perf-bias 
value[%d]\n"), ret); + exit(EXIT_FAILURE); + } else + printf(_("perf-bias: %d\n"), ret); + } + } + return 0; +} diff --git a/kernel/tools/power/cpupower/utils/cpupower-set.c b/kernel/tools/power/cpupower/utils/cpupower-set.c new file mode 100644 index 000000000..573c75f8e --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpupower-set.c @@ -0,0 +1,95 @@ +/* + * (C) 2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + */ + + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <getopt.h> + +#include <cpufreq.h> +#include "helpers/helpers.h" +#include "helpers/sysfs.h" +#include "helpers/bitmask.h" + +static struct option set_opts[] = { + { .name = "perf-bias", .has_arg = required_argument, .flag = NULL, .val = 'b'}, + { }, +}; + +static void print_wrong_arg_exit(void) +{ + printf(_("invalid or unknown argument\n")); + exit(EXIT_FAILURE); +} + +int cmd_set(int argc, char **argv) +{ + extern char *optarg; + extern int optind, opterr, optopt; + unsigned int cpu; + + union { + struct { + int perf_bias:1; + }; + int params; + } params; + int perf_bias = 0; + int ret = 0; + + setlocale(LC_ALL, ""); + textdomain(PACKAGE); + + params.params = 0; + /* parameter parsing */ + while ((ret = getopt_long(argc, argv, "b:", + set_opts, NULL)) != -1) { + switch (ret) { + case 'b': + if (params.perf_bias) + print_wrong_arg_exit(); + perf_bias = atoi(optarg); + if (perf_bias < 0 || perf_bias > 15) { + printf(_("--perf-bias param out " + "of range [0-%d]\n"), 15); + print_wrong_arg_exit(); + } + params.perf_bias = 1; + break; + default: + print_wrong_arg_exit(); + } + }; + + if (!params.params) + print_wrong_arg_exit(); + + /* Default is: set all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + + /* loop over CPUs */ + for (cpu = bitmask_first(cpus_chosen); + cpu <= bitmask_last(cpus_chosen); cpu++) { + + if 
(!bitmask_isbitset(cpus_chosen, cpu) || + cpufreq_cpu_exists(cpu)) + continue; + + if (params.perf_bias) { + ret = msr_intel_set_perf_bias(cpu, perf_bias); + if (ret) { + fprintf(stderr, _("Error setting perf-bias " + "value on CPU %d\n"), cpu); + break; + } + } + } + return ret; +} diff --git a/kernel/tools/power/cpupower/utils/cpupower.c b/kernel/tools/power/cpupower/utils/cpupower.c new file mode 100644 index 000000000..9ea914378 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/cpupower.c @@ -0,0 +1,229 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Ideas taken over from the perf userspace tool (included in the Linus + * kernel git repo): subcommand builtins and param parsing. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/utsname.h> + +#include "builtin.h" +#include "helpers/helpers.h" +#include "helpers/bitmask.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +static int cmd_help(int argc, const char **argv); + +/* Global cpu_info object available for all binaries + * Info only retrieved from CPU 0 + * + * Values will be zero/unknown on non X86 archs + */ +struct cpupower_cpu_info cpupower_cpu_info; +int run_as_root; +/* Affected cpus chosen by -c/--cpu param */ +struct bitmask *cpus_chosen; + +#ifdef DEBUG +int be_verbose; +#endif + +static void print_help(void); + +struct cmd_struct { + const char *cmd; + int (*main)(int, const char **); + int needs_root; +}; + +static struct cmd_struct commands[] = { + { "frequency-info", cmd_freq_info, 0 }, + { "frequency-set", cmd_freq_set, 1 }, + { "idle-info", cmd_idle_info, 0 }, + { "idle-set", cmd_idle_set, 1 }, + { "set", cmd_set, 1 }, + { "info", cmd_info, 0 }, + { "monitor", cmd_monitor, 0 }, + { "help", cmd_help, 0 }, + /* { "bench", cmd_bench, 1 }, */ +}; + +static void 
print_help(void) +{ + unsigned int i; + +#ifdef DEBUG + printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] <command> [<args>]\n")); +#else + printf(_("Usage:\tcpupower [-c|--cpu cpulist ] <command> [<args>]\n")); +#endif + printf(_("Supported commands are:\n")); + for (i = 0; i < ARRAY_SIZE(commands); i++) + printf("\t%s\n", commands[i].cmd); + printf(_("\nNot all commands can make use of the -c cpulist option.\n")); + printf(_("\nUse 'cpupower help <command>' for getting help for above commands.\n")); +} + +static int print_man_page(const char *subpage) +{ + int len; + char *page; + + len = 10; /* enough for "cpupower-" */ + if (subpage != NULL) + len += strlen(subpage); + + page = malloc(len); + if (!page) + return -ENOMEM; + + sprintf(page, "cpupower"); + if ((subpage != NULL) && strcmp(subpage, "help")) { + strcat(page, "-"); + strcat(page, subpage); + } + + execlp("man", "man", page, NULL); + + /* should not be reached */ + return -EINVAL; +} + +static int cmd_help(int argc, const char **argv) +{ + if (argc > 1) { + print_man_page(argv[1]); /* exits within execlp() */ + return EXIT_FAILURE; + } + + print_help(); + return EXIT_SUCCESS; +} + +static void print_version(void) +{ + printf(PACKAGE " " VERSION "\n"); + printf(_("Report errors and bugs to %s, please.\n"), PACKAGE_BUGREPORT); +} + +static void handle_options(int *argc, const char ***argv) +{ + int ret, x, new_argc = 0; + + if (*argc < 1) + return; + + for (x = 0; x < *argc && ((*argv)[x])[0] == '-'; x++) { + const char *param = (*argv)[x]; + if (!strcmp(param, "-h") || !strcmp(param, "--help")) { + print_help(); + exit(EXIT_SUCCESS); + } else if (!strcmp(param, "-c") || !strcmp(param, "--cpu")) { + if (*argc < 2) { + print_help(); + exit(EXIT_FAILURE); + } + if (!strcmp((*argv)[x+1], "all")) + bitmask_setall(cpus_chosen); + else { + ret = bitmask_parselist( + (*argv)[x+1], cpus_chosen); + if (ret < 0) { + fprintf(stderr, _("Error parsing cpu " + "list\n")); + exit(EXIT_FAILURE); + } + } + x += 
1; + /* Cut out param: cpupower -c 1 info -> cpupower info */ + new_argc += 2; + continue; + } else if (!strcmp(param, "-v") || + !strcmp(param, "--version")) { + print_version(); + exit(EXIT_SUCCESS); +#ifdef DEBUG + } else if (!strcmp(param, "-d") || !strcmp(param, "--debug")) { + be_verbose = 1; + new_argc++; + continue; +#endif + } else { + fprintf(stderr, "Unknown option: %s\n", param); + print_help(); + exit(EXIT_FAILURE); + } + } + *argc -= new_argc; + *argv += new_argc; +} + +int main(int argc, const char *argv[]) +{ + const char *cmd; + unsigned int i, ret; + struct stat statbuf; + struct utsname uts; + + cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); + + argc--; + argv += 1; + + handle_options(&argc, &argv); + + cmd = argv[0]; + + if (argc < 1) { + print_help(); + return EXIT_FAILURE; + } + + setlocale(LC_ALL, ""); + textdomain(PACKAGE); + + /* Turn "perf cmd --help" into "perf help cmd" */ + if (argc > 1 && !strcmp(argv[1], "--help")) { + argv[1] = argv[0]; + argv[0] = cmd = "help"; + } + + get_cpu_info(0, &cpupower_cpu_info); + run_as_root = !geteuid(); + if (run_as_root) { + ret = uname(&uts); + if (!ret && !strcmp(uts.machine, "x86_64") && + stat("/dev/cpu/0/msr", &statbuf) != 0) { + if (system("modprobe msr") == -1) + fprintf(stderr, _("MSR access not available.\n")); + } + } + + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + struct cmd_struct *p = commands + i; + if (strcmp(p->cmd, cmd)) + continue; + if (!run_as_root && p->needs_root) { + fprintf(stderr, _("Subcommand %s needs root " + "privileges\n"), cmd); + return EXIT_FAILURE; + } + ret = p->main(argc, argv); + if (cpus_chosen) + bitmask_free(cpus_chosen); + return ret; + } + print_help(); + return EXIT_FAILURE; +} diff --git a/kernel/tools/power/cpupower/utils/helpers/amd.c b/kernel/tools/power/cpupower/utils/helpers/amd.c new file mode 100644 index 000000000..6437ef39a --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/amd.c @@ -0,0 +1,135 @@ +#if defined(__i386__) 
|| defined(__x86_64__) +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <stdint.h> + +#include <pci/pci.h> + +#include "helpers/helpers.h" + +#define MSR_AMD_PSTATE_STATUS 0xc0010063 +#define MSR_AMD_PSTATE 0xc0010064 +#define MSR_AMD_PSTATE_LIMIT 0xc0010061 + +union msr_pstate { + struct { + unsigned fid:6; + unsigned did:3; + unsigned vid:7; + unsigned res1:6; + unsigned nbdid:1; + unsigned res2:2; + unsigned nbvid:7; + unsigned iddval:8; + unsigned idddiv:2; + unsigned res3:21; + unsigned en:1; + } bits; + unsigned long long val; +}; + +static int get_did(int family, union msr_pstate pstate) +{ + int t; + + if (family == 0x12) + t = pstate.val & 0xf; + else + t = pstate.bits.did; + + return t; +} + +static int get_cof(int family, union msr_pstate pstate) +{ + int t; + int fid, did; + + did = get_did(family, pstate); + + t = 0x10; + fid = pstate.bits.fid; + if (family == 0x11) + t = 0x8; + + return (100 * (fid + t)) >> did; +} + +/* Needs: + * cpu -> the cpu that gets evaluated + * cpu_family -> The cpu's family (0x10, 0x12,...) + * boots_states -> how much boost states the machines support + * + * Fills up: + * pstates -> a pointer to an array of size MAX_HW_PSTATES + * must be initialized with zeros. + * All available HW pstates (including boost states) + * no -> amount of pstates above array got filled up with + * + * returns zero on success, -1 on failure + */ +int decode_pstates(unsigned int cpu, unsigned int cpu_family, + int boost_states, unsigned long *pstates, int *no) +{ + int i, psmax, pscur; + union msr_pstate pstate; + unsigned long long val; + + /* Only read out frequencies from HW when CPU might be boostable + to keep the code as short and clean as possible. + Otherwise frequencies are exported via ACPI tables. 
+ */ + if (cpu_family < 0x10 || cpu_family == 0x14) + return -1; + + if (read_msr(cpu, MSR_AMD_PSTATE_LIMIT, &val)) + return -1; + + psmax = (val >> 4) & 0x7; + + if (read_msr(cpu, MSR_AMD_PSTATE_STATUS, &val)) + return -1; + + pscur = val & 0x7; + + pscur += boost_states; + psmax += boost_states; + for (i = 0; i <= psmax; i++) { + if (i >= MAX_HW_PSTATES) { + fprintf(stderr, "HW pstates [%d] exceeding max [%d]\n", + psmax, MAX_HW_PSTATES); + return -1; + } + if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val)) + return -1; + pstates[i] = get_cof(cpu_family, pstate); + } + *no = i; + return 0; +} + +int amd_pci_get_num_boost_states(int *active, int *states) +{ + struct pci_access *pci_acc; + struct pci_dev *device; + uint8_t val = 0; + + *active = *states = 0; + + device = pci_slot_func_init(&pci_acc, 0x18, 4); + + if (device == NULL) + return -ENODEV; + + val = pci_read_byte(device, 0x15c); + if (val & 3) + *active = 1; + else + *active = 0; + *states = (val >> 2) & 7; + + pci_cleanup(pci_acc); + return 0; +} +#endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/kernel/tools/power/cpupower/utils/helpers/bitmask.c b/kernel/tools/power/cpupower/utils/helpers/bitmask.c new file mode 100644 index 000000000..5c074c60f --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/bitmask.c @@ -0,0 +1,292 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <helpers/bitmask.h> + +/* How many bits in an unsigned long */ +#define bitsperlong (8 * sizeof(unsigned long)) + +/* howmany(a,b) : how many elements of size b needed to hold all of a */ +#define howmany(x, y) (((x)+((y)-1))/(y)) + +/* How many longs in mask of n bits */ +#define longsperbits(n) howmany(n, bitsperlong) + +#define max(a, b) ((a) > (b) ? 
(a) : (b)) + +/* + * Allocate and free `struct bitmask *` + */ + +/* Allocate a new `struct bitmask` with a size of n bits */ +struct bitmask *bitmask_alloc(unsigned int n) +{ + struct bitmask *bmp; + + bmp = malloc(sizeof(*bmp)); + if (bmp == 0) + return 0; + bmp->size = n; + bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); + if (bmp->maskp == 0) { + free(bmp); + return 0; + } + return bmp; +} + +/* Free `struct bitmask` */ +void bitmask_free(struct bitmask *bmp) +{ + if (bmp == 0) + return; + free(bmp->maskp); + bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ + free(bmp); +} + +/* + * The routines _getbit() and _setbit() are the only + * routines that actually understand the layout of bmp->maskp[]. + * + * On little endian architectures, this could simply be an array of + * bytes. But the kernel layout of bitmasks _is_ visible to userspace + * via the sched_(set/get)affinity calls in Linux 2.6, and on big + * endian architectures, it is painfully obvious that this is an + * array of unsigned longs. + */ + +/* Return the value (0 or 1) of bit n in bitmask bmp */ +static unsigned int _getbit(const struct bitmask *bmp, unsigned int n) +{ + if (n < bmp->size) + return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1; + else + return 0; +} + +/* Set bit n in bitmask bmp to value v (0 or 1) */ +static void _setbit(struct bitmask *bmp, unsigned int n, unsigned int v) +{ + if (n < bmp->size) { + if (v) + bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong); + else + bmp->maskp[n/bitsperlong] &= + ~(1UL << (n % bitsperlong)); + } +} + +/* + * When parsing bitmask lists, only allow numbers, separated by one + * of the allowed next characters. + * + * The parameter 'sret' is the return from a sscanf "%u%c". It is + * -1 if the sscanf input string was empty. It is 0 if the first + * character in the sscanf input string was not a decimal number. + * It is 1 if the unsigned number matching the "%u" was the end of the + * input string. 
It is 2 if one or more additional characters followed + * the matched unsigned number. If it is 2, then 'nextc' is the first + * character following the number. The parameter 'ok_next_chars' + * is the nul-terminated list of allowed next characters. + * + * The mask term just scanned was ok if and only if either the numbers + * matching the %u were all of the input or if the next character in + * the input past the numbers was one of the allowed next characters. + */ +static int scan_was_ok(int sret, char nextc, const char *ok_next_chars) +{ + return sret == 1 || + (sret == 2 && strchr(ok_next_chars, nextc) != NULL); +} + +static const char *nexttoken(const char *q, int sep) +{ + if (q) + q = strchr(q, sep); + if (q) + q++; + return q; +} + +/* Set a single bit i in bitmask */ +struct bitmask *bitmask_setbit(struct bitmask *bmp, unsigned int i) +{ + _setbit(bmp, i, 1); + return bmp; +} + +/* Set all bits in bitmask: bmp = ~0 */ +struct bitmask *bitmask_setall(struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + _setbit(bmp, i, 1); + return bmp; +} + +/* Clear all bits in bitmask: bmp = 0 */ +struct bitmask *bitmask_clearall(struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + _setbit(bmp, i, 0); + return bmp; +} + +/* True if all bits are clear */ +int bitmask_isallclear(const struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + if (_getbit(bmp, i)) + return 0; + return 1; +} + +/* True if specified bit i is set */ +int bitmask_isbitset(const struct bitmask *bmp, unsigned int i) +{ + return _getbit(bmp, i); +} + +/* Number of lowest set bit (min) */ +unsigned int bitmask_first(const struct bitmask *bmp) +{ + return bitmask_next(bmp, 0); +} + +/* Number of highest set bit (max) */ +unsigned int bitmask_last(const struct bitmask *bmp) +{ + unsigned int i; + unsigned int m = bmp->size; + for (i = 0; i < bmp->size; i++) + if (_getbit(bmp, i)) + m = i; + return m; +} + +/* Number of next set bit 
at or above given bit i */ +unsigned int bitmask_next(const struct bitmask *bmp, unsigned int i) +{ + unsigned int n; + for (n = i; n < bmp->size; n++) + if (_getbit(bmp, n)) + break; + return n; +} + +/* + * Parses a comma-separated list of numbers and ranges of numbers, + * with optional ':%u' strides modifying ranges, into provided bitmask. + * Some examples of input lists and their equivalent simple list: + * Input Equivalent to + * 0-3 0,1,2,3 + * 0-7:2 0,2,4,6 + * 1,3,5-7 1,3,5,6,7 + * 0-3:2,8-15:4 0,2,8,12 + */ +int bitmask_parselist(const char *buf, struct bitmask *bmp) +{ + const char *p, *q; + + bitmask_clearall(bmp); + + q = buf; + while (p = q, q = nexttoken(q, ','), p) { + unsigned int a; /* begin of range */ + unsigned int b; /* end of range */ + unsigned int s; /* stride */ + const char *c1, *c2; /* next tokens after '-' or ',' */ + char nextc; /* char after sscanf %u match */ + int sret; /* sscanf return (number of matches) */ + + sret = sscanf(p, "%u%c", &a, &nextc); + if (!scan_was_ok(sret, nextc, ",-")) + goto err; + b = a; + s = 1; + c1 = nexttoken(p, '-'); + c2 = nexttoken(p, ','); + if (c1 != NULL && (c2 == NULL || c1 < c2)) { + sret = sscanf(c1, "%u%c", &b, &nextc); + if (!scan_was_ok(sret, nextc, ",:")) + goto err; + c1 = nexttoken(c1, ':'); + if (c1 != NULL && (c2 == NULL || c1 < c2)) { + sret = sscanf(c1, "%u%c", &s, &nextc); + if (!scan_was_ok(sret, nextc, ",")) + goto err; + } + } + if (!(a <= b)) + goto err; + if (b >= bmp->size) + goto err; + while (a <= b) { + _setbit(bmp, a, 1); + a += s; + } + } + return 0; +err: + bitmask_clearall(bmp); + return -1; +} + +/* + * emit(buf, buflen, rbot, rtop, len) + * + * Helper routine for bitmask_displaylist(). Write decimal number + * or range to buf+len, suppressing output past buf+buflen, with optional + * comma-prefix. Return len of what would be written to buf, if it + * all fit. 
+ */ + +static inline int emit(char *buf, int buflen, int rbot, int rtop, int len) +{ + if (len > 0) + len += snprintf(buf + len, max(buflen - len, 0), ","); + if (rbot == rtop) + len += snprintf(buf + len, max(buflen - len, 0), "%d", rbot); + else + len += snprintf(buf + len, max(buflen - len, 0), "%d-%d", + rbot, rtop); + return len; +} + +/* + * Write decimal list representation of bmp to buf. + * + * Output format is a comma-separated list of decimal numbers and + * ranges. Consecutively set bits are shown as two hyphen-separated + * decimal numbers, the smallest and largest bit numbers set in + * the range. Output format is compatible with the format + * accepted as input by bitmap_parselist(). + * + * The return value is the number of characters which would be + * generated for the given input, excluding the trailing '\0', as + * per ISO C99. + */ + +int bitmask_displaylist(char *buf, int buflen, const struct bitmask *bmp) +{ + int len = 0; + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + unsigned int cur, rbot, rtop; + + if (buflen > 0) + *buf = 0; + rbot = cur = bitmask_first(bmp); + while (cur < bmp->size) { + rtop = cur; + cur = bitmask_next(bmp, cur+1); + if (cur >= bmp->size || cur > rtop + 1) { + len = emit(buf, buflen, rbot, rtop, len); + rbot = cur; + } + } + return len; +} diff --git a/kernel/tools/power/cpupower/utils/helpers/bitmask.h b/kernel/tools/power/cpupower/utils/helpers/bitmask.h new file mode 100644 index 000000000..eb289df41 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/bitmask.h @@ -0,0 +1,33 @@ +#ifndef __CPUPOWER_BITMASK__ +#define __CPUPOWER_BITMASK__ + +/* Taken over from libbitmask, a project initiated from sgi: + * Url: http://oss.sgi.com/projects/cpusets/ + * Unfortunately it's not very widespread, therefore relevant parts are + * pasted here. 
+ */ + +struct bitmask { + unsigned int size; + unsigned long *maskp; +}; + +struct bitmask *bitmask_alloc(unsigned int n); +void bitmask_free(struct bitmask *bmp); + +struct bitmask *bitmask_setbit(struct bitmask *bmp, unsigned int i); +struct bitmask *bitmask_setall(struct bitmask *bmp); +struct bitmask *bitmask_clearall(struct bitmask *bmp); + +unsigned int bitmask_first(const struct bitmask *bmp); +unsigned int bitmask_next(const struct bitmask *bmp, unsigned int i); +unsigned int bitmask_last(const struct bitmask *bmp); +int bitmask_isallclear(const struct bitmask *bmp); +int bitmask_isbitset(const struct bitmask *bmp, unsigned int i); + +int bitmask_parselist(const char *buf, struct bitmask *bmp); +int bitmask_displaylist(char *buf, int len, const struct bitmask *bmp); + + + +#endif /*__CPUPOWER_BITMASK__ */ diff --git a/kernel/tools/power/cpupower/utils/helpers/cpuid.c b/kernel/tools/power/cpupower/utils/helpers/cpuid.c new file mode 100644 index 000000000..93b0aa74c --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/cpuid.c @@ -0,0 +1,178 @@ +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> + +#include "helpers/helpers.h" + +static const char *cpu_vendor_table[X86_VENDOR_MAX] = { + "Unknown", "GenuineIntel", "AuthenticAMD", +}; + +#if defined(__i386__) || defined(__x86_64__) + +/* from gcc */ +#include <cpuid.h> + +/* + * CPUID functions returning a single datum + * + * Define unsigned int cpuid_e[abcd]x(unsigned int op) + */ +#define cpuid_func(reg) \ + unsigned int cpuid_##reg(unsigned int op) \ + { \ + unsigned int eax, ebx, ecx, edx; \ + __cpuid(op, eax, ebx, ecx, edx); \ + return reg; \ + } +cpuid_func(eax); +cpuid_func(ebx); +cpuid_func(ecx); +cpuid_func(edx); + +#endif /* defined(__i386__) || defined(__x86_64__) */ + +/* get_cpu_info + * + * Extract CPU vendor, family, model, stepping info from /proc/cpuinfo + * + * Returns 0 on success or a negativ error code + * + * TBD: Should there 
be a cpuid alternative for this if /proc is not mounted? + */ +int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info) +{ + FILE *fp; + char value[64]; + unsigned int proc, x; + unsigned int unknown = 0xffffff; + unsigned int cpuid_level, ext_cpuid_level; + + int ret = -EINVAL; + + cpu_info->vendor = X86_VENDOR_UNKNOWN; + cpu_info->family = unknown; + cpu_info->model = unknown; + cpu_info->stepping = unknown; + cpu_info->caps = 0; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + return -EIO; + + while (!feof(fp)) { + if (!fgets(value, 64, fp)) + continue; + value[63 - 1] = '\0'; + + if (!strncmp(value, "processor\t: ", 12)) + sscanf(value, "processor\t: %u", &proc); + + if (proc != cpu) + continue; + + /* Get CPU vendor */ + if (!strncmp(value, "vendor_id", 9)) { + for (x = 1; x < X86_VENDOR_MAX; x++) { + if (strstr(value, cpu_vendor_table[x])) + cpu_info->vendor = x; + } + /* Get CPU family, etc. */ + } else if (!strncmp(value, "cpu family\t: ", 13)) { + sscanf(value, "cpu family\t: %u", + &cpu_info->family); + } else if (!strncmp(value, "model\t\t: ", 9)) { + sscanf(value, "model\t\t: %u", + &cpu_info->model); + } else if (!strncmp(value, "stepping\t: ", 10)) { + sscanf(value, "stepping\t: %u", + &cpu_info->stepping); + + /* Exit -> all values must have been set */ + if (cpu_info->vendor == X86_VENDOR_UNKNOWN || + cpu_info->family == unknown || + cpu_info->model == unknown || + cpu_info->stepping == unknown) { + ret = -EINVAL; + goto out; + } + + ret = 0; + goto out; + } + } + ret = -ENODEV; +out: + fclose(fp); + /* Get some useful CPU capabilities from cpuid */ + if (cpu_info->vendor != X86_VENDOR_AMD && + cpu_info->vendor != X86_VENDOR_INTEL) + return ret; + + cpuid_level = cpuid_eax(0); + ext_cpuid_level = cpuid_eax(0x80000000); + + /* Invariant TSC */ + if (ext_cpuid_level >= 0x80000007 && + (cpuid_edx(0x80000007) & (1 << 8))) + cpu_info->caps |= CPUPOWER_CAP_INV_TSC; + + /* Aperf/Mperf registers support */ + if (cpuid_level >= 6 && 
(cpuid_ecx(6) & 0x1)) + cpu_info->caps |= CPUPOWER_CAP_APERF; + + /* AMD Boost state enable/disable register */ + if (cpu_info->vendor == X86_VENDOR_AMD) { + if (ext_cpuid_level >= 0x80000007 && + (cpuid_edx(0x80000007) & (1 << 9))) + cpu_info->caps |= CPUPOWER_CAP_AMD_CBP; + } + + if (cpu_info->vendor == X86_VENDOR_INTEL) { + if (cpuid_level >= 6 && + (cpuid_eax(6) & (1 << 1))) + cpu_info->caps |= CPUPOWER_CAP_INTEL_IDA; + } + + if (cpu_info->vendor == X86_VENDOR_INTEL) { + /* Intel's perf-bias MSR support */ + if (cpuid_level >= 6 && (cpuid_ecx(6) & (1 << 3))) + cpu_info->caps |= CPUPOWER_CAP_PERF_BIAS; + + /* Intel's Turbo Ratio Limit support */ + if (cpu_info->family == 6) { + switch (cpu_info->model) { + case 0x1A: /* Core i7, Xeon 5500 series + * Bloomfield, Gainstown NHM-EP + */ + case 0x1E: /* Core i7 and i5 Processor + * Clarksfield, Lynnfield, Jasper Forest + */ + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ + case 0x25: /* Westmere Client + * Clarkdale, Arrandale + */ + case 0x2C: /* Westmere EP - Gulftown */ + cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO; + case 0x2A: /* SNB */ + case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ + cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO; + cpu_info->caps |= CPUPOWER_CAP_IS_SNB; + break; + case 0x2E: /* Nehalem-EX Xeon - Beckton */ + case 0x2F: /* Westmere-EX Xeon - Eagleton */ + default: + break; + } + } + } + + /* printf("ID: %u - Extid: 0x%x - Caps: 0x%llx\n", + cpuid_level, ext_cpuid_level, cpu_info->caps); + */ + return ret; +} diff --git a/kernel/tools/power/cpupower/utils/helpers/helpers.h b/kernel/tools/power/cpupower/utils/helpers/helpers.h new file mode 100644 index 000000000..aa9e95486 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/helpers.h @@ -0,0 +1,195 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. 
+ * + * Miscellaneous helpers which do not fit or are worth + * to put into separate headers + */ + +#ifndef __CPUPOWERUTILS_HELPERS__ +#define __CPUPOWERUTILS_HELPERS__ + +#include <libintl.h> +#include <locale.h> + +#include "helpers/bitmask.h" + +/* Internationalization ****************************/ +#ifdef NLS + +#define _(String) gettext(String) +#ifndef gettext_noop +#define gettext_noop(String) String +#endif +#define N_(String) gettext_noop(String) + +#else /* !NLS */ + +#define _(String) String +#define N_(String) String + +#endif +/* Internationalization ****************************/ + +extern int run_as_root; +extern struct bitmask *cpus_chosen; + +/* Global verbose (-d) stuff *********************************/ +/* + * define DEBUG via global Makefile variable + * Debug output is sent to stderr, do: + * cpupower monitor 2>/tmp/debug + * to split debug output away from normal output +*/ +#ifdef DEBUG +extern int be_verbose; + +#define dprint(fmt, ...) { \ + if (be_verbose) { \ + fprintf(stderr, "%s: " fmt, \ + __func__, ##__VA_ARGS__); \ + } \ + } +#else +static inline void dprint(const char *fmt, ...) 
{ } +#endif +extern int be_verbose; +/* Global verbose (-v) stuff *********************************/ + +/* cpuid and cpuinfo helpers **************************/ +enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, + X86_VENDOR_AMD, X86_VENDOR_MAX}; + +#define CPUPOWER_CAP_INV_TSC 0x00000001 +#define CPUPOWER_CAP_APERF 0x00000002 +#define CPUPOWER_CAP_AMD_CBP 0x00000004 +#define CPUPOWER_CAP_PERF_BIAS 0x00000008 +#define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010 +#define CPUPOWER_CAP_IS_SNB 0x00000020 +#define CPUPOWER_CAP_INTEL_IDA 0x00000040 + +#define MAX_HW_PSTATES 10 + +struct cpupower_cpu_info { + enum cpupower_cpu_vendor vendor; + unsigned int family; + unsigned int model; + unsigned int stepping; + /* CPU capabilities read out from cpuid */ + unsigned long long caps; +}; + +/* get_cpu_info + * + * Extract CPU vendor, family, model, stepping info from /proc/cpuinfo + * + * Returns 0 on success or a negativ error code + * Only used on x86, below global's struct values are zero/unknown on + * other archs + */ +extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info); +extern struct cpupower_cpu_info cpupower_cpu_info; +/* cpuid and cpuinfo helpers **************************/ + +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; + +/* CPU topology/hierarchy parsing ******************/ +struct cpupower_topology { + /* Amount of CPU cores, packages and threads per core in the system */ + unsigned int cores; + unsigned int pkgs; + unsigned int threads; /* per core */ + + /* Array gets mallocated with cores entries, holding per core info */ + struct cpuid_core_info *core_info; +}; + +extern int get_cpu_topology(struct cpupower_topology *cpu_top); +extern void cpu_topology_release(struct cpupower_topology cpu_top); + +/* CPU topology/hierarchy parsing ******************/ + +/* X86 ONLY ****************************************/ +#if defined(__i386__) || defined(__x86_64__) + 
+#include <pci/pci.h> + +/* Read/Write msr ****************************/ +extern int read_msr(int cpu, unsigned int idx, unsigned long long *val); +extern int write_msr(int cpu, unsigned int idx, unsigned long long val); + +extern int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val); +extern int msr_intel_get_perf_bias(unsigned int cpu); +extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); + +/* Read/Write msr ****************************/ + +/* PCI stuff ****************************/ +extern int amd_pci_get_num_boost_states(int *active, int *states); +extern struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, + int bus, int slot, int func, int vendor, + int dev); +extern struct pci_dev *pci_slot_func_init(struct pci_access **pacc, + int slot, int func); + +/* PCI stuff ****************************/ + +/* AMD HW pstate decoding **************************/ + +extern int decode_pstates(unsigned int cpu, unsigned int cpu_family, + int boost_states, unsigned long *pstates, int *no); + +/* AMD HW pstate decoding **************************/ + +extern int cpufreq_has_boost_support(unsigned int cpu, int *support, + int *active, int * states); +/* + * CPUID functions returning a single datum + */ +unsigned int cpuid_eax(unsigned int op); +unsigned int cpuid_ebx(unsigned int op); +unsigned int cpuid_ecx(unsigned int op); +unsigned int cpuid_edx(unsigned int op); + +/* cpuid and cpuinfo helpers **************************/ +/* X86 ONLY ********************************************/ +#else +static inline int decode_pstates(unsigned int cpu, unsigned int cpu_family, + int boost_states, unsigned long *pstates, + int *no) +{ return -1; }; + +static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val) +{ return -1; }; +static inline int write_msr(int cpu, unsigned int idx, unsigned long long val) +{ return -1; }; +static inline int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val) +{ return -1; }; +static 
inline int msr_intel_get_perf_bias(unsigned int cpu) +{ return -1; }; +static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu) +{ return 0; }; + +/* Read/Write msr ****************************/ + +static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, + int *active, int * states) +{ return -1; } + +/* cpuid and cpuinfo helpers **************************/ + +static inline unsigned int cpuid_eax(unsigned int op) { return 0; }; +static inline unsigned int cpuid_ebx(unsigned int op) { return 0; }; +static inline unsigned int cpuid_ecx(unsigned int op) { return 0; }; +static inline unsigned int cpuid_edx(unsigned int op) { return 0; }; +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#endif /* __CPUPOWERUTILS_HELPERS__ */ diff --git a/kernel/tools/power/cpupower/utils/helpers/misc.c b/kernel/tools/power/cpupower/utils/helpers/misc.c new file mode 100644 index 000000000..1609243f5 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/misc.c @@ -0,0 +1,27 @@ +#if defined(__i386__) || defined(__x86_64__) + +#include "helpers/helpers.h" + +int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, + int *states) +{ + struct cpupower_cpu_info cpu_info; + int ret; + + *support = *active = *states = 0; + + ret = get_cpu_info(0, &cpu_info); + if (ret) + return ret; + + if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) { + *support = 1; + amd_pci_get_num_boost_states(active, states); + if (ret <= 0) + return ret; + *support = 1; + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) + *support = *active = 1; + return 0; +} +#endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/kernel/tools/power/cpupower/utils/helpers/msr.c b/kernel/tools/power/cpupower/utils/helpers/msr.c new file mode 100644 index 000000000..31a4b24a8 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/msr.c @@ -0,0 +1,115 @@ +#if defined(__i386__) || defined(__x86_64__) + +#include <fcntl.h> +#include 
/*
 * read_msr
 *
 * Reads sizeof(*val) bytes at offset idx from /dev/cpu/<cpu>/msr into *val.
 *
 * Will return 0 on success and -1 on failure.
 * Possible errno values could be:
 * EFAULT -If the read/write did not fully complete
 * EIO    -If the CPU does not support MSRs
 * ENXIO  -If the CPU does not exist
 */
int read_msr(int cpu, unsigned int idx, unsigned long long *val)
{
	char msr_file_name[64];
	int result = -1;
	int fd;

	snprintf(msr_file_name, sizeof(msr_file_name), "/dev/cpu/%d/msr", cpu);

	fd = open(msr_file_name, O_RDONLY);
	if (fd < 0)
		return -1;

	/*
	 * The descriptor was just opened, so its offset is 0 and the
	 * relative seek lands exactly on idx.
	 */
	if (lseek(fd, idx, SEEK_CUR) != -1 &&
	    read(fd, val, sizeof(*val)) == (ssize_t)sizeof(*val))
		result = 0;

	close(fd);
	return result;
}

/*
 * write_msr
 *
 * Writes the sizeof(val) bytes of val at offset idx to /dev/cpu/<cpu>/msr.
 *
 * Will return 0 on success and -1 on failure.
 * Possible errno values could be:
 * EFAULT -If the read/write did not fully complete
 * EIO    -If the CPU does not support MSRs
 * ENXIO  -If the CPU does not exist
 */
int write_msr(int cpu, unsigned int idx, unsigned long long val)
{
	char msr_file_name[64];
	int result = -1;
	int fd;

	snprintf(msr_file_name, sizeof(msr_file_name), "/dev/cpu/%d/msr", cpu);

	fd = open(msr_file_name, O_WRONLY);
	if (fd < 0)
		return -1;

	/* Fresh descriptor: a relative seek from offset 0 ends up at idx. */
	if (lseek(fd, idx, SEEK_CUR) != -1 &&
	    write(fd, &val, sizeof(val)) == (ssize_t)sizeof(val))
		result = 0;

	close(fd);
	return result;
}
MSR_IA32_ENERGY_PERF_BIAS, val); + if (ret) + return ret; + return 0; +} + +unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu) +{ + unsigned long long val; + int ret; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_HAS_TURBO_RATIO)) + return -1; + + ret = read_msr(cpu, MSR_NEHALEM_TURBO_RATIO_LIMIT, &val); + if (ret) + return ret; + return val; +} +#endif diff --git a/kernel/tools/power/cpupower/utils/helpers/pci.c b/kernel/tools/power/cpupower/utils/helpers/pci.c new file mode 100644 index 000000000..8b278983c --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/pci.c @@ -0,0 +1,62 @@ +#if defined(__i386__) || defined(__x86_64__) + +#include <helpers/helpers.h> + +/* + * pci_acc_init + * + * PCI access helper function depending on libpci + * + * **pacc : if a valid pci_dev is returned + * *pacc must be passed to pci_acc_cleanup to free it + * + * domain: domain + * bus: bus + * slot: slot + * func: func + * vendor: vendor + * device: device + * Pass -1 for one of the six above to match any + * + * Returns : + * struct pci_dev which can be used with pci_{read,write}_* functions + * to access the PCI config space of matching pci devices + */ +struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus, + int slot, int func, int vendor, int dev) +{ + struct pci_filter filter_nb_link; + struct pci_dev *device; + + *pacc = pci_alloc(); + if (*pacc == NULL) + return NULL; + + pci_filter_init(*pacc, &filter_nb_link); + filter_nb_link.domain = domain; + filter_nb_link.bus = bus; + filter_nb_link.slot = slot; + filter_nb_link.func = func; + filter_nb_link.vendor = vendor; + filter_nb_link.device = dev; + + pci_init(*pacc); + pci_scan_bus(*pacc); + + for (device = (*pacc)->devices; device; device = device->next) { + if (pci_filter_match(&filter_nb_link, device)) + return device; + } + pci_cleanup(*pacc); + return NULL; +} + +/* Typically one wants to get a specific slot(device)/func of the root domain + and bus */ +struct pci_dev 
/*
 * Read the beginning of the file at path into buf and NUL-terminate it.
 *
 * path:   file to read
 * buf:    destination buffer
 * buflen: size of buf in bytes; at most buflen - 1 bytes are read so that
 *         the terminator always fits
 *
 * Returns the number of bytes read, or 0 if buflen is 0, the file could
 * not be opened, the read failed, or nothing was read. buf is left
 * untouched in every case where 0 is returned.
 */
unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t numread;
	int fd;

	/* buflen - 1 would wrap to SIZE_MAX and allow an unbounded read */
	if (buflen == 0)
		return 0;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		return 0;

	numread = read(fd, buf, buflen - 1);
	close(fd);
	if (numread < 1)
		return 0;

	buf[numread] = '\0';

	return (unsigned int) numread;
}
+ return -EIO; + } + linebuf[numread] = '\0'; + close(fd); + + value = strtoull(linebuf, &endp, 0); + if (value > 1) + return -EINVAL; + + return value; +} + +/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ + + +/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ + +/* + * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir + * exists. + * For example the functionality to disable c-states was introduced in later + * kernel versions, this function can be used to explicitly check for this + * feature. + * + * returns 1 if the file exists, 0 otherwise. + */ +unsigned int sysfs_idlestate_file_exists(unsigned int cpu, + unsigned int idlestate, + const char *fname) +{ + char path[SYSFS_PATH_MAX]; + struct stat statbuf; + + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + if (stat(path, &statbuf) != 0) + return 0; + return 1; +} + +/* + * helper function to read file from /sys into given buffer + * fname is a relative path under "cpuX/cpuidle/stateX/" dir + * cstates starting with 0, C0 is not counted as cstate. 
+ * This means if you want C1 info, pass 0 as idlestate param + */ +unsigned int sysfs_idlestate_read_file(unsigned int cpu, unsigned int idlestate, + const char *fname, char *buf, size_t buflen) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numread; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +/* + * helper function to write a new value to a /sys file + * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir + * + * Returns the number of bytes written or 0 on error + */ +static +unsigned int sysfs_idlestate_write_file(unsigned int cpu, + unsigned int idlestate, + const char *fname, + const char *value, size_t len) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numwrite; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s", + cpu, idlestate, fname); + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwrite = write(fd, value, len); + if (numwrite < 1) { + close(fd); + return 0; + } + + close(fd); + + return (unsigned int) numwrite; +} + +/* read access to files which contain one numeric value */ + +enum idlestate_value { + IDLESTATE_USAGE, + IDLESTATE_POWER, + IDLESTATE_LATENCY, + IDLESTATE_TIME, + IDLESTATE_DISABLE, + MAX_IDLESTATE_VALUE_FILES +}; + +static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = { + [IDLESTATE_USAGE] = "usage", + [IDLESTATE_POWER] = "power", + [IDLESTATE_LATENCY] = "latency", + [IDLESTATE_TIME] = "time", + [IDLESTATE_DISABLE] = "disable", +}; + +static unsigned long long sysfs_idlestate_get_one_value(unsigned int cpu, + unsigned int idlestate, + enum idlestate_value which) +{ + unsigned long long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + + if (which >= 
MAX_IDLESTATE_VALUE_FILES) + return 0; + + len = sysfs_idlestate_read_file(cpu, idlestate, + idlestate_value_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return 0; + + value = strtoull(linebuf, &endp, 0); + + if (endp == linebuf || errno == ERANGE) + return 0; + + return value; +} + +/* read access to files which contain one string */ + +enum idlestate_string { + IDLESTATE_DESC, + IDLESTATE_NAME, + MAX_IDLESTATE_STRING_FILES +}; + +static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = { + [IDLESTATE_DESC] = "desc", + [IDLESTATE_NAME] = "name", +}; + + +static char *sysfs_idlestate_get_one_string(unsigned int cpu, + unsigned int idlestate, + enum idlestate_string which) +{ + char linebuf[MAX_LINE_LEN]; + char *result; + unsigned int len; + + if (which >= MAX_IDLESTATE_STRING_FILES) + return NULL; + + len = sysfs_idlestate_read_file(cpu, idlestate, + idlestate_string_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + result = strdup(linebuf); + if (result == NULL) + return NULL; + + if (result[strlen(result) - 1] == '\n') + result[strlen(result) - 1] = '\0'; + + return result; +} + +/* + * Returns: + * 1 if disabled + * 0 if enabled + * -1 if idlestate is not available + * -2 if disabling is not supported by the kernel + */ +int sysfs_is_idlestate_disabled(unsigned int cpu, + unsigned int idlestate) +{ + if (sysfs_get_idlestate_count(cpu) <= idlestate) + return -1; + + if (!sysfs_idlestate_file_exists(cpu, idlestate, + idlestate_value_files[IDLESTATE_DISABLE])) + return -2; + return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_DISABLE); +} + +/* + * Pass 1 as last argument to disable or 0 to enable the state + * Returns: + * 0 on success + * negative values on error, for example: + * -1 if idlestate is not available + * -2 if disabling is not supported by the kernel + * -3 No write access to disable/enable C-states + */ +int sysfs_idlestate_disable(unsigned int cpu, + unsigned int idlestate, + 
unsigned int disable) +{ + char value[SYSFS_PATH_MAX]; + int bytes_written; + + if (sysfs_get_idlestate_count(cpu) <= idlestate) + return -1; + + if (!sysfs_idlestate_file_exists(cpu, idlestate, + idlestate_value_files[IDLESTATE_DISABLE])) + return -2; + + snprintf(value, SYSFS_PATH_MAX, "%u", disable); + + bytes_written = sysfs_idlestate_write_file(cpu, idlestate, "disable", + value, sizeof(disable)); + if (bytes_written) + return 0; + return -3; +} + +unsigned long sysfs_get_idlestate_latency(unsigned int cpu, + unsigned int idlestate) +{ + return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_LATENCY); +} + +unsigned long sysfs_get_idlestate_usage(unsigned int cpu, + unsigned int idlestate) +{ + return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_USAGE); +} + +unsigned long long sysfs_get_idlestate_time(unsigned int cpu, + unsigned int idlestate) +{ + return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_TIME); +} + +char *sysfs_get_idlestate_name(unsigned int cpu, unsigned int idlestate) +{ + return sysfs_idlestate_get_one_string(cpu, idlestate, IDLESTATE_NAME); +} + +char *sysfs_get_idlestate_desc(unsigned int cpu, unsigned int idlestate) +{ + return sysfs_idlestate_get_one_string(cpu, idlestate, IDLESTATE_DESC); +} + +/* + * Returns number of supported C-states of CPU core cpu + * Negativ in error case + * Zero if cpuidle does not export any C-states + */ +unsigned int sysfs_get_idlestate_count(unsigned int cpu) +{ + char file[SYSFS_PATH_MAX]; + struct stat statbuf; + int idlestates = 1; + + + snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle"); + if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) + return 0; + + snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu); + if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) + return 0; + + while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) { + snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU + "cpu%u/cpuidle/state%d", cpu, idlestates); + 
idlestates++; + } + idlestates--; + return idlestates; +} + +/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/ + +/* + * helper function to read file from /sys into given buffer + * fname is a relative path under "cpu/cpuidle/" dir + */ +static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf, + size_t buflen) +{ + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname); + + return sysfs_read_file(path, buf, buflen); +} + + + +/* read access to files which contain one string */ + +enum cpuidle_string { + CPUIDLE_GOVERNOR, + CPUIDLE_GOVERNOR_RO, + CPUIDLE_DRIVER, + MAX_CPUIDLE_STRING_FILES +}; + +static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = { + [CPUIDLE_GOVERNOR] = "current_governor", + [CPUIDLE_GOVERNOR_RO] = "current_governor_ro", + [CPUIDLE_DRIVER] = "current_driver", +}; + + +static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which) +{ + char linebuf[MAX_LINE_LEN]; + char *result; + unsigned int len; + + if (which >= MAX_CPUIDLE_STRING_FILES) + return NULL; + + len = sysfs_cpuidle_read_file(cpuidle_string_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; + + result = strdup(linebuf); + if (result == NULL) + return NULL; + + if (result[strlen(result) - 1] == '\n') + result[strlen(result) - 1] = '\0'; + + return result; +} + +char *sysfs_get_cpuidle_governor(void) +{ + char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO); + if (!tmp) + return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR); + else + return tmp; +} + +char *sysfs_get_cpuidle_driver(void) +{ + return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER); +} +/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ + +/* + * Get sched_mc or sched_smt settings + * Pass "mc" or "smt" as argument + * + * Returns negative value on failure + */ +int sysfs_get_sched(const char *smt_mc) +{ + return -ENODEV; +} + +/* + * Get sched_mc or sched_smt 
settings + * Pass "mc" or "smt" as argument + * + * Returns negative value on failure + */ +int sysfs_set_sched(const char *smt_mc, int val) +{ + return -ENODEV; +} diff --git a/kernel/tools/power/cpupower/utils/helpers/sysfs.h b/kernel/tools/power/cpupower/utils/helpers/sysfs.h new file mode 100644 index 000000000..d28f11fed --- /dev/null +++ b/kernel/tools/power/cpupower/utils/helpers/sysfs.h @@ -0,0 +1,38 @@ +#ifndef __CPUPOWER_HELPERS_SYSFS_H__ +#define __CPUPOWER_HELPERS_SYSFS_H__ + +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define MAX_LINE_LEN 255 +#define SYSFS_PATH_MAX 255 + +extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); + +extern unsigned int sysfs_idlestate_file_exists(unsigned int cpu, + unsigned int idlestate, + const char *fname); + +extern int sysfs_is_cpu_online(unsigned int cpu); + +extern int sysfs_is_idlestate_disabled(unsigned int cpu, + unsigned int idlestate); +extern int sysfs_idlestate_disable(unsigned int cpu, unsigned int idlestate, + unsigned int disable); +extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu, + unsigned int idlestate); +extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu, + unsigned int idlestate); +extern unsigned long long sysfs_get_idlestate_time(unsigned int cpu, + unsigned int idlestate); +extern char *sysfs_get_idlestate_name(unsigned int cpu, + unsigned int idlestate); +extern char *sysfs_get_idlestate_desc(unsigned int cpu, + unsigned int idlestate); +extern unsigned int sysfs_get_idlestate_count(unsigned int cpu); + +extern char *sysfs_get_cpuidle_governor(void); +extern char *sysfs_get_cpuidle_driver(void); + +extern int sysfs_get_sched(const char *smt_mc); +extern int sysfs_set_sched(const char *smt_mc, int val); + +#endif /* __CPUPOWER_HELPERS_SYSFS_H__ */ diff --git a/kernel/tools/power/cpupower/utils/helpers/topology.c b/kernel/tools/power/cpupower/utils/helpers/topology.c new file mode 100644 index 000000000..c13120af5 --- /dev/null +++ 
b/kernel/tools/power/cpupower/utils/helpers/topology.c @@ -0,0 +1,116 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * ToDo: Needs to be done more properly for AMD/Intel specifics + */ + +/* Helper struct for qsort, must be in sync with cpupower_topology.cpu_info */ +/* Be careful: Need to pass unsigned to the sort, so that offlined cores are + in the end, but double check for -1 for offlined cpus at other places */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> + +#include <helpers/helpers.h> +#include <helpers/sysfs.h> + +/* returns -1 on failure, 0 on success */ +static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result) +{ + char linebuf[MAX_LINE_LEN]; + char *endp; + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", + cpu, fname); + if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) + return -1; + *result = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + return 0; +} + +static int __compare(const void *t1, const void *t2) +{ + struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; + struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2; + if (top1->pkg < top2->pkg) + return -1; + else if (top1->pkg > top2->pkg) + return 1; + else if (top1->core < top2->core) + return -1; + else if (top1->core > top2->core) + return 1; + else if (top1->cpu < top2->cpu) + return -1; + else if (top1->cpu > top2->cpu) + return 1; + else + return 0; +} + +/* + * Returns amount of cpus, negative on error, cpu_top must be + * passed to cpu_topology_release to free resources + * + * Array is sorted after ->pkg, ->core, then ->cpu + */ +int get_cpu_topology(struct cpupower_topology *cpu_top) +{ + int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); + + cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) 
* cpus); + if (cpu_top->core_info == NULL) + return -ENOMEM; + cpu_top->pkgs = cpu_top->cores = 0; + for (cpu = 0; cpu < cpus; cpu++) { + cpu_top->core_info[cpu].cpu = cpu; + cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); + if(sysfs_topology_read_file( + cpu, + "physical_package_id", + &(cpu_top->core_info[cpu].pkg)) < 0) + return -1; + if(sysfs_topology_read_file( + cpu, + "core_id", + &(cpu_top->core_info[cpu].core)) < 0) + return -1; + } + + qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), + __compare); + + /* Count the number of distinct pkgs values. This works + because the primary sort of the core_info struct was just + done by pkg value. */ + last_pkg = cpu_top->core_info[0].pkg; + for(cpu = 1; cpu < cpus; cpu++) { + if(cpu_top->core_info[cpu].pkg != last_pkg) { + last_pkg = cpu_top->core_info[cpu].pkg; + cpu_top->pkgs++; + } + } + cpu_top->pkgs++; + + /* Intel's cores count is not consecutively numbered, there may + * be a core_id of 3, but none of 2. Assume there always is 0 + * Get amount of cores by counting duplicates in a package + for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) { + if (cpu_top->core_info[cpu].core == 0) + cpu_top->cores++; + */ + return cpus; +} + +void cpu_topology_release(struct cpupower_topology cpu_top) +{ + free(cpu_top.core_info); +} diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/kernel/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c new file mode 100644 index 000000000..2116df9ad --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -0,0 +1,335 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. 
+ * + * PCI initialization based on example code from: + * Andreas Herrmann <andreas.herrmann3@amd.com> + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <time.h> +#include <string.h> + +#include <pci/pci.h> + +#include "idle_monitor/cpupower-monitor.h" +#include "helpers/helpers.h" + +#define PCI_NON_PC0_OFFSET 0xb0 +#define PCI_PC1_OFFSET 0xb4 +#define PCI_PC6_OFFSET 0xb8 + +#define PCI_MONITOR_ENABLE_REG 0xe0 + +#define PCI_NON_PC0_ENABLE_BIT 0 +#define PCI_PC1_ENABLE_BIT 1 +#define PCI_PC6_ENABLE_BIT 2 + +#define PCI_NBP1_STAT_OFFSET 0x98 +#define PCI_NBP1_ACTIVE_BIT 2 +#define PCI_NBP1_ENTERED_BIT 1 + +#define PCI_NBP1_CAP_OFFSET 0x90 +#define PCI_NBP1_CAPABLE_BIT 31 + +#define OVERFLOW_MS 343597 /* 32 bit register filled at 12500 HZ + (1 tick per 80ns) */ + +enum amd_fam14h_states {NON_PC0 = 0, PC1, PC6, NBP1, + AMD_FAM14H_STATE_NUM}; + +static int fam14h_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); +static int fam14h_nbp1_count(unsigned int id, unsigned long long *count, + unsigned int cpu); + +static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = { + { + .name = "!PC0", + .desc = N_("Package in sleep state (PC1 or deeper)"), + .id = NON_PC0, + .range = RANGE_PACKAGE, + .get_count_percent = fam14h_get_count_percent, + }, + { + .name = "PC1", + .desc = N_("Processor Package C1"), + .id = PC1, + .range = RANGE_PACKAGE, + .get_count_percent = fam14h_get_count_percent, + }, + { + .name = "PC6", + .desc = N_("Processor Package C6"), + .id = PC6, + .range = RANGE_PACKAGE, + .get_count_percent = fam14h_get_count_percent, + }, + { + .name = "NBP1", + .desc = N_("North Bridge P1 boolean counter (returns 0 or 1)"), + .id = NBP1, + .range = RANGE_PACKAGE, + .get_count = fam14h_nbp1_count, + }, +}; + +static struct pci_access *pci_acc; +static struct pci_dev *amd_fam14h_pci_dev; +static int nbp1_entered; + +struct timespec start_time; +static unsigned long 
long timediff; + +#ifdef DEBUG +struct timespec dbg_time; +long dbg_timediff; +#endif + +static unsigned long long *previous_count[AMD_FAM14H_STATE_NUM]; +static unsigned long long *current_count[AMD_FAM14H_STATE_NUM]; + +static int amd_fam14h_get_pci_info(struct cstate *state, + unsigned int *pci_offset, + unsigned int *enable_bit, + unsigned int cpu) +{ + switch (state->id) { + case NON_PC0: + *enable_bit = PCI_NON_PC0_ENABLE_BIT; + *pci_offset = PCI_NON_PC0_OFFSET; + break; + case PC1: + *enable_bit = PCI_PC1_ENABLE_BIT; + *pci_offset = PCI_PC1_OFFSET; + break; + case PC6: + *enable_bit = PCI_PC6_ENABLE_BIT; + *pci_offset = PCI_PC6_OFFSET; + break; + case NBP1: + *enable_bit = PCI_NBP1_ENTERED_BIT; + *pci_offset = PCI_NBP1_STAT_OFFSET; + break; + default: + return -1; + }; + return 0; +} + +static int amd_fam14h_init(cstate_t *state, unsigned int cpu) +{ + int enable_bit, pci_offset, ret; + uint32_t val; + + ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu); + if (ret) + return ret; + + /* NBP1 needs extra treating -> write 1 to D18F6x98 bit 1 for init */ + if (state->id == NBP1) { + val = pci_read_long(amd_fam14h_pci_dev, pci_offset); + val |= 1 << enable_bit; + val = pci_write_long(amd_fam14h_pci_dev, pci_offset, val); + return ret; + } + + /* Enable monitor */ + val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG); + dprint("Init %s: read at offset: 0x%x val: %u\n", state->name, + PCI_MONITOR_ENABLE_REG, (unsigned int) val); + val |= 1 << enable_bit; + pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val); + + dprint("Init %s: offset: 0x%x enable_bit: %d - val: %u (%u)\n", + state->name, PCI_MONITOR_ENABLE_REG, enable_bit, + (unsigned int) val, cpu); + + /* Set counter to zero */ + pci_write_long(amd_fam14h_pci_dev, pci_offset, 0); + previous_count[state->id][cpu] = 0; + + return 0; +} + +static int amd_fam14h_disable(cstate_t *state, unsigned int cpu) +{ + int enable_bit, pci_offset, ret; + uint32_t val; + + ret = 
amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu); + if (ret) + return ret; + + val = pci_read_long(amd_fam14h_pci_dev, pci_offset); + dprint("%s: offset: 0x%x %u\n", state->name, pci_offset, val); + if (state->id == NBP1) { + /* was the bit whether NBP1 got entered set? */ + nbp1_entered = (val & (1 << PCI_NBP1_ACTIVE_BIT)) | + (val & (1 << PCI_NBP1_ENTERED_BIT)); + + dprint("NBP1 was %sentered - 0x%x - enable_bit: " + "%d - pci_offset: 0x%x\n", + nbp1_entered ? "" : "not ", + val, enable_bit, pci_offset); + return ret; + } + current_count[state->id][cpu] = val; + + dprint("%s: Current - %llu (%u)\n", state->name, + current_count[state->id][cpu], cpu); + dprint("%s: Previous - %llu (%u)\n", state->name, + previous_count[state->id][cpu], cpu); + + val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG); + val &= ~(1 << enable_bit); + pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val); + + return 0; +} + +static int fam14h_nbp1_count(unsigned int id, unsigned long long *count, + unsigned int cpu) +{ + if (id == NBP1) { + if (nbp1_entered) + *count = 1; + else + *count = 0; + return 0; + } + return -1; +} +static int fam14h_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + unsigned long diff; + + if (id >= AMD_FAM14H_STATE_NUM) + return -1; + /* residency count in 80ns -> divide through 12.5 to get us residency */ + diff = current_count[id][cpu] - previous_count[id][cpu]; + + if (timediff == 0) + *percent = 0.0; + else + *percent = 100.0 * diff / timediff / 12.5; + + dprint("Timediff: %llu - res~: %lu us - percent: %.2f %%\n", + timediff, diff * 10 / 125, *percent); + + return 0; +} + +static int amd_fam14h_start(void) +{ + int num, cpu; + clock_gettime(CLOCK_REALTIME, &start_time); + for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) + amd_fam14h_init(&amd_fam14h_cstates[num], cpu); + } +#ifdef DEBUG + clock_gettime(CLOCK_REALTIME, &dbg_time); + dbg_timediff = 
timespec_diff_us(start_time, dbg_time); + dprint("Enabling counters took: %lu us\n", + dbg_timediff); +#endif + return 0; +} + +static int amd_fam14h_stop(void) +{ + int num, cpu; + struct timespec end_time; + + clock_gettime(CLOCK_REALTIME, &end_time); + + for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) + amd_fam14h_disable(&amd_fam14h_cstates[num], cpu); + } +#ifdef DEBUG + clock_gettime(CLOCK_REALTIME, &dbg_time); + dbg_timediff = timespec_diff_us(end_time, dbg_time); + dprint("Disabling counters took: %lu ns\n", dbg_timediff); +#endif + timediff = timespec_diff_us(start_time, end_time); + if (timediff / 1000 > OVERFLOW_MS) + print_overflow_err((unsigned int)timediff / 1000000, + OVERFLOW_MS / 1000); + + return 0; +} + +static int is_nbp1_capable(void) +{ + uint32_t val; + val = pci_read_long(amd_fam14h_pci_dev, PCI_NBP1_CAP_OFFSET); + return val & (1 << 31); +} + +struct cpuidle_monitor *amd_fam14h_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_AMD) + return NULL; + + if (cpupower_cpu_info.family == 0x14) + strncpy(amd_fam14h_monitor.name, "Fam_14h", + MONITOR_NAME_LEN - 1); + else if (cpupower_cpu_info.family == 0x12) + strncpy(amd_fam14h_monitor.name, "Fam_12h", + MONITOR_NAME_LEN - 1); + else + return NULL; + + /* We do not alloc for nbp1 machine wide counter */ + for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + + /* We need PCI device: Slot 18, Func 6, compare with BKDG + for fam 12h/14h */ + amd_fam14h_pci_dev = pci_slot_func_init(&pci_acc, 0x18, 6); + if (amd_fam14h_pci_dev == NULL || pci_acc == NULL) + return NULL; + + if (!is_nbp1_capable()) + amd_fam14h_monitor.hw_states_num = AMD_FAM14H_STATE_NUM - 1; + + amd_fam14h_monitor.name_len = strlen(amd_fam14h_monitor.name); + return &amd_fam14h_monitor; +} + +static void 
amd_fam14h_unregister(void) +{ + int num; + for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) { + free(previous_count[num]); + free(current_count[num]); + } + pci_cleanup(pci_acc); +} + +struct cpuidle_monitor amd_fam14h_monitor = { + .name = "", + .hw_states = amd_fam14h_cstates, + .hw_states_num = AMD_FAM14H_STATE_NUM, + .start = amd_fam14h_start, + .stop = amd_fam14h_stop, + .do_register = amd_fam14h_register, + .unregister = amd_fam14h_unregister, + .needs_root = 1, + .overflow_s = OVERFLOW_MS / 1000, +}; +#endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/kernel/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c new file mode 100644 index 000000000..bcd22a1a3 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -0,0 +1,196 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc + * + * Licensed under the terms of the GNU GPL License version 2. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> + +#include "helpers/sysfs.h" +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define CPUIDLE_STATES_MAX 10 +static cstate_t cpuidle_cstates[CPUIDLE_STATES_MAX]; +struct cpuidle_monitor cpuidle_sysfs_monitor; + +static unsigned long long **previous_count; +static unsigned long long **current_count; +struct timespec start_time; +static unsigned long long timediff; + +static int cpuidle_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + unsigned long long statediff = current_count[cpu][id] + - previous_count[cpu][id]; + dprint("%s: - diff: %llu - percent: %f (%u)\n", + cpuidle_cstates[id].name, timediff, *percent, cpu); + + if (timediff == 0) + *percent = 0.0; + else + *percent = ((100.0 * statediff) / timediff); + + dprint("%s: - timediff: %llu - statediff: %llu - percent: %f (%u)\n", + cpuidle_cstates[id].name, 
timediff, statediff, *percent, cpu); + + return 0; +} + +static int cpuidle_start(void) +{ + int cpu, state; + clock_gettime(CLOCK_REALTIME, &start_time); + for (cpu = 0; cpu < cpu_count; cpu++) { + for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num; + state++) { + previous_count[cpu][state] = + sysfs_get_idlestate_time(cpu, state); + dprint("CPU %d - State: %d - Val: %llu\n", + cpu, state, previous_count[cpu][state]); + } + }; + return 0; +} + +static int cpuidle_stop(void) +{ + int cpu, state; + struct timespec end_time; + clock_gettime(CLOCK_REALTIME, &end_time); + timediff = timespec_diff_us(start_time, end_time); + + for (cpu = 0; cpu < cpu_count; cpu++) { + for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num; + state++) { + current_count[cpu][state] = + sysfs_get_idlestate_time(cpu, state); + dprint("CPU %d - State: %d - Val: %llu\n", + cpu, state, previous_count[cpu][state]); + } + }; + return 0; +} + +void fix_up_intel_idle_driver_name(char *tmp, int num) +{ + /* fix up cpuidle name for intel idle driver */ + if (!strncmp(tmp, "NHM-", 4)) { + switch (num) { + case 1: + strcpy(tmp, "C1"); + break; + case 2: + strcpy(tmp, "C3"); + break; + case 3: + strcpy(tmp, "C6"); + break; + } + } else if (!strncmp(tmp, "SNB-", 4)) { + switch (num) { + case 1: + strcpy(tmp, "C1"); + break; + case 2: + strcpy(tmp, "C3"); + break; + case 3: + strcpy(tmp, "C6"); + break; + case 4: + strcpy(tmp, "C7"); + break; + } + } else if (!strncmp(tmp, "ATM-", 4)) { + switch (num) { + case 1: + strcpy(tmp, "C1"); + break; + case 2: + strcpy(tmp, "C2"); + break; + case 3: + strcpy(tmp, "C4"); + break; + case 4: + strcpy(tmp, "C6"); + break; + } + } +} + +static struct cpuidle_monitor *cpuidle_register(void) +{ + int num; + char *tmp; + + /* Assume idle state count is the same for all CPUs */ + cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0); + + if (cpuidle_sysfs_monitor.hw_states_num <= 0) + return NULL; + + for (num = 0; num < 
cpuidle_sysfs_monitor.hw_states_num; num++) { + tmp = sysfs_get_idlestate_name(0, num); + if (tmp == NULL) + continue; + + fix_up_intel_idle_driver_name(tmp, num); + strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); + free(tmp); + + tmp = sysfs_get_idlestate_desc(0, num); + if (tmp == NULL) + continue; + strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); + free(tmp); + + cpuidle_cstates[num].range = RANGE_THREAD; + cpuidle_cstates[num].id = num; + cpuidle_cstates[num].get_count_percent = + cpuidle_get_count_percent; + }; + + /* Free this at program termination */ + previous_count = malloc(sizeof(long long *) * cpu_count); + current_count = malloc(sizeof(long long *) * cpu_count); + for (num = 0; num < cpu_count; num++) { + previous_count[num] = malloc(sizeof(long long) * + cpuidle_sysfs_monitor.hw_states_num); + current_count[num] = malloc(sizeof(long long) * + cpuidle_sysfs_monitor.hw_states_num); + } + + cpuidle_sysfs_monitor.name_len = strlen(cpuidle_sysfs_monitor.name); + return &cpuidle_sysfs_monitor; +} + +void cpuidle_unregister(void) +{ + int num; + + for (num = 0; num < cpu_count; num++) { + free(previous_count[num]); + free(current_count[num]); + } + free(previous_count); + free(current_count); +} + +struct cpuidle_monitor cpuidle_sysfs_monitor = { + .name = "Idle_Stats", + .hw_states = cpuidle_cstates, + .start = cpuidle_start, + .stop = cpuidle_stop, + .do_register = cpuidle_register, + .unregister = cpuidle_unregister, + .needs_root = 0, + .overflow_s = UINT_MAX, +}; diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/kernel/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c new file mode 100644 index 000000000..c4bae9203 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -0,0 +1,455 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. 
/*
 * Return the time elapsed between @start and @end in microseconds.
 *
 * All arithmetic is done in long long so that long intervals do not
 * overflow where time_t/long are 32 bits wide. Sub-microsecond remainders
 * are truncated.
 */
long long timespec_diff_us(struct timespec start, struct timespec end)
{
	long long sec = (long long)end.tv_sec - (long long)start.tv_sec;
	long long nsec = (long long)end.tv_nsec - (long long)start.tv_nsec;

	/* Borrow one second when the nanosecond part went negative */
	if (nsec < 0) {
		sec -= 1;
		nsec += 1000000000LL;
	}
	return sec * 1000000LL + nsec / 1000;
}
fill_string_with_spaces(buf, topology_depth * 5 - 1); + printf("%s|", buf); + + for (mon = 0; mon < avail_monitors; mon++) { + need_len = monitors[mon]->hw_states_num * (percent_width + 3) + - 1; + if (mon != 0) { + printf("|| "); + need_len--; + } + sprintf(buf, "%s", monitors[mon]->name); + fill_string_with_spaces(buf, need_len); + printf("%s", buf); + } + printf("\n"); + + if (topology_depth > 2) + printf("PKG |"); + if (topology_depth > 1) + printf("CORE|"); + if (topology_depth > 0) + printf("CPU |"); + + for (mon = 0; mon < avail_monitors; mon++) { + if (mon != 0) + printf("|| "); + else + printf(" "); + for (state = 0; state < monitors[mon]->hw_states_num; state++) { + if (state != 0) + printf(" | "); + s = monitors[mon]->hw_states[state]; + sprintf(buf, "%s", s.name); + fill_string_with_spaces(buf, percent_width); + printf("%s", buf); + } + printf(" "); + } + printf("\n"); +} + + +void print_results(int topology_depth, int cpu) +{ + unsigned int mon; + int state, ret; + double percent; + unsigned long long result; + cstate_t s; + + /* Be careful CPUs may got resorted for pkg value do not just use cpu */ + if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) + return; + + if (topology_depth > 2) + printf("%4d|", cpu_top.core_info[cpu].pkg); + if (topology_depth > 1) + printf("%4d|", cpu_top.core_info[cpu].core); + if (topology_depth > 0) + printf("%4d|", cpu_top.core_info[cpu].cpu); + + for (mon = 0; mon < avail_monitors; mon++) { + if (mon != 0) + printf("||"); + + for (state = 0; state < monitors[mon]->hw_states_num; state++) { + if (state != 0) + printf("|"); + + s = monitors[mon]->hw_states[state]; + + if (s.get_count_percent) { + ret = s.get_count_percent(s.id, &percent, + cpu_top.core_info[cpu].cpu); + if (ret) + printf("******"); + else if (percent >= 100.0) + printf("%6.1f", percent); + else + printf("%6.2f", percent); + } else if (s.get_count) { + ret = s.get_count(s.id, &result, + cpu_top.core_info[cpu].cpu); + if (ret) + 
printf("******"); + else + printf("%6llu", result); + } else { + printf(_("Monitor %s, Counter %s has no count " + "function. Implementation error\n"), + monitors[mon]->name, s.name); + exit(EXIT_FAILURE); + } + } + } + /* + * The monitor could still provide useful data, for example + * AMD HW counters partly sit in PCI config space. + * It's up to the monitor plug-in to check .is_online, this one + * is just for additional info. + */ + if (!cpu_top.core_info[cpu].is_online) { + printf(_(" *is offline\n")); + return; + } else + printf("\n"); +} + + +/* param: string passed by -m param (The list of monitors to show) + * + * Monitors must have been registered already, matching monitors + * are picked out and available monitors array is overridden + * with matching ones + * + * Monitors get sorted in the same order the user passes them +*/ + +static void parse_monitor_param(char *param) +{ + unsigned int num; + int mon, hits = 0; + char *tmp = param, *token; + struct cpuidle_monitor *tmp_mons[MONITORS_MAX]; + + + for (mon = 0; mon < MONITORS_MAX; mon++, tmp = NULL) { + token = strtok(tmp, ","); + if (token == NULL) + break; + if (strlen(token) >= MONITOR_NAME_LEN) { + printf(_("%s: max monitor name length" + " (%d) exceeded\n"), token, MONITOR_NAME_LEN); + continue; + } + + for (num = 0; num < avail_monitors; num++) { + if (!strcmp(monitors[num]->name, token)) { + dprint("Found requested monitor: %s\n", token); + tmp_mons[hits] = monitors[num]; + hits++; + } + } + } + if (hits == 0) { + printf(_("No matching monitor found in %s, " + "try -l option\n"), param); + exit(EXIT_FAILURE); + } + /* Override detected/registerd monitors array with requested one */ + memcpy(monitors, tmp_mons, + sizeof(struct cpuidle_monitor *) * MONITORS_MAX); + avail_monitors = hits; +} + +void list_monitors(void) +{ + unsigned int mon; + int state; + cstate_t s; + + for (mon = 0; mon < avail_monitors; mon++) { + printf(_("Monitor \"%s\" (%d states) - Might overflow after %u " + "s\n"), + 
monitors[mon]->name, monitors[mon]->hw_states_num, + monitors[mon]->overflow_s); + + for (state = 0; state < monitors[mon]->hw_states_num; state++) { + s = monitors[mon]->hw_states[state]; + /* + * ToDo show more state capabilities: + * percent, time (granlarity) + */ + printf("%s\t[%c] -> %s\n", s.name, range_abbr[s.range], + gettext(s.desc)); + } + } +} + +int fork_it(char **argv) +{ + int status; + unsigned int num; + unsigned long long timediff; + pid_t child_pid; + struct timespec start, end; + + child_pid = fork(); + clock_gettime(CLOCK_REALTIME, &start); + + for (num = 0; num < avail_monitors; num++) + monitors[num]->start(); + + if (!child_pid) { + /* child */ + execvp(argv[0], argv); + } else { + /* parent */ + if (child_pid == -1) { + perror("fork"); + exit(1); + } + + signal(SIGINT, SIG_IGN); + signal(SIGQUIT, SIG_IGN); + if (waitpid(child_pid, &status, 0) == -1) { + perror("wait"); + exit(1); + } + } + clock_gettime(CLOCK_REALTIME, &end); + for (num = 0; num < avail_monitors; num++) + monitors[num]->stop(); + + timediff = timespec_diff_us(start, end); + if (WIFEXITED(status)) + printf(_("%s took %.5f seconds and exited with status %d\n"), + argv[0], timediff / (1000.0 * 1000), + WEXITSTATUS(status)); + return 0; +} + +int do_interval_measure(int i) +{ + unsigned int num; + int cpu; + + if (wake_cpus) + for (cpu = 0; cpu < cpu_count; cpu++) + bind_cpu(cpu); + + for (num = 0; num < avail_monitors; num++) { + dprint("HW C-state residency monitor: %s - States: %d\n", + monitors[num]->name, monitors[num]->hw_states_num); + monitors[num]->start(); + } + + sleep(i); + + if (wake_cpus) + for (cpu = 0; cpu < cpu_count; cpu++) + bind_cpu(cpu); + + for (num = 0; num < avail_monitors; num++) + monitors[num]->stop(); + + + return 0; +} + +static void cmdline(int argc, char *argv[]) +{ + int opt; + progname = basename(argv[0]); + + while ((opt = getopt(argc, argv, "+lci:m:")) != -1) { + switch (opt) { + case 'l': + if (mode) + print_wrong_arg_exit(); + mode = list; + 
break; + case 'i': + /* only allow -i with -m or no option */ + if (mode && mode != show) + print_wrong_arg_exit(); + interval = atoi(optarg); + break; + case 'm': + if (mode) + print_wrong_arg_exit(); + mode = show; + show_monitors_param = optarg; + break; + case 'c': + wake_cpus = 1; + break; + default: + print_wrong_arg_exit(); + } + } + if (!mode) + mode = show_all; +} + +int cmd_monitor(int argc, char **argv) +{ + unsigned int num; + struct cpuidle_monitor *test_mon; + int cpu; + + cmdline(argc, argv); + cpu_count = get_cpu_topology(&cpu_top); + if (cpu_count < 0) { + printf(_("Cannot read number of available processors\n")); + return EXIT_FAILURE; + } + + /* Default is: monitor all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + + dprint("System has up to %d CPU cores\n", cpu_count); + + for (num = 0; all_monitors[num]; num++) { + dprint("Try to register: %s\n", all_monitors[num]->name); + test_mon = all_monitors[num]->do_register(); + if (test_mon) { + if (test_mon->needs_root && !run_as_root) { + fprintf(stderr, _("Available monitor %s needs " + "root access\n"), test_mon->name); + continue; + } + monitors[avail_monitors] = test_mon; + dprint("%s registered\n", all_monitors[num]->name); + avail_monitors++; + } + } + + if (avail_monitors == 0) { + printf(_("No HW Cstate monitors found\n")); + return 1; + } + + if (mode == list) { + list_monitors(); + exit(EXIT_SUCCESS); + } + + if (mode == show) + parse_monitor_param(show_monitors_param); + + dprint("Packages: %d - Cores: %d - CPUs: %d\n", + cpu_top.pkgs, cpu_top.cores, cpu_count); + + /* + * if any params left, it must be a command to fork + */ + if (argc - optind) + fork_it(argv + optind); + else + do_interval_measure(interval); + + /* ToDo: Topology parsing needs fixing first to do + this more generically */ + if (cpu_top.pkgs > 1) + print_header(3); + else + print_header(1); + + for (cpu = 0; cpu < cpu_count; cpu++) { + if (cpu_top.pkgs > 1) + print_results(3, cpu); + else 
+ print_results(1, cpu); + } + + for (num = 0; num < avail_monitors; num++) + monitors[num]->unregister(); + + cpu_topology_release(cpu_top); + return 0; +} diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/kernel/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h new file mode 100644 index 000000000..9e43f3371 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -0,0 +1,85 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + */ + +#ifndef __CPUIDLE_INFO_HW__ +#define __CPUIDLE_INFO_HW__ + +#include <stdarg.h> +#include <time.h> + +#include "idle_monitor/idle_monitors.h" + +#define MONITORS_MAX 20 +#define MONITOR_NAME_LEN 20 +#define CSTATE_NAME_LEN 5 +#define CSTATE_DESC_LEN 60 + +int cpu_count; + +/* Hard to define the right names ...: */ +enum power_range_e { + RANGE_THREAD, /* Lowest in topology hierarcy, AMD: core, Intel: thread + kernel sysfs: cpu */ + RANGE_CORE, /* AMD: unit, Intel: core, kernel_sysfs: core_id */ + RANGE_PACKAGE, /* Package, processor socket */ + RANGE_MACHINE, /* Machine, platform wide */ + RANGE_MAX }; + +typedef struct cstate { + int id; + enum power_range_e range; + char name[CSTATE_NAME_LEN]; + char desc[CSTATE_DESC_LEN]; + + /* either provide a percentage or a general count */ + int (*get_count_percent)(unsigned int self_id, double *percent, + unsigned int cpu); + int (*get_count)(unsigned int self_id, unsigned long long *count, + unsigned int cpu); +} cstate_t; + +struct cpuidle_monitor { + /* Name must not contain whitespaces */ + char name[MONITOR_NAME_LEN]; + int name_len; + int hw_states_num; + cstate_t *hw_states; + int (*start) (void); + int (*stop) (void); + struct cpuidle_monitor* (*do_register) (void); + void (*unregister)(void); + unsigned int overflow_s; + int needs_root; +}; + +extern long long timespec_diff_us(struct timespec start, struct timespec end); + 
/*
 * Warn on stderr that a measurement of @mes seconds exceeded the @ov
 * seconds after which the hardware counters may overflow.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single
 * statement, also inside an unbraced if/else.
 */
#define print_overflow_err(mes, ov)						\
do {										\
	fprintf(stderr, gettext("Measure took %u seconds, but registers could "	\
				"overflow at %u seconds, results "		\
				"could be inaccurate\n"), (mes), (ov));		\
} while (0)
+ */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +#define MSR_TSC 0x10 + +enum intel_hsw_ext_id { PC8 = 0, PC9, PC10, HSW_EXT_CSTATE_COUNT, + TSC = 0xFFFF }; + +static int hsw_ext_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); + +static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = { + { + .name = "PC8", + .desc = N_("Processor Package C8"), + .id = PC8, + .range = RANGE_PACKAGE, + .get_count_percent = hsw_ext_get_count_percent, + }, + { + .name = "PC9", + .desc = N_("Processor Package C9"), + .desc = N_("Processor Package C2"), + .id = PC9, + .range = RANGE_PACKAGE, + .get_count_percent = hsw_ext_get_count_percent, + }, + { + .name = "PC10", + .desc = N_("Processor Package C10"), + .id = PC10, + .range = RANGE_PACKAGE, + .get_count_percent = hsw_ext_get_count_percent, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *previous_count[HSW_EXT_CSTATE_COUNT]; +static unsigned long long *current_count[HSW_EXT_CSTATE_COUNT]; +/* valid flag for all CPUs. 
If a MSR read failed it will be zero */ +static int *is_valid; + +static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val, + unsigned int cpu) +{ + int msr; + + switch (id) { + case PC8: + msr = MSR_PKG_C8_RESIDENCY; + break; + case PC9: + msr = MSR_PKG_C9_RESIDENCY; + break; + case PC10: + msr = MSR_PKG_C10_RESIDENCY; + break; + case TSC: + msr = MSR_TSC; + break; + default: + return -1; + }; + if (read_msr(cpu, msr, val)) + return -1; + return 0; +} + +static int hsw_ext_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + *percent = 0.0; + + if (!is_valid[cpu]) + return -1; + + *percent = (100.0 * + (current_count[id][cpu] - previous_count[id][cpu])) / + (tsc_at_measure_end - tsc_at_measure_start); + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + hsw_ext_cstates[id].name, previous_count[id][cpu], + current_count[id][cpu], cpu); + + dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n", + hsw_ext_cstates[id].name, + (unsigned long long) tsc_at_measure_end - tsc_at_measure_start, + current_count[id][cpu] - previous_count[id][cpu], + *percent, cpu); + + return 0; +} + +static int hsw_ext_start(void) +{ + int num, cpu; + unsigned long long val; + + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + hsw_ext_get_count(num, &val, cpu); + previous_count[num][cpu] = val; + } + } + hsw_ext_get_count(TSC, &tsc_at_measure_start, 0); + return 0; +} + +static int hsw_ext_stop(void) +{ + unsigned long long val; + int num, cpu; + + hsw_ext_get_count(TSC, &tsc_at_measure_end, 0); + + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu); + current_count[num][cpu] = val; + } + } + return 0; +} + +struct cpuidle_monitor intel_hsw_ext_monitor; + +static struct cpuidle_monitor *hsw_ext_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL + || 
cpupower_cpu_info.family != 6) + return NULL; + + switch (cpupower_cpu_info.model) { + case 0x45: /* HSW */ + break; + default: + return NULL; + } + + is_valid = calloc(cpu_count, sizeof(int)); + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + intel_hsw_ext_monitor.name_len = strlen(intel_hsw_ext_monitor.name); + return &intel_hsw_ext_monitor; +} + +void hsw_ext_unregister(void) +{ + int num; + free(is_valid); + for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { + free(previous_count[num]); + free(current_count[num]); + } +} + +struct cpuidle_monitor intel_hsw_ext_monitor = { + .name = "HaswellExtended", + .hw_states = hsw_ext_cstates, + .hw_states_num = HSW_EXT_CSTATE_COUNT, + .start = hsw_ext_start, + .stop = hsw_ext_stop, + .do_register = hsw_ext_register, + .unregister = hsw_ext_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/kernel/tools/power/cpupower/utils/idle_monitor/idle_monitors.def new file mode 100644 index 000000000..0d6ba4dbb --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/idle_monitors.def @@ -0,0 +1,8 @@ +#if defined(__i386__) || defined(__x86_64__) +DEF(amd_fam14h) +DEF(intel_nhm) +DEF(intel_snb) +DEF(intel_hsw_ext) +DEF(mperf) +#endif +DEF(cpuidle_sysfs) diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/idle_monitors.h b/kernel/tools/power/cpupower/utils/idle_monitor/idle_monitors.h new file mode 100644 index 000000000..4fcdeb1e0 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/idle_monitors.h @@ -0,0 +1,18 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. 
+ * + * Based on the idea from Michael Matz <matz@suse.de> + * + */ + +#ifndef _CPUIDLE_IDLE_MONITORS_H_ +#define _CPUIDLE_IDLE_MONITORS_H_ + +#define DEF(x) extern struct cpuidle_monitor x ##_monitor; +#include "idle_monitors.def" +#undef DEF +extern struct cpuidle_monitor *all_monitors[]; + +#endif /* _CPUIDLE_IDLE_MONITORS_H_ */ diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/kernel/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c new file mode 100644 index 000000000..90a8c4f07 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -0,0 +1,338 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#include <cpufreq.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_APERF 0xE8 +#define MSR_MPERF 0xE7 + +#define MSR_TSC 0x10 + +#define MSR_AMD_HWCR 0xc0010015 + +enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT }; + +static int mperf_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); +static int mperf_get_count_freq(unsigned int id, unsigned long long *count, + unsigned int cpu); +static struct timespec time_start, time_end; + +static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { + { + .name = "C0", + .desc = N_("Processor Core not idle"), + .id = C0, + .range = RANGE_THREAD, + .get_count_percent = mperf_get_count_percent, + }, + { + .name = "Cx", + .desc = N_("Processor Core in an idle state"), + .id = Cx, + .range = RANGE_THREAD, + .get_count_percent = mperf_get_count_percent, + }, + + { + .name = "Freq", + .desc = N_("Average Frequency (including boost) in MHz"), + .id = AVG_FREQ, + .range = RANGE_THREAD, + .get_count = mperf_get_count_freq, + }, +}; + +enum MAX_FREQ_MODE { 
MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF }; +static int max_freq_mode; +/* + * The max frequency mperf is ticking at (in C0), either retrieved via: + * 1) calculated after measurements if we know TSC ticks at mperf/P0 frequency + * 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time + * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen) + */ +static unsigned long max_frequency; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *mperf_previous_count; +static unsigned long long *aperf_previous_count; +static unsigned long long *mperf_current_count; +static unsigned long long *aperf_current_count; + +/* valid flag for all CPUs. If a MSR read failed it will be zero */ +static int *is_valid; + +static int mperf_get_tsc(unsigned long long *tsc) +{ + int ret; + ret = read_msr(0, MSR_TSC, tsc); + if (ret) + dprint("Reading TSC MSR failed, returning %llu\n", *tsc); + return ret; +} + +static int mperf_init_stats(unsigned int cpu) +{ + unsigned long long val; + int ret; + + ret = read_msr(cpu, MSR_APERF, &val); + aperf_previous_count[cpu] = val; + ret |= read_msr(cpu, MSR_MPERF, &val); + mperf_previous_count[cpu] = val; + is_valid[cpu] = !ret; + + return 0; +} + +static int mperf_measure_stats(unsigned int cpu) +{ + unsigned long long val; + int ret; + + ret = read_msr(cpu, MSR_APERF, &val); + aperf_current_count[cpu] = val; + ret |= read_msr(cpu, MSR_MPERF, &val); + mperf_current_count[cpu] = val; + is_valid[cpu] = !ret; + + return 0; +} + +static int mperf_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + unsigned long long aperf_diff, mperf_diff, tsc_diff; + unsigned long long timediff; + + if (!is_valid[cpu]) + return -1; + + if (id != C0 && id != Cx) + return -1; + + mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; + aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; + + if (max_freq_mode == 
MAX_FREQ_TSC_REF) { + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + *percent = 100.0 * mperf_diff / tsc_diff; + dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, tsc_diff); + } else if (max_freq_mode == MAX_FREQ_SYSFS) { + timediff = timespec_diff_us(time_start, time_end); + *percent = 100.0 * mperf_diff / timediff; + dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, timediff); + } else + return -1; + + if (id == Cx) + *percent = 100.0 - *percent; + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + mperf_cstates[id].name, mperf_diff, aperf_diff, cpu); + dprint("%s: %f\n", mperf_cstates[id].name, *percent); + return 0; +} + +static int mperf_get_count_freq(unsigned int id, unsigned long long *count, + unsigned int cpu) +{ + unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff; + + if (id != AVG_FREQ) + return 1; + + if (!is_valid[cpu]) + return -1; + + mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; + aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; + + if (max_freq_mode == MAX_FREQ_TSC_REF) { + /* Calculate max_freq from TSC count */ + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + time_diff = timespec_diff_us(time_start, time_end); + max_frequency = tsc_diff / time_diff; + } + + *count = max_frequency * ((double)aperf_diff / mperf_diff); + dprint("%s: Average freq based on %s maximum frequency:\n", + mperf_cstates[id].name, + (max_freq_mode == MAX_FREQ_TSC_REF) ? 
"TSC calculated" : "sysfs read"); + dprint("%max_frequency: %lu", max_frequency); + dprint("aperf_diff: %llu\n", aperf_diff); + dprint("mperf_diff: %llu\n", mperf_diff); + dprint("avg freq: %llu\n", *count); + return 0; +} + +static int mperf_start(void) +{ + int cpu; + unsigned long long dbg; + + clock_gettime(CLOCK_REALTIME, &time_start); + mperf_get_tsc(&tsc_at_measure_start); + + for (cpu = 0; cpu < cpu_count; cpu++) + mperf_init_stats(cpu); + + mperf_get_tsc(&dbg); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start); + return 0; +} + +static int mperf_stop(void) +{ + unsigned long long dbg; + int cpu; + + for (cpu = 0; cpu < cpu_count; cpu++) + mperf_measure_stats(cpu); + + mperf_get_tsc(&tsc_at_measure_end); + clock_gettime(CLOCK_REALTIME, &time_end); + + mperf_get_tsc(&dbg); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); + + return 0; +} + +/* + * Mperf register is defined to tick at P0 (maximum) frequency + * + * Instead of reading out P0 which can be tricky to read out from HW, + * we use TSC counter if it reliably ticks at P0/mperf frequency. + * + * Still try to fall back to: + * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq + * on older Intel HW without invariant TSC feature. + * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but + * it's still double checked (MSR_AMD_HWCR)). + * + * On these machines the user would still get useful mperf + * stats when acpi-cpufreq driver is loaded. + */ +static int init_maxfreq_mode(void) +{ + int ret; + unsigned long long hwcr; + unsigned long min; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC)) + goto use_sysfs; + + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) { + /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf + * freq. 
+ * A test whether hwcr is accessable/available would be: + * (cpupower_cpu_info.family > 0x10 || + * cpupower_cpu_info.family == 0x10 && + * cpupower_cpu_info.model >= 0x2)) + * This should be the case for all aperf/mperf + * capable AMD machines and is therefore safe to test here. + * Compare with Linus kernel git commit: acf01734b1747b1ec4 + */ + ret = read_msr(0, MSR_AMD_HWCR, &hwcr); + /* + * If the MSR read failed, assume a Xen system that did + * not explicitly provide access to it and assume TSC works + */ + if (ret != 0) { + dprint("TSC read 0x%x failed - assume TSC working\n", + MSR_AMD_HWCR); + return 0; + } else if (1 & (hwcr >> 24)) { + max_freq_mode = MAX_FREQ_TSC_REF; + return 0; + } else { /* Use sysfs max frequency if available */ } + } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) { + /* + * On Intel we assume mperf (in C0) is ticking at same + * rate than TSC + */ + max_freq_mode = MAX_FREQ_TSC_REF; + return 0; + } +use_sysfs: + if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) { + dprint("Cannot retrieve max freq from cpufreq kernel " + "subsystem\n"); + return -1; + } + max_freq_mode = MAX_FREQ_SYSFS; + return 0; +} + +/* + * This monitor provides: + * + * 1) Average frequency a CPU resided in + * This always works if the CPU has aperf/mperf capabilities + * + * 2) C0 and Cx (any sleep state) time a CPU resided in + * Works if mperf timer stops ticking in sleep states which + * seem to be the case on all current HW. + * Both is directly retrieved from HW registers and is independent + * from kernel statistics. 
+ */ +struct cpuidle_monitor mperf_monitor; +struct cpuidle_monitor *mperf_register(void) +{ + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) + return NULL; + + if (init_maxfreq_mode()) + return NULL; + + /* Free this at program termination */ + is_valid = calloc(cpu_count, sizeof(int)); + mperf_previous_count = calloc(cpu_count, sizeof(unsigned long long)); + aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long)); + mperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); + aperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); + + mperf_monitor.name_len = strlen(mperf_monitor.name); + return &mperf_monitor; +} + +void mperf_unregister(void) +{ + free(mperf_previous_count); + free(aperf_previous_count); + free(mperf_current_count); + free(aperf_current_count); + free(is_valid); +} + +struct cpuidle_monitor mperf_monitor = { + .name = "Mperf", + .hw_states_num = MPERF_CSTATE_COUNT, + .hw_states = mperf_cstates, + .start = mperf_start, + .stop = mperf_stop, + .do_register = mperf_register, + .unregister = mperf_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/kernel/tools/power/cpupower/utils/idle_monitor/nhm_idle.c new file mode 100644 index 000000000..d2a91dd0d --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -0,0 +1,216 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Based on Len Brown's <lenb@kernel.org> turbostat tool. 
+ */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_PKG_C3_RESIDENCY 0x3F8 +#define MSR_PKG_C6_RESIDENCY 0x3F9 +#define MSR_CORE_C3_RESIDENCY 0x3FC +#define MSR_CORE_C6_RESIDENCY 0x3FD + +#define MSR_TSC 0x10 + +#define NHM_CSTATE_COUNT 4 + +enum intel_nhm_id { C3 = 0, C6, PC3, PC6, TSC = 0xFFFF }; + +static int nhm_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); + +static cstate_t nhm_cstates[NHM_CSTATE_COUNT] = { + { + .name = "C3", + .desc = N_("Processor Core C3"), + .id = C3, + .range = RANGE_CORE, + .get_count_percent = nhm_get_count_percent, + }, + { + .name = "C6", + .desc = N_("Processor Core C6"), + .id = C6, + .range = RANGE_CORE, + .get_count_percent = nhm_get_count_percent, + }, + + { + .name = "PC3", + .desc = N_("Processor Package C3"), + .id = PC3, + .range = RANGE_PACKAGE, + .get_count_percent = nhm_get_count_percent, + }, + { + .name = "PC6", + .desc = N_("Processor Package C6"), + .id = PC6, + .range = RANGE_PACKAGE, + .get_count_percent = nhm_get_count_percent, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *previous_count[NHM_CSTATE_COUNT]; +static unsigned long long *current_count[NHM_CSTATE_COUNT]; +/* valid flag for all CPUs. 
If a MSR read failed it will be zero */ +static int *is_valid; + +static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val, + unsigned int cpu) +{ + int msr; + + switch (id) { + case C3: + msr = MSR_CORE_C3_RESIDENCY; + break; + case C6: + msr = MSR_CORE_C6_RESIDENCY; + break; + case PC3: + msr = MSR_PKG_C3_RESIDENCY; + break; + case PC6: + msr = MSR_PKG_C6_RESIDENCY; + break; + case TSC: + msr = MSR_TSC; + break; + default: + return -1; + }; + if (read_msr(cpu, msr, val)) + return -1; + + return 0; +} + +static int nhm_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + *percent = 0.0; + + if (!is_valid[cpu]) + return -1; + + *percent = (100.0 * + (current_count[id][cpu] - previous_count[id][cpu])) / + (tsc_at_measure_end - tsc_at_measure_start); + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + nhm_cstates[id].name, previous_count[id][cpu], + current_count[id][cpu], cpu); + + dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n", + nhm_cstates[id].name, + (unsigned long long) tsc_at_measure_end - tsc_at_measure_start, + current_count[id][cpu] - previous_count[id][cpu], + *percent, cpu); + + return 0; +} + +static int nhm_start(void) +{ + int num, cpu; + unsigned long long dbg, val; + + nhm_get_count(TSC, &tsc_at_measure_start, 0); + + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !nhm_get_count(num, &val, cpu); + previous_count[num][cpu] = val; + } + } + nhm_get_count(TSC, &dbg, 0); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start); + return 0; +} + +static int nhm_stop(void) +{ + unsigned long long val; + unsigned long long dbg; + int num, cpu; + + nhm_get_count(TSC, &tsc_at_measure_end, 0); + + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !nhm_get_count(num, &val, cpu); + current_count[num][cpu] = val; + } + } + nhm_get_count(TSC, &dbg, 0); + dprint("TSC diff: %llu\n", 
dbg - tsc_at_measure_end); + + return 0; +} + +struct cpuidle_monitor intel_nhm_monitor; + +struct cpuidle_monitor *intel_nhm_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL) + return NULL; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC)) + return NULL; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) + return NULL; + + /* Free this at program termination */ + is_valid = calloc(cpu_count, sizeof(int)); + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + + intel_nhm_monitor.name_len = strlen(intel_nhm_monitor.name); + return &intel_nhm_monitor; +} + +void intel_nhm_unregister(void) +{ + int num; + + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + free(previous_count[num]); + free(current_count[num]); + } + free(is_valid); +} + +struct cpuidle_monitor intel_nhm_monitor = { + .name = "Nehalem", + .hw_states_num = NHM_CSTATE_COUNT, + .hw_states = nhm_cstates, + .start = nhm_start, + .stop = nhm_stop, + .do_register = intel_nhm_register, + .unregister = intel_nhm_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif diff --git a/kernel/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/kernel/tools/power/cpupower/utils/idle_monitor/snb_idle.c new file mode 100644 index 000000000..efc8a69c9 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -0,0 +1,200 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Based on Len Brown's <lenb@kernel.org> turbostat tool. 
+ */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_PKG_C2_RESIDENCY 0x60D +#define MSR_PKG_C7_RESIDENCY 0x3FA +#define MSR_CORE_C7_RESIDENCY 0x3FE + +#define MSR_TSC 0x10 + +enum intel_snb_id { C7 = 0, PC2, PC7, SNB_CSTATE_COUNT, TSC = 0xFFFF }; + +static int snb_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); + +static cstate_t snb_cstates[SNB_CSTATE_COUNT] = { + { + .name = "C7", + .desc = N_("Processor Core C7"), + .id = C7, + .range = RANGE_CORE, + .get_count_percent = snb_get_count_percent, + }, + { + .name = "PC2", + .desc = N_("Processor Package C2"), + .id = PC2, + .range = RANGE_PACKAGE, + .get_count_percent = snb_get_count_percent, + }, + { + .name = "PC7", + .desc = N_("Processor Package C7"), + .id = PC7, + .range = RANGE_PACKAGE, + .get_count_percent = snb_get_count_percent, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *previous_count[SNB_CSTATE_COUNT]; +static unsigned long long *current_count[SNB_CSTATE_COUNT]; +/* valid flag for all CPUs. 
If a MSR read failed it will be zero */ +static int *is_valid; + +static int snb_get_count(enum intel_snb_id id, unsigned long long *val, + unsigned int cpu) +{ + int msr; + + switch (id) { + case C7: + msr = MSR_CORE_C7_RESIDENCY; + break; + case PC2: + msr = MSR_PKG_C2_RESIDENCY; + break; + case PC7: + msr = MSR_PKG_C7_RESIDENCY; + break; + case TSC: + msr = MSR_TSC; + break; + default: + return -1; + }; + if (read_msr(cpu, msr, val)) + return -1; + return 0; +} + +static int snb_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + *percent = 0.0; + + if (!is_valid[cpu]) + return -1; + + *percent = (100.0 * + (current_count[id][cpu] - previous_count[id][cpu])) / + (tsc_at_measure_end - tsc_at_measure_start); + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + snb_cstates[id].name, previous_count[id][cpu], + current_count[id][cpu], cpu); + + dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n", + snb_cstates[id].name, + (unsigned long long) tsc_at_measure_end - tsc_at_measure_start, + current_count[id][cpu] - previous_count[id][cpu], + *percent, cpu); + + return 0; +} + +static int snb_start(void) +{ + int num, cpu; + unsigned long long val; + + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + snb_get_count(num, &val, cpu); + previous_count[num][cpu] = val; + } + } + snb_get_count(TSC, &tsc_at_measure_start, 0); + return 0; +} + +static int snb_stop(void) +{ + unsigned long long val; + int num, cpu; + + snb_get_count(TSC, &tsc_at_measure_end, 0); + + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !snb_get_count(num, &val, cpu); + current_count[num][cpu] = val; + } + } + return 0; +} + +struct cpuidle_monitor intel_snb_monitor; + +static struct cpuidle_monitor *snb_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL + || cpupower_cpu_info.family != 6) + return NULL; + + switch 
(cpupower_cpu_info.model) { + case 0x2A: /* SNB */ + case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ + case 0x3C: /* HSW */ + case 0x3F: /* HSW */ + case 0x45: /* HSW */ + case 0x46: /* HSW */ + break; + default: + return NULL; + } + + is_valid = calloc(cpu_count, sizeof(int)); + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + intel_snb_monitor.name_len = strlen(intel_snb_monitor.name); + return &intel_snb_monitor; +} + +void snb_unregister(void) +{ + int num; + free(is_valid); + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + free(previous_count[num]); + free(current_count[num]); + } +} + +struct cpuidle_monitor intel_snb_monitor = { + .name = "SandyBridge", + .hw_states = snb_cstates, + .hw_states_num = SNB_CSTATE_COUNT, + .start = snb_start, + .stop = snb_stop, + .do_register = snb_register, + .unregister = snb_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/kernel/tools/power/cpupower/utils/version-gen.sh b/kernel/tools/power/cpupower/utils/version-gen.sh new file mode 100755 index 000000000..5ec41c556 --- /dev/null +++ b/kernel/tools/power/cpupower/utils/version-gen.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# Script which prints out the version to use for building cpupowerutils. +# Must be called from tools/power/cpupower/ +# +# Heavily based on tools/perf/util/PERF-VERSION-GEN . 
+ +LF=' +' + +# First check if there is a .git to get the version from git describe +# otherwise try to get the version from the kernel makefile +if test -d ../../../.git -o -f ../../../.git && + VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + case "$VN" in + *$LF*) (exit 1) ;; + v[0-9]*) + git update-index -q --refresh + test -z "$(git diff-index --name-only HEAD --)" || + VN="$VN-dirty" ;; + esac +then + VN=$(echo "$VN" | sed -e 's/-/./g'); +else + eval $(grep '^VERSION[[:space:]]*=' ../../../Makefile|tr -d ' ') + eval $(grep '^PATCHLEVEL[[:space:]]*=' ../../../Makefile|tr -d ' ') + eval $(grep '^SUBLEVEL[[:space:]]*=' ../../../Makefile|tr -d ' ') + eval $(grep '^EXTRAVERSION[[:space:]]*=' ../../../Makefile|tr -d ' ') + + VN="${VERSION}.${PATCHLEVEL}.${SUBLEVEL}${EXTRAVERSION}" +fi + +VN=$(expr "$VN" : v*'\(.*\)') + +echo $VN |