From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/tools/perf/bench/Build | 4 +- kernel/tools/perf/bench/bench.h | 4 + kernel/tools/perf/bench/futex-lock-pi.c | 219 +++++++++++++ kernel/tools/perf/bench/futex-wake-parallel.c | 294 +++++++++++++++++ kernel/tools/perf/bench/futex-wake.c | 7 +- kernel/tools/perf/bench/futex.h | 20 ++ kernel/tools/perf/bench/mem-functions.c | 379 ++++++++++++++++++++++ kernel/tools/perf/bench/mem-memcpy.c | 434 -------------------------- kernel/tools/perf/bench/numa.c | 37 ++- kernel/tools/perf/bench/sched-messaging.c | 10 +- 10 files changed, 964 insertions(+), 444 deletions(-) create mode 100644 kernel/tools/perf/bench/futex-lock-pi.c create mode 100644 kernel/tools/perf/bench/futex-wake-parallel.c create mode 100644 kernel/tools/perf/bench/mem-functions.c delete mode 100644 kernel/tools/perf/bench/mem-memcpy.c (limited to 'kernel/tools/perf/bench') diff --git a/kernel/tools/perf/bench/Build b/kernel/tools/perf/bench/Build index 5ce98023d..60bf11943 100644 --- a/kernel/tools/perf/bench/Build +++ b/kernel/tools/perf/bench/Build @@ -1,9 +1,11 @@ perf-y += sched-messaging.o perf-y += sched-pipe.o -perf-y += mem-memcpy.o +perf-y += mem-functions.o perf-y += futex-hash.o perf-y += futex-wake.o +perf-y += futex-wake-parallel.o perf-y += futex-requeue.o +perf-y += futex-lock-pi.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o 
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/kernel/tools/perf/bench/bench.h b/kernel/tools/perf/bench/bench.h index 3c4dd44d4..a50df86f2 100644 --- a/kernel/tools/perf/bench/bench.h +++ b/kernel/tools/perf/bench/bench.h @@ -33,7 +33,11 @@ extern int bench_mem_memcpy(int argc, const char **argv, extern int bench_mem_memset(int argc, const char **argv, const char *prefix); extern int bench_futex_hash(int argc, const char **argv, const char *prefix); extern int bench_futex_wake(int argc, const char **argv, const char *prefix); +extern int bench_futex_wake_parallel(int argc, const char **argv, + const char *prefix); extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); +/* pi futexes */ +extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/kernel/tools/perf/bench/futex-lock-pi.c b/kernel/tools/perf/bench/futex-lock-pi.c new file mode 100644 index 000000000..bc6a16adb --- /dev/null +++ b/kernel/tools/perf/bench/futex-lock-pi.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2015 Davidlohr Bueso. 
+ */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +struct worker { + int tid; + u_int32_t *futex; + pthread_t thread; + unsigned long ops; +}; + +static u_int32_t global_futex = 0; +static struct worker *worker; +static unsigned int nsecs = 10; +static bool silent = false, multi = false; +static bool done = false, fshared = false; +static unsigned int ncpus, nthreads = 0; +static int futex_flag = 0; +static struct timeval start, end, runtime; /* file-local: 'end' must not leak into the global namespace */ +static pthread_mutex_t thread_lock; +static unsigned int threads_starting; +static struct stats throughput_stats; +static pthread_cond_t thread_parent, thread_worker; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), + OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_lock_pi_usage[] = { + "perf bench futex lock-pi <options>", + NULL +}; + +static void print_summary(void) +{ + unsigned long avg = avg_stats(&throughput_stats); + double stddev = stddev_stats(&throughput_stats); + + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", + !silent ? 
"\n" : "", avg, rel_stddev_stats(stddev, avg), + (int) runtime.tv_sec); +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); +} + +static void *workerfn(void *arg) +{ + struct worker *w = (struct worker *) arg; + + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + do { + int ret; + again: + ret = futex_lock_pi(w->futex, NULL, 0, futex_flag); + + if (ret) { /* handle lock acquisition */ + if (!silent) + warn("thread %d: Could not lock pi-lock for %p (%d)", + w->tid, w->futex, ret); + if (done) + break; + + goto again; + } + + usleep(1); + ret = futex_unlock_pi(w->futex, futex_flag); + if (ret && !silent) + warn("thread %d: Could not unlock pi-lock for %p (%d)", + w->tid, w->futex, ret); + w->ops++; /* account for thread's share of work */ + } while (!done); + + return NULL; +} + +static void create_threads(struct worker *w, pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + for (i = 0; i < nthreads; i++) { + worker[i].tid = i; + + if (multi) { + worker[i].futex = calloc(1, sizeof(u_int32_t)); + if (!worker[i].futex) + err(EXIT_FAILURE, "calloc"); + } else + worker[i].futex = &global_futex; + + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) + err(EXIT_FAILURE, "pthread_create"); + } +} + +int bench_futex_lock_pi(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i; + struct sigaction act; + pthread_attr_t thread_attr; 
+ + argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + if (!fshared) + futex_flag = FUTEX_PRIVATE_FLAG; + + printf("Run summary [PID %d]: %u threads doing pi lock/unlock pairing for %u secs.\n\n", + getpid(), nthreads, nsecs); + + init_stats(&throughput_stats); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + threads_starting = nthreads; + pthread_attr_init(&thread_attr); + gettimeofday(&start, NULL); + + create_threads(worker, thread_attr); + pthread_attr_destroy(&thread_attr); + + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + sleep(nsecs); + toggle_done(0, NULL, NULL); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i].thread, NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + + for (i = 0; i < nthreads; i++) { + /* tv_sec is 0 for sub-second runs (-r 0, early SIGINT): do not divide by it */ + unsigned long t = runtime.tv_sec > 0 ? worker[i].ops/runtime.tv_sec : 0; + update_stats(&throughput_stats, t); + if (!silent) + printf("[thread %3d] futex: %p [ %lu ops/sec ]\n", + worker[i].tid, worker[i].futex, t); + + if (multi) + free(worker[i].futex); + } + + print_summary(); + + free(worker); + return ret; +err: + usage_with_options(bench_futex_lock_pi_usage, options); + exit(EXIT_FAILURE); +} diff --git a/kernel/tools/perf/bench/futex-wake-parallel.c b/kernel/tools/perf/bench/futex-wake-parallel.c new file mode 100644 index 000000000..6d8c9fa2a 
--- /dev/null +++ b/kernel/tools/perf/bench/futex-wake-parallel.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2015 Davidlohr Bueso. + * + * Block a bunch of threads and let parallel waker threads wakeup an + * equal amount of them. The program output reflects the avg latency + * for each individual thread to service its share of work. Ultimately + * it can be used to measure futex_wake() changes. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +struct thread_data { + pthread_t worker; + unsigned int nwoken; + struct timeval runtime; +}; + +static unsigned int nwakes = 1; + +/* all threads will block on the same futex -- hash bucket chaos ;) */ +static u_int32_t futex = 0; + +static pthread_t *blocked_worker; +static bool done = false, silent = false, fshared = false; +static unsigned int nblocked_threads = 0, nwaking_threads = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats waketime_stats, wakeup_stats; +static unsigned int ncpus, threads_starting; +static int futex_flag = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"), + OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_wake_parallel_usage[] = { + "perf bench futex wake-parallel <options>", + NULL +}; + +static void *waking_workerfn(void *arg) +{ + struct thread_data *waker = (struct thread_data *) arg; + struct timeval start, end; + + gettimeofday(&start, NULL); + + waker->nwoken = futex_wake(&futex, nwakes, futex_flag); + if (waker->nwoken != 
nwakes) + warnx("couldn't wakeup all tasks (%d/%d)", + waker->nwoken, nwakes); + + gettimeofday(&end, NULL); + timersub(&end, &start, &waker->runtime); + + pthread_exit(NULL); + return NULL; +} + +static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) +{ + unsigned int i; + + pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); + + /* create and block all threads */ + for (i = 0; i < nwaking_threads; i++) { + /* + * Thread creation order will impact per-thread latency + * as it will affect the order to acquire the hb spinlock. + * For now let the scheduler decide. + */ + if (pthread_create(&td[i].worker, &thread_attr, + waking_workerfn, (void *)&td[i])) + err(EXIT_FAILURE, "pthread_create"); + } + + for (i = 0; i < nwaking_threads; i++) + if (pthread_join(td[i].worker, NULL)) + err(EXIT_FAILURE, "pthread_join"); +} + +static void *blocked_workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + while (1) { /* handle spurious wakeups */ + if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR) + break; + } + + pthread_exit(NULL); + return NULL; +} + +static void block_threads(pthread_t *w, pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nblocked_threads; + + /* create and block all threads */ + for (i = 0; i < nblocked_threads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void print_run(struct thread_data *waking_worker, unsigned int run_num) +{ + unsigned int i, wakeup_avg; + double waketime_avg, waketime_stddev; + struct stats 
__waketime_stats, __wakeup_stats; + + init_stats(&__wakeup_stats); + init_stats(&__waketime_stats); + + for (i = 0; i < nwaking_threads; i++) { + update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec); + update_stats(&__wakeup_stats, waking_worker[i].nwoken); + } + + waketime_avg = avg_stats(&__waketime_stats); + waketime_stddev = stddev_stats(&__waketime_stats); + wakeup_avg = avg_stats(&__wakeup_stats); + + printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " + "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, + nblocked_threads, waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + +static void print_summary(void) +{ + unsigned int wakeup_avg; + double waketime_avg, waketime_stddev; + + waketime_avg = avg_stats(&waketime_stats); + waketime_stddev = stddev_stats(&waketime_stats); + wakeup_avg = avg_stats(&wakeup_stats); + + printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", + wakeup_avg, + nblocked_threads, + waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + + +static void do_run_stats(struct thread_data *waking_worker) +{ + unsigned int i; + + for (i = 0; i < nwaking_threads; i++) { + update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec); + update_stats(&wakeup_stats, waking_worker[i].nwoken); + } + +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_wake_parallel(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act; + pthread_attr_t thread_attr; + struct thread_data *waking_worker; + + argc = parse_options(argc, argv, options, + bench_futex_wake_parallel_usage, 0); + if (argc) { + usage_with_options(bench_futex_wake_parallel_usage, options); + exit(EXIT_FAILURE); + } + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + ncpus = 
sysconf(_SC_NPROCESSORS_ONLN); + if (!nblocked_threads) + nblocked_threads = ncpus; + + /* some sanity checks */ + if (nwaking_threads > nblocked_threads || !nwaking_threads) + nwaking_threads = nblocked_threads; + + if (nblocked_threads % nwaking_threads) + errx(EXIT_FAILURE, "Must be perfectly divisible"); + /* + * Each thread will wakeup nwakes tasks in + * a single futex_wake call. + */ + nwakes = nblocked_threads/nwaking_threads; + + blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker)); + if (!blocked_worker) + err(EXIT_FAILURE, "calloc"); + + if (!fshared) + futex_flag = FUTEX_PRIVATE_FLAG; + + printf("Run summary [PID %d]: blocking on %u threads (at [%s] " + "futex %p), %u threads waking up %u at a time.\n\n", + getpid(), nblocked_threads, fshared ? "shared":"private", + &futex, nwaking_threads, nwakes); + + init_stats(&wakeup_stats); + init_stats(&waketime_stats); + + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < bench_repeat && !done; j++) { + waking_worker = calloc(nwaking_threads, sizeof(*waking_worker)); + if (!waking_worker) + err(EXIT_FAILURE, "calloc"); + + /* create, launch & block all threads */ + block_threads(blocked_worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start waking folks up */ + wakeup_threads(waking_worker, thread_attr); + + for (i = 0; i < nblocked_threads; i++) { + ret = pthread_join(blocked_worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + do_run_stats(waking_worker); + if (!silent) + print_run(waking_worker, j); + + free(waking_worker); + } + + /* cleanup & report results */ + 
pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(blocked_worker); + return ret; +} diff --git a/kernel/tools/perf/bench/futex-wake.c b/kernel/tools/perf/bench/futex-wake.c index 929f762be..e5e41d3bd 100644 --- a/kernel/tools/perf/bench/futex-wake.c +++ b/kernel/tools/perf/bench/futex-wake.c @@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused) pthread_cond_wait(&thread_worker, &thread_lock); pthread_mutex_unlock(&thread_lock); - futex_wait(&futex1, 0, NULL, futex_flag); + while (1) { + if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR) + break; + } + + pthread_exit(NULL); return NULL; } diff --git a/kernel/tools/perf/bench/futex.h b/kernel/tools/perf/bench/futex.h index 7ed22ff1e..d44de9f44 100644 --- a/kernel/tools/perf/bench/futex.h +++ b/kernel/tools/perf/bench/futex.h @@ -55,6 +55,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); } +/** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(u_int32_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + /** * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 * @nr_wake: wake up to this many tasks diff --git a/kernel/tools/perf/bench/mem-functions.c b/kernel/tools/perf/bench/mem-functions.c new file mode 100644 index 000000000..9419b9442 --- /dev/null +++ b/kernel/tools/perf/bench/mem-functions.c @@ -0,0 +1,379 @@ +/* + * mem-memcpy.c + * + * Simple memcpy() and memset() 
benchmarks + * + * Written by Hitoshi Mitake + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "../util/cloexec.h" +#include "bench.h" +#include "mem-memcpy-arch.h" +#include "mem-memset-arch.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *size_str = "1MB"; +static const char *function_str = "all"; +static int nr_loops = 1; +static bool use_cycles; +static int cycles_fd; + +static const struct option options[] = { + OPT_STRING('s', "size", &size_str, "1MB", + "Specify the size of the memory buffers. " + "Available units: B, KB, MB, GB and TB (case insensitive)"), + + OPT_STRING('f', "function", &function_str, "all", + "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), + + OPT_INTEGER('l', "nr_loops", &nr_loops, + "Specify the number of loops to run. (default: 1)"), + + OPT_BOOLEAN('c', "cycles", &use_cycles, + "Use a cycles event instead of gettimeofday() to measure performance"), + + OPT_END() +}; + +typedef void *(*memcpy_t)(void *, const void *, size_t); +typedef void *(*memset_t)(void *, int, size_t); + +struct function { + const char *name; + const char *desc; + union { + memcpy_t memcpy; + memset_t memset; + } fn; +}; + +static struct perf_event_attr cycle_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_cycles(void) +{ + cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag()); + + if (cycles_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(cycles_fd < 0); +} + +static u64 get_cycles(void) +{ + int ret; + u64 clk; + + ret = read(cycles_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; +} + +#define 
print_bps(x) do { \ + if (x < K) \ + printf(" %14lf bytes/sec\n", x); \ + else if (x < K * K) \ + printf(" %14lf KB/sec\n", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/sec\n", x / K / K); \ + else \ + printf(" %14lf GB/sec\n", x / K / K / K); \ + } while (0) + +struct bench_mem_info { + const struct function *functions; + u64 (*do_cycles)(const struct function *r, size_t size); + double (*do_gettimeofday)(const struct function *r, size_t size); + const char *const *usage; +}; + +static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) +{ + const struct function *r = &info->functions[r_idx]; + double result_bps = 0.0; + u64 result_cycles = 0; + + printf("# function '%s' (%s)\n", r->name, r->desc); + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s bytes ...\n\n", size_str); + + if (use_cycles) { + result_cycles = info->do_cycles(r, size); + } else { + result_bps = info->do_gettimeofday(r, size); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (use_cycles) { + printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); + } else { + print_bps(result_bps); + } + break; + + case BENCH_FORMAT_SIMPLE: + if (use_cycles) { + printf("%lf\n", (double)result_cycles/size_total); + } else { + printf("%lf\n", result_bps); + } + break; + + default: + BUG_ON(1); + break; + } +} + +static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) +{ + int i; + size_t size; + double size_total; + + argc = parse_options(argc, argv, options, info->usage, 0); + + if (use_cycles) + init_cycles(); + + size = (size_t)perf_atoll((char *)size_str); + size_total = (double)size * nr_loops; + + if ((s64)size <= 0) { + fprintf(stderr, "Invalid size:%s\n", size_str); + return 1; + } + + if (!strncmp(function_str, "all", 3)) { + for (i = 0; info->functions[i].name; i++) + __bench_mem_function(info, i, size, size_total); + return 0; + } + + for (i = 0; 
info->functions[i].name; i++) { + if (!strcmp(info->functions[i].name, function_str)) + break; + } + if (!info->functions[i].name) { + if (strcmp(function_str, "help") && strcmp(function_str, "h")) + printf("Unknown function: %s\n", function_str); + printf("Available functions:\n"); + for (i = 0; info->functions[i].name; i++) { + printf("\t%s ... %s\n", + info->functions[i].name, info->functions[i].desc); + } + return 1; + } + + __bench_mem_function(info, i, size, size_total); + + return 0; +} + +static void memcpy_alloc_mem(void **dst, void **src, size_t size) +{ + *dst = zalloc(size); + if (!*dst) + die("memory allocation failed - maybe size is too large?\n"); + + *src = zalloc(size); + if (!*src) + die("memory allocation failed - maybe size is too large?\n"); + + /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ + memset(*src, 0, size); +} + +static u64 do_memcpy_cycles(const struct function *r, size_t size) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *src = NULL, *dst = NULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_alloc_mem(&dst, &src, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, src, size); + + cycle_start = get_cycles(); + for (i = 0; i < nr_loops; ++i) + fn(dst, src, size); + cycle_end = get_cycles(); + + free(src); + free(dst); + return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(const struct function *r, size_t size) +{ + struct timeval tv_start, tv_end, tv_diff; + memcpy_t fn = r->fn.memcpy; + void *src = NULL, *dst = NULL; + int i; + + memcpy_alloc_mem(&dst, &src, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, src, size); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < nr_loops; ++i) + fn(dst, src, size); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + 
free(dst); + + return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); +} + +struct function memcpy_functions[] = { + { .name = "default", + .desc = "Default memcpy() provided by glibc", + .fn.memcpy = memcpy }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# include "mem-memcpy-x86-64-asm-def.h" +# undef MEMCPY_FN +#endif + + { .name = NULL, } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy ", + NULL +}; + +int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .functions = memcpy_functions, + .do_cycles = do_memcpy_cycles, + .do_gettimeofday = do_memcpy_gettimeofday, + .usage = bench_mem_memcpy_usage, + }; + + return bench_mem_common(argc, argv, &info); +} + +static void memset_alloc_mem(void **dst, size_t size) +{ + *dst = zalloc(size); + if (!*dst) + die("memory allocation failed - maybe size is too large?\n"); +} + +static u64 do_memset_cycles(const struct function *r, size_t size) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, -1, size); + + cycle_start = get_cycles(); + for (i = 0; i < nr_loops; ++i) + fn(dst, i, size); + cycle_end = get_cycles(); + + free(dst); + return cycle_end - cycle_start; +} + +static double do_memset_gettimeofday(const struct function *r, size_t size) +{ + struct timeval tv_start, tv_end, tv_diff; + memset_t fn = r->fn.memset; + void *dst = NULL; + int i; + + memset_alloc_mem(&dst, size); + + /* + * We prefault the freshly allocated memory range here, + * to not measure page fault overhead: + */ + fn(dst, -1, size); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < nr_loops; ++i) + fn(dst, i, size); + 
BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); +} + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset ", + NULL +}; + +static const struct function memset_functions[] = { + { .name = "default", + .desc = "Default memset() provided by glibc", + .fn.memset = memset }, + +#ifdef HAVE_ARCH_X86_64_SUPPORT +# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +# include "mem-memset-x86-64-asm-def.h" +# undef MEMSET_FN +#endif + + { .name = NULL, } +}; + +int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct bench_mem_info info = { + .functions = memset_functions, + .do_cycles = do_memset_cycles, + .do_gettimeofday = do_memset_gettimeofday, + .usage = bench_mem_memset_usage, + }; + + return bench_mem_common(argc, argv, &info); +} diff --git a/kernel/tools/perf/bench/mem-memcpy.c b/kernel/tools/perf/bench/mem-memcpy.c deleted file mode 100644 index d3dfb7936..000000000 --- a/kernel/tools/perf/bench/mem-memcpy.c +++ /dev/null @@ -1,434 +0,0 @@ -/* - * mem-memcpy.c - * - * memcpy: Simple memory copy in various ways - * - * Written by Hitoshi Mitake - */ - -#include "../perf.h" -#include "../util/util.h" -#include "../util/parse-options.h" -#include "../util/header.h" -#include "../util/cloexec.h" -#include "bench.h" -#include "mem-memcpy-arch.h" -#include "mem-memset-arch.h" - -#include -#include -#include -#include -#include - -#define K 1024 - -static const char *length_str = "1MB"; -static const char *routine = "default"; -static int iterations = 1; -static bool use_cycle; -static int cycle_fd; -static bool only_prefault; -static bool no_prefault; - -static const struct option options[] = { - OPT_STRING('l', "length", &length_str, "1MB", - "Specify length of memory to copy. 
" - "Available units: B, KB, MB, GB and TB (upper and lower)"), - OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy, \"all\" runs all available routines"), - OPT_INTEGER('i', "iterations", &iterations, - "repeat memcpy() invocation this number of times"), - OPT_BOOLEAN('c', "cycle", &use_cycle, - "Use cycles event instead of gettimeofday() for measuring"), - OPT_BOOLEAN('o', "only-prefault", &only_prefault, - "Show only the result with page faults before memcpy()"), - OPT_BOOLEAN('n', "no-prefault", &no_prefault, - "Show only the result without page faults before memcpy()"), - OPT_END() -}; - -typedef void *(*memcpy_t)(void *, const void *, size_t); -typedef void *(*memset_t)(void *, int, size_t); - -struct routine { - const char *name; - const char *desc; - union { - memcpy_t memcpy; - memset_t memset; - } fn; -}; - -struct routine memcpy_routines[] = { - { .name = "default", - .desc = "Default memcpy() provided by glibc", - .fn.memcpy = memcpy }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, -#include "mem-memcpy-x86-64-asm-def.h" -#undef MEMCPY_FN - -#endif - - { NULL, - NULL, - {NULL} } -}; - -static const char * const bench_mem_memcpy_usage[] = { - "perf bench mem memcpy ", - NULL -}; - -static struct perf_event_attr cycle_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES -}; - -static void init_cycle(void) -{ - cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, - perf_event_open_cloexec_flag()); - - if (cycle_fd < 0 && errno == ENOSYS) - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - else - BUG_ON(cycle_fd < 0); -} - -static u64 get_cycle(void) -{ - int ret; - u64 clk; - - ret = read(cycle_fd, &clk, sizeof(u64)); - BUG_ON(ret != sizeof(u64)); - - return clk; -} - -static double timeval2double(struct timeval *ts) -{ - return (double)ts->tv_sec + - (double)ts->tv_usec / (double)1000000; -} - -#define pf 
(no_prefault ? 0 : 1) - -#define print_bps(x) do { \ - if (x < K) \ - printf(" %14lf B/Sec", x); \ - else if (x < K * K) \ - printf(" %14lfd KB/Sec", x / K); \ - else if (x < K * K * K) \ - printf(" %14lf MB/Sec", x / K / K); \ - else \ - printf(" %14lf GB/Sec", x / K / K / K); \ - } while (0) - -struct bench_mem_info { - const struct routine *routines; - u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); - double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); - const char *const *usage; -}; - -static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) -{ - const struct routine *r = &info->routines[r_idx]; - double result_bps[2]; - u64 result_cycle[2]; - - result_cycle[0] = result_cycle[1] = 0ULL; - result_bps[0] = result_bps[1] = 0.0; - - printf("Routine %s (%s)\n", r->name, r->desc); - - if (bench_format == BENCH_FORMAT_DEFAULT) - printf("# Copying %s Bytes ...\n\n", length_str); - - if (!only_prefault && !no_prefault) { - /* show both of results */ - if (use_cycle) { - result_cycle[0] = info->do_cycle(r, len, false); - result_cycle[1] = info->do_cycle(r, len, true); - } else { - result_bps[0] = info->do_gettimeofday(r, len, false); - result_bps[1] = info->do_gettimeofday(r, len, true); - } - } else { - if (use_cycle) - result_cycle[pf] = info->do_cycle(r, len, only_prefault); - else - result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); - } - - switch (bench_format) { - case BENCH_FORMAT_DEFAULT: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf(" %14lf Cycle/Byte\n", - (double)result_cycle[0] - / totallen); - printf(" %14lf Cycle/Byte (with prefault)\n", - (double)result_cycle[1] - / totallen); - } else { - print_bps(result_bps[0]); - printf("\n"); - print_bps(result_bps[1]); - printf(" (with prefault)\n"); - } - } else { - if (use_cycle) { - printf(" %14lf Cycle/Byte", - (double)result_cycle[pf] - / totallen); - } else - print_bps(result_bps[pf]); - 
- printf("%s\n", only_prefault ? " (with prefault)" : ""); - } - break; - case BENCH_FORMAT_SIMPLE: - if (!only_prefault && !no_prefault) { - if (use_cycle) { - printf("%lf %lf\n", - (double)result_cycle[0] / totallen, - (double)result_cycle[1] / totallen); - } else { - printf("%lf %lf\n", - result_bps[0], result_bps[1]); - } - } else { - if (use_cycle) { - printf("%lf\n", (double)result_cycle[pf] - / totallen); - } else - printf("%lf\n", result_bps[pf]); - } - break; - default: - /* reaching this means there's some disaster: */ - die("unknown format: %d\n", bench_format); - break; - } -} - -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) -{ - int i; - size_t len; - double totallen; - - argc = parse_options(argc, argv, options, - info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - if (!strncmp(routine, "all", 3)) { - for (i = 0; info->routines[i].name; i++) - __bench_mem_routine(info, i, len, totallen); - return 0; - } - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... 
%s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } - - __bench_mem_routine(info, i, len, totallen); - - return 0; -} - -static void memcpy_alloc_mem(void **dst, void **src, size_t length) -{ - *dst = zalloc(length); - if (!*dst) - die("memory allocation failed - maybe length is too large?\n"); - - *src = zalloc(length); - if (!*src) - die("memory allocation failed - maybe length is too large?\n"); - /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ - memset(*src, 0, length); -} - -static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - void *src = NULL, *dst = NULL; - memcpy_t fn = r->fn.memcpy; - int i; - - memcpy_alloc_mem(&dst, &src, len); - - if (prefault) - fn(dst, src, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - cycle_end = get_cycle(); - - free(src); - free(dst); - return cycle_end - cycle_start; -} - -static double do_memcpy_gettimeofday(const struct routine *r, size_t len, - bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - memcpy_t fn = r->fn.memcpy; - void *src = NULL, *dst = NULL; - int i; - - memcpy_alloc_mem(&dst, &src, len); - - if (prefault) - fn(dst, src, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, src, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(src); - free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); -} - -int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused) -{ - struct bench_mem_info info = { - .routines = memcpy_routines, - .do_cycle = do_memcpy_cycle, - .do_gettimeofday = do_memcpy_gettimeofday, - .usage = bench_mem_memcpy_usage, - }; - - return bench_mem_common(argc, argv, prefix, &info); -} - -static void memset_alloc_mem(void **dst, size_t length) -{ - *dst = zalloc(length); - if 
(!*dst) - die("memory allocation failed - maybe length is too large?\n"); -} - -static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memset_t fn = r->fn.memset; - void *dst = NULL; - int i; - - memset_alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - cycle_start = get_cycle(); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - cycle_end = get_cycle(); - - free(dst); - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(const struct routine *r, size_t len, - bool prefault) -{ - struct timeval tv_start, tv_end, tv_diff; - memset_t fn = r->fn.memset; - void *dst = NULL; - int i; - - memset_alloc_mem(&dst, len); - - if (prefault) - fn(dst, -1, len); - - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < iterations; ++i) - fn(dst, i, len); - BUG_ON(gettimeofday(&tv_end, NULL)); - - timersub(&tv_end, &tv_start, &tv_diff); - - free(dst); - return (double)(((double)len * iterations) / timeval2double(&tv_diff)); -} - -static const char * const bench_mem_memset_usage[] = { - "perf bench mem memset ", - NULL -}; - -static const struct routine memset_routines[] = { - { .name ="default", - .desc = "Default memset() provided by glibc", - .fn.memset = memset }, -#ifdef HAVE_ARCH_X86_64_SUPPORT - -#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, -#include "mem-memset-x86-64-asm-def.h" -#undef MEMSET_FN - -#endif - - { .name = NULL, - .desc = NULL, - .fn.memset = NULL } -}; - -int bench_mem_memset(int argc, const char **argv, - const char *prefix __maybe_unused) -{ - struct bench_mem_info info = { - .routines = memset_routines, - .do_cycle = do_memset_cycle, - .do_gettimeofday = do_memset_gettimeofday, - .usage = bench_mem_memset_usage, - }; - - return bench_mem_common(argc, argv, prefix, &info); -} diff --git a/kernel/tools/perf/bench/numa.c b/kernel/tools/perf/bench/numa.c index ba5efa471..492df2752 100644 --- 
a/kernel/tools/perf/bench/numa.c +++ b/kernel/tools/perf/bench/numa.c @@ -8,6 +8,7 @@ #include "../builtin.h" #include "../util/util.h" #include "../util/parse-options.h" +#include "../util/cloexec.h" #include "bench.h" @@ -23,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +53,9 @@ struct thread_data { unsigned int loops_done; u64 val; u64 runtime_ns; + u64 system_time_ns; + u64 user_time_ns; + double speed_gbs; pthread_mutex_t *process_lock; }; @@ -159,8 +164,8 @@ static const struct option options[] = { OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), - OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"), - OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"), + OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"), + OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), @@ -1042,6 +1047,7 @@ static void *worker_thread(void *__tdata) u64 bytes_done; long work_done; u32 l; + struct rusage rusage; bind_to_cpumask(td->bind_cpumask); bind_to_memnode(td->bind_node); @@ -1194,6 +1200,13 @@ static void *worker_thread(void *__tdata) timersub(&stop, &start0, &diff); td->runtime_ns = diff.tv_sec * 1000000000ULL; td->runtime_ns += diff.tv_usec * 1000ULL; + td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9; + + getrusage(RUSAGE_THREAD, &rusage); + td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL; + td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL; + td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL; + td->user_time_ns += 
rusage.ru_utime.tv_usec * 1000ULL; free_data(thread_data, g->p.bytes_thread); @@ -1420,7 +1433,7 @@ static int __bench_numa(const char *name) double runtime_sec_min; int wait_stat; double bytes; - int i, t; + int i, t, p; if (init()) return -1; @@ -1556,6 +1569,24 @@ static int __bench_numa(const char *name) print_res(name, bytes / runtime_sec_max / 1e9, "GB/sec,", "total-speed", "GB/sec total speed"); + if (g->p.show_details >= 2) { + char tname[32]; + struct thread_data *td; + for (p = 0; p < g->p.nr_proc; p++) { + for (t = 0; t < g->p.nr_threads; t++) { + memset(tname, 0, 32); + td = g->threads + p*g->p.nr_threads + t; + snprintf(tname, 32, "process%d:thread%d", p, t); + print_res(tname, td->speed_gbs, + "GB/sec", "thread-speed", "GB/sec/thread speed"); + print_res(tname, td->system_time_ns / 1e9, + "secs", "thread-system-time", "system CPU time/thread"); + print_res(tname, td->user_time_ns / 1e9, + "secs", "thread-user-time", "user CPU time/thread"); + } + } + } + free(pids); deinit(); diff --git a/kernel/tools/perf/bench/sched-messaging.c b/kernel/tools/perf/bench/sched-messaging.c index d7f281c28..d4ff1b539 100644 --- a/kernel/tools/perf/bench/sched-messaging.c +++ b/kernel/tools/perf/bench/sched-messaging.c @@ -33,7 +33,7 @@ #define DATASIZE 100 static bool use_pipes = false; -static unsigned int loops = 100; +static unsigned int nr_loops = 100; static bool thread_mode = false; static unsigned int num_groups = 10; @@ -79,7 +79,7 @@ static void ready(int ready_out, int wakefd) err(EXIT_FAILURE, "poll"); } -/* Sender sprays loops messages down each file descriptor */ +/* Sender sprays nr_loops messages down each file descriptor */ static void *sender(struct sender_context *ctx) { char data[DATASIZE]; @@ -88,7 +88,7 @@ static void *sender(struct sender_context *ctx) ready(ctx->ready_out, ctx->wakefd); /* Now pump to every receiver. 
*/ - for (i = 0; i < loops; i++) { + for (i = 0; i < nr_loops; i++) { for (j = 0; j < ctx->num_fds; j++) { int ret, done = 0; @@ -213,7 +213,7 @@ static unsigned int group(pthread_t *pth, /* Create the pipe between client and server */ fdpair(fds); - ctx->num_packets = num_fds * loops; + ctx->num_packets = num_fds * nr_loops; ctx->in_fds[0] = fds[0]; ctx->in_fds[1] = fds[1]; ctx->ready_out = ready_out; @@ -250,7 +250,7 @@ static const struct option options[] = { OPT_BOOLEAN('t', "thread", &thread_mode, "Be multi thread instead of multi process"), OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"), - OPT_UINTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run (default: 100)"), OPT_END() }; -- cgit 1.2.3-korg