// SPDX-License-Identifier: GPL-2.0
/*
 * Benchmark synthesis of perf events such as at the start of a 'perf
 * record'. Synthesis is done on the current process and the 'dummy' event
 * handlers are invoked that support dump_trace but otherwise do nothing.
 *
 * Copyright 2019 Google LLC.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#include "bench.h"
#include "../util/debug.h"
#include "../util/session.h"
#include "../util/stat.h"
#include "../util/synthetic-events.h"
#include "../util/target.h"
#include "../util/thread_map.h"
#include "../util/tool.h"
#include "../util/util.h"
#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/time64.h>
#include <subcmd/parse-options.h>

/*
 * Benchmark tunables. Each one can be overridden from the command line
 * through the option table in this file.
 */

/* First thread count tried by the multi-threaded benchmark. */
static unsigned int min_threads = 1;
/*
 * Last thread count tried by the multi-threaded benchmark. UINT_MAX means
 * "not set": it is replaced by the number of online CPUs before the run.
 */
static unsigned int max_threads = UINT_MAX;
/* Number of timed synthesis rounds per single-threaded measurement. */
static unsigned int single_iterations = 10000;
/* Number of timed synthesis rounds per multi-threaded thread count. */
static unsigned int multi_iterations = 10;
/* Set by -s/--st: run the single-threaded benchmark. */
static bool run_st;
/* Set by -t/--mt: run the multi-threaded benchmark. */
static bool run_mt;

static const struct option options[] = {
32 33 34 35 36 37 38 39 40 41
	OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
	OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
	OPT_UINTEGER('m', "min-threads", &min_threads,
		"Minimum number of threads in multithreaded bench"),
	OPT_UINTEGER('M', "max-threads", &max_threads,
		"Maximum number of threads in multithreaded bench"),
	OPT_UINTEGER('i', "single-iterations", &single_iterations,
		"Number of iterations used to compute single-threaded average"),
	OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
		"Number of iterations used to compute multi-threaded average"),
42 43 44
	OPT_END()
};

/* NULL-terminated usage lines shown when option parsing fails. */
static const char *const bench_usage[] = {
	"perf bench internals synthesize <options>",
	NULL
};

50
static atomic_t event_count;
51

52 53 54 55 56 57 58 59 60 61 62 63
static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	atomic_inc(&event_count);
	return 0;
}

static int do_run_single_threaded(struct perf_session *session,
				struct perf_thread_map *threads,
				struct target *target, bool data_mmap)
64 65 66 67 68
{
	const unsigned int nr_threads_synthesize = 1;
	struct timeval start, end, diff;
	u64 runtime_us;
	unsigned int i;
69
	double time_average, time_stddev, event_average, event_stddev;
70
	int err;
71
	struct stats time_stats, event_stats;
72

73 74 75 76 77 78 79 80 81 82 83
	init_stats(&time_stats);
	init_stats(&event_stats);

	for (i = 0; i < single_iterations; i++) {
		atomic_set(&event_count, 0);
		gettimeofday(&start, NULL);
		err = __machine__synthesize_threads(&session->machines.host,
						NULL,
						target, threads,
						process_synthesized_event,
						data_mmap,
84 85 86
						nr_threads_synthesize);
		if (err)
			return err;
87 88 89 90 91 92

		gettimeofday(&end, NULL);
		timersub(&end, &start, &diff);
		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
		update_stats(&time_stats, runtime_us);
		update_stats(&event_stats, atomic_read(&event_count));
93 94
	}

95 96 97 98 99 100 101 102 103 104 105 106
	time_average = avg_stats(&time_stats);
	time_stddev = stddev_stats(&time_stats);
	printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
		data_mmap ? "data " : "", time_average, time_stddev);

	event_average = avg_stats(&event_stats);
	event_stddev = stddev_stats(&event_stats);
	printf("  Average num. events: %.3f (+- %.3f)\n",
		event_average, event_stddev);

	printf("  Average time per event %.3f usec\n",
		time_average / event_average);
107 108 109
	return 0;
}

110
static int run_single_threaded(void)
111 112 113 114 115 116 117 118
{
	struct perf_session *session;
	struct target target = {
		.pid = "self",
	};
	struct perf_thread_map *threads;
	int err;

119
	perf_set_singlethreaded();
120
	session = perf_session__new(NULL, NULL);
121 122 123 124 125 126 127 128 129 130 131
	if (IS_ERR(session)) {
		pr_err("Session creation failed.\n");
		return PTR_ERR(session);
	}
	threads = thread_map__new_by_pid(getpid());
	if (!threads) {
		pr_err("Thread map creation failed.\n");
		err = -ENOMEM;
		goto err_out;
	}

132 133 134 135 136
	puts(
"Computing performance of single threaded perf event synthesis by\n"
"synthesizing events on the perf process itself:");

	err = do_run_single_threaded(session, threads, &target, false);
137 138 139
	if (err)
		goto err_out;

140
	err = do_run_single_threaded(session, threads, &target, true);
141 142 143 144 145 146 147 148

err_out:
	if (threads)
		perf_thread_map__put(threads);

	perf_session__delete(session);
	return err;
}
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163

/*
 * Time multi_iterations rounds of event synthesis using
 * nr_threads_synthesize synthesis threads and print the average/stddev of
 * the run time and of the number of events seen, plus the average time per
 * event.
 *
 * Every round creates its own session and deletes it after the end
 * timestamp has been taken, so session setup and teardown are not part of
 * the measured interval.
 *
 * @target:                target passed to __machine__synthesize_threads()
 * @nr_threads_synthesize: number of synthesis threads to request
 *
 * Returns 0 on success, PTR_ERR() of a session that could not be created,
 * or the first non-zero value returned by __machine__synthesize_threads().
 */
static int do_run_multi_threaded(struct target *target,
				unsigned int nr_threads_synthesize)
{
	struct timeval start, end, diff;
	u64 runtime_us;
	unsigned int i;
	double time_average, time_stddev, event_average, event_stddev;
	int err;
	struct stats time_stats, event_stats;
	struct perf_session *session;

	init_stats(&time_stats);
	init_stats(&event_stats);
	for (i = 0; i < multi_iterations; i++) {
		session = perf_session__new(NULL, NULL);
		if (IS_ERR(session))
			return PTR_ERR(session);

		/* Count only the events produced by this round. */
		atomic_set(&event_count, 0);
		gettimeofday(&start, NULL);
		err = __machine__synthesize_threads(&session->machines.host,
						NULL,
						target, NULL,
						process_synthesized_event,
						false,
						nr_threads_synthesize);
		if (err) {
			perf_session__delete(session);
			return err;
		}

		gettimeofday(&end, NULL);
		timersub(&end, &start, &diff);
		runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
		update_stats(&time_stats, runtime_us);
		update_stats(&event_stats, atomic_read(&event_count));
		perf_session__delete(session);
	}

	time_average = avg_stats(&time_stats);
	time_stddev = stddev_stats(&time_stats);
	printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
		time_average, time_stddev);

	event_average = avg_stats(&event_stats);
	event_stddev = stddev_stats(&event_stats);
	printf("    Average num. events: %.3f (+- %.3f)\n",
		event_average, event_stddev);

	printf("    Average time per event %.3f usec\n",
		time_average / event_average);
	return 0;
}

/*
 * Run the multi-threaded benchmark on CPU 0 for every synthesis thread
 * count from min_threads to max_threads inclusive.
 *
 * When max_threads is still UINT_MAX it is replaced by the number of online
 * CPUs. perf is switched to single-threaded mode for a thread count of 1
 * and to multithreaded mode otherwise, and is put back into single-threaded
 * mode before returning, on success and on failure alike.
 *
 * Returns 0 on success or the first error from do_run_multi_threaded().
 */
static int run_multi_threaded(void)
{
	struct target target = {
		.cpu_list = "0"
	};
	unsigned int nr_threads_synthesize;
	int err = 0;

	if (max_threads == UINT_MAX) {
		long nr_online = sysconf(_SC_NPROCESSORS_ONLN);

		/*
		 * sysconf() reports failure as -1. Stored into the unsigned
		 * max_threads that would be UINT_MAX again, and the
		 * "<= max_threads" loop below would never terminate, so fall
		 * back to a single thread instead.
		 */
		max_threads = nr_online > 0 ? (unsigned int)nr_online : 1;
	}

	puts(
"Computing performance of multi threaded perf event synthesis by\n"
"synthesizing events on CPU 0:");

	for (nr_threads_synthesize = min_threads;
	     nr_threads_synthesize <= max_threads;
	     nr_threads_synthesize++) {
		if (nr_threads_synthesize == 1)
			perf_set_singlethreaded();
		else
			perf_set_multithreaded();

		printf("  Number of synthesis threads: %u\n",
			nr_threads_synthesize);

		err = do_run_multi_threaded(&target, nr_threads_synthesize);
		if (err)
			break;
	}

	/* Restore the single-threaded mode on every exit path. */
	perf_set_singlethreaded();
	return err;
}

/*
 * Entry point of 'perf bench internals synthesize'.
 *
 * Parses the command line; if parse_options() returns non-zero, the usage
 * text is printed and the process exits with EXIT_FAILURE. Then the selected
 * benchmarks are run: single-threaded first, multi-threaded second, the
 * latter only if the former did not fail.
 *
 * Returns 0 on success or the error of the first failing benchmark.
 */
int bench_synthesize(int argc, const char **argv)
{
	int ret = 0;

	if (parse_options(argc, argv, options, bench_usage, 0) != 0) {
		usage_with_options(bench_usage, options);
		exit(EXIT_FAILURE);
	}

	/* With no explicit selection, run only the single-threaded bench. */
	if (!(run_st || run_mt))
		run_st = true;

	if (run_st)
		ret = run_single_threaded();

	if (run_mt && ret == 0)
		ret = run_multi_threaded();

	return ret;
}