/*
 * mem-memcpy.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */

#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../util/header.h"
#include "../util/cloexec.h"
#include "bench.h"
#include "mem-memcpy-arch.h"
#include "mem-memset-arch.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <errno.h>

/* Step between successive byte units (B, KB, MB, GB) when printing rates. */
#define K 1024

/*
 * Benchmark settings. The first three and the boolean flags are filled in
 * by parse_options() through options[]; the initializers are the defaults.
 */
static const char	*length_str	= "1MB";
static const char	*routine	= "all";
static int		iterations	= 1;
static bool		use_cycle;
/* Descriptor of the cycle counter; set by init_cycle() when use_cycle is on. */
static int		cycle_fd;
static bool		only_prefault;
static bool		no_prefault;

static const struct option options[] = {
	OPT_STRING('l', "length", &length_str, "1MB",
		    "Specify length of memory to copy. "
N
Namhyung Kim 已提交
37
		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
38
	OPT_STRING('r', "routine", &routine, "all",
39
		    "Specify routine to copy, \"all\" runs all available routines"),
40 41
	OPT_INTEGER('i', "iterations", &iterations,
		    "repeat memcpy() invocation this number of times"),
42
	OPT_BOOLEAN('c', "cycle", &use_cycle,
N
Namhyung Kim 已提交
43
		    "Use cycles event instead of gettimeofday() for measuring"),
44 45 46 47
	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
		    "Show only the result with page faults before memcpy()"),
	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
		    "Show only the result without page faults before memcpy()"),
48 49 50
	OPT_END()
};

/*
 * Function-pointer types for the routines under test. Their signatures
 * match libc memcpy() and memset(), which are used as the "default" entries.
 */
typedef void *(*memcpy_t)(void *, const void *, size_t);
typedef void *(*memset_t)(void *, int, size_t);

54 55 56
struct routine {
	const char *name;
	const char *desc;
57 58
	union {
		memcpy_t memcpy;
59
		memset_t memset;
60
	} fn;
61 62
};

63
struct routine memcpy_routines[] = {
64 65 66
	{ .name		= "default",
	  .desc		= "Default memcpy() provided by glibc",
	  .fn.memcpy	= memcpy },
67

68 69 70 71
#ifdef HAVE_ARCH_X86_64_SUPPORT
# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
72 73
#endif

74
	{ NULL, }
75 76 77 78 79 80 81
};

/* NULL-terminated usage text passed to parse_options() for the memcpy benchmark. */
static const char * const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>", NULL,
};

82
static struct perf_event_attr cycle_attr = {
83 84
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES
85 86
};

87
static void init_cycle(void)
88
{
89
	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
90

91
	if (cycle_fd < 0 && errno == ENOSYS)
92 93
		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
	else
94
		BUG_ON(cycle_fd < 0);
95 96
}

97
static u64 get_cycle(void)
98 99 100 101
{
	int ret;
	u64 clk;

102
	ret = read(cycle_fd, &clk, sizeof(u64));
103 104 105 106 107 108 109
	BUG_ON(ret != sizeof(u64));

	return clk;
}

/*
 * Convert a struct timeval to seconds as a double.
 *
 * @ts: time value to convert; tv_usec contributes the fractional part
 *
 * Returns tv_sec + tv_usec / 1e6.
 */
static double timeval2double(struct timeval *ts)
{
	return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
}

/*
 * Print a bytes-per-second rate, scaled to the largest unit (B, KB, MB or
 * GB, in steps of K) that keeps the printed value at or above 1.
 *
 * The argument is evaluated exactly once. No newline is printed, so the
 * caller can append a suffix on the same line.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf B/Sec", bps_);			\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/Sec", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/Sec", bps_ / K / K);		\
		else							\
			printf(" %14lf GB/Sec", bps_ / K / K / K);	\
	} while (0)

124 125 126 127 128 129 130
struct bench_mem_info {
	const struct routine *routines;
	u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault);
	double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault);
	const char *const *usage;
};

131
static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
132
{
133
	const struct routine *r = &info->routines[r_idx];
134
	double result_bps[2];
135
	u64 result_cycle[2];
136
	int prefault = no_prefault ? 0 : 1;
137

138
	result_cycle[0] = result_cycle[1] = 0ULL;
139 140
	result_bps[0] = result_bps[1] = 0.0;

141
	printf("Routine %s (%s)\n", r->name, r->desc);
142

143 144
	if (bench_format == BENCH_FORMAT_DEFAULT)
		printf("# Copying %s Bytes ...\n\n", length_str);
145

146 147
	if (!only_prefault && prefault) {
		/* Show both results: */
148
		if (use_cycle) {
149 150
			result_cycle[0] = info->do_cycle(r, len, false);
			result_cycle[1] = info->do_cycle(r, len, true);
151
		} else {
152 153
			result_bps[0]   = info->do_gettimeofday(r, len, false);
			result_bps[1]   = info->do_gettimeofday(r, len, true);
154
		}
155
	} else {
156
		if (use_cycle)
157
			result_cycle[prefault] = info->do_cycle(r, len, only_prefault);
158
		else
159
			result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault);
160 161 162 163
	}

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
164
		if (!only_prefault && prefault) {
165 166 167
			if (use_cycle) {
				printf(" %14lf Cycle/Byte\n",
					(double)result_cycle[0]
168
					/ totallen);
169 170
				printf(" %14lf Cycle/Byte (with prefault)\n",
					(double)result_cycle[1]
171
					/ totallen);
172 173 174 175 176
			} else {
				print_bps(result_bps[0]);
				printf("\n");
				print_bps(result_bps[1]);
				printf(" (with prefault)\n");
177
			}
178
		} else {
179 180
			if (use_cycle) {
				printf(" %14lf Cycle/Byte",
181
					(double)result_cycle[prefault]
182
					/ totallen);
183
			} else
184
				print_bps(result_bps[prefault]);
185 186

			printf("%s\n", only_prefault ? " (with prefault)" : "");
187 188 189
		}
		break;
	case BENCH_FORMAT_SIMPLE:
190
		if (!only_prefault && prefault) {
191
			if (use_cycle) {
192
				printf("%lf %lf\n",
193 194
					(double)result_cycle[0] / totallen,
					(double)result_cycle[1] / totallen);
195 196 197 198 199
			} else {
				printf("%lf %lf\n",
					result_bps[0], result_bps[1]);
			}
		} else {
200
			if (use_cycle) {
201
				printf("%lf\n", (double)result_cycle[prefault]
202
					/ totallen);
203
			} else
204
				printf("%lf\n", result_bps[prefault]);
205
		}
206 207
		break;
	default:
208
		/* Reaching this means there's some disaster: */
209
		die("unknown format: %d\n", bench_format);
210 211
		break;
	}
212 213
}

214
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
215 216 217 218 219
{
	int i;
	size_t len;
	double totallen;

220
	argc = parse_options(argc, argv, options, info->usage, 0);
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237

	if (no_prefault && only_prefault) {
		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
		return 1;
	}

	if (use_cycle)
		init_cycle();

	len = (size_t)perf_atoll((char *)length_str);
	totallen = (double)len * iterations;

	if ((s64)len <= 0) {
		fprintf(stderr, "Invalid length:%s\n", length_str);
		return 1;
	}

238
	/* Same as without specifying either of prefault and no-prefault: */
239 240 241
	if (only_prefault && no_prefault)
		only_prefault = no_prefault = false;

242 243 244 245 246 247
	if (!strncmp(routine, "all", 3)) {
		for (i = 0; info->routines[i].name; i++)
			__bench_mem_routine(info, i, len, totallen);
		return 0;
	}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
	for (i = 0; info->routines[i].name; i++) {
		if (!strcmp(info->routines[i].name, routine))
			break;
	}
	if (!info->routines[i].name) {
		printf("Unknown routine:%s\n", routine);
		printf("Available routines...\n");
		for (i = 0; info->routines[i].name; i++) {
			printf("\t%s ... %s\n",
			       info->routines[i].name, info->routines[i].desc);
		}
		return 1;
	}

	__bench_mem_routine(info, i, len, totallen);
263 264 265

	return 0;
}
266 267 268 269 270 271 272 273 274 275

/*
 * Allocate the destination and source buffers for a memcpy run.
 *
 * @dst, @src: out parameters receiving the two buffers; the caller
 *             releases both with free()
 * @length:    size of each buffer in bytes
 *
 * Calls die() if either allocation fails.
 */
static void memcpy_alloc_mem(void **dst, void **src, size_t length)
{
	*dst = zalloc(length);
	if (!*dst)
		die("memory allocation failed - maybe length is too large?\n");

	*src = zalloc(length);
	if (!*src)
		die("memory allocation failed - maybe length is too large?\n");

	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
	memset(*src, 0, length);
}

/*
 * Measure a memcpy routine with the cycle counter.
 *
 * @r:        routine to run (its fn.memcpy member is used)
 * @len:      number of bytes copied per call
 * @prefault: when true, make one untimed call first
 *
 * Returns the difference between the counter values read before and after
 * `iterations` timed calls.
 */
static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
{
	u64 cycle_start = 0ULL, cycle_end = 0ULL;
	void *src = NULL, *dst = NULL;
	memcpy_t fn = r->fn.memcpy;
	int i;

	memcpy_alloc_mem(&dst, &src, len);

	if (prefault)
		fn(dst, src, len);

	cycle_start = get_cycle();
	for (i = 0; i < iterations; ++i)
		fn(dst, src, len);
	cycle_end = get_cycle();

	free(src);
	free(dst);
	return cycle_end - cycle_start;
}

303
static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool prefault)
304 305 306 307 308 309
{
	struct timeval tv_start, tv_end, tv_diff;
	memcpy_t fn = r->fn.memcpy;
	void *src = NULL, *dst = NULL;
	int i;

310
	memcpy_alloc_mem(&dst, &src, len);
311 312 313 314 315 316 317 318 319 320 321 322 323

	if (prefault)
		fn(dst, src, len);

	BUG_ON(gettimeofday(&tv_start, NULL));
	for (i = 0; i < iterations; ++i)
		fn(dst, src, len);
	BUG_ON(gettimeofday(&tv_end, NULL));

	timersub(&tv_end, &tv_start, &tv_diff);

	free(src);
	free(dst);
324
	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
325 326
}

327
int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
328 329
{
	struct bench_mem_info info = {
330 331 332 333
		.routines		= memcpy_routines,
		.do_cycle		= do_memcpy_cycle,
		.do_gettimeofday	= do_memcpy_gettimeofday,
		.usage			= bench_mem_memcpy_usage,
334 335
	};

336
	return bench_mem_common(argc, argv, &info);
337
}
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387

/*
 * Allocate the buffer for a memset run.
 *
 * @dst:    out parameter receiving the buffer; the caller frees it
 * @length: buffer size in bytes
 *
 * Calls die() when the allocation fails.
 */
static void memset_alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}

static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
{
	u64 cycle_start = 0ULL, cycle_end = 0ULL;
	memset_t fn = r->fn.memset;
	void *dst = NULL;
	int i;

	memset_alloc_mem(&dst, len);

	if (prefault)
		fn(dst, -1, len);

	cycle_start = get_cycle();
	for (i = 0; i < iterations; ++i)
		fn(dst, i, len);
	cycle_end = get_cycle();

	free(dst);
	return cycle_end - cycle_start;
}

/*
 * Measure a memset routine with gettimeofday().
 *
 * @r:        routine to run (its fn.memset member is used)
 * @len:      number of bytes set per call
 * @prefault: when true, make one untimed call (fill value -1) first
 *
 * Returns the rate in bytes per second: (len * iterations) divided by the
 * wall-clock time of the timed loop.
 */
static double do_memset_gettimeofday(const struct routine *r, size_t len,
				     bool prefault)
{
	struct timeval tv_start, tv_end, tv_diff;
	memset_t fn = r->fn.memset;
	void *dst = NULL;
	int i;

	memset_alloc_mem(&dst, len);

	if (prefault)
		fn(dst, -1, len);

	BUG_ON(gettimeofday(&tv_start, NULL));
	for (i = 0; i < iterations; ++i)
		fn(dst, i, len);
	BUG_ON(gettimeofday(&tv_end, NULL));

	timersub(&tv_end, &tv_start, &tv_diff);

	free(dst);
	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
}

/* NULL-terminated usage text passed to parse_options() for the memset benchmark. */
static const char * const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>", NULL,
};

/* Table of memset() implementations, terminated by an entry whose name is NULL. */
static const struct routine memset_routines[] = {
	{ .name		= "default",
	  .desc		= "Default memset() provided by glibc",
	  .fn.memset	= memset },

#ifdef HAVE_ARCH_X86_64_SUPPORT
/* Each MEMSET_FN() line in the included header expands to one table entry. */
# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif

	{ NULL, }
};

410
int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
411 412
{
	struct bench_mem_info info = {
413 414 415 416
		.routines		= memset_routines,
		.do_cycle		= do_memset_cycle,
		.do_gettimeofday	= do_memset_gettimeofday,
		.usage			= bench_mem_memset_usage,
417 418
	};

419
	return bench_mem_common(argc, argv, &info);
420
}