/*
 * mem-functions.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */

#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
#include "../util/header.h"
#include "../util/cloexec.h"
#include "bench.h"
#include "mem-memcpy-arch.h"
#include "mem-memset-arch.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <errno.h>
#include <unistd.h>

/* Step between adjacent units (B, KB, MB, GB) used by print_bps(). */
#define K 1024

/*
 * Values filled in by the option table. The initializers below are what
 * the benchmark uses when the corresponding option is not given.
 */
static const char	*length_str	= "1MB";	/* -l/--length, parsed with perf_atoll() */
static const char	*routine	= "all";	/* -r/--routine */
static int		iterations	= 1;		/* -i/--iterations */
static bool		use_cycles;			/* -c/--cycles */

/* Descriptor of the cycles event; assigned by init_cycles(), read by get_cycles(). */
static int		cycles_fd;

static const struct option options[] = {
	OPT_STRING('l', "length", &length_str, "1MB",
		    "Specify length of memory to copy. "
N
Namhyung Kim 已提交
35
		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
36
	OPT_STRING('r', "routine", &routine, "all",
37
		    "Specify routine to copy, \"all\" runs all available routines"),
38 39
	OPT_INTEGER('i', "iterations", &iterations,
		    "repeat memcpy() invocation this number of times"),
40 41
	OPT_BOOLEAN('c', "cycles", &use_cycles,
		    "Use a cycles event instead of gettimeofday() to measure performance"),
42 43 44
	OPT_END()
};

/* Function-pointer type matching the prototype of memcpy(). */
typedef void *(*memcpy_t)(void *, const void *, size_t);
/* Function-pointer type matching the prototype of memset(). */
typedef void *(*memset_t)(void *, int, size_t);

48 49 50
struct routine {
	const char *name;
	const char *desc;
51 52
	union {
		memcpy_t memcpy;
53
		memset_t memset;
54
	} fn;
55 56
};

57
struct routine memcpy_routines[] = {
58 59 60
	{ .name		= "default",
	  .desc		= "Default memcpy() provided by glibc",
	  .fn.memcpy	= memcpy },
61

62 63 64 65
#ifdef HAVE_ARCH_X86_64_SUPPORT
# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
66 67
#endif

68
	{ NULL, }
69 70 71 72 73 74 75
};

/* NULL-terminated usage lines for "perf bench mem memcpy". */
static const char *const bench_mem_memcpy_usage[] = {
	"perf bench mem memcpy <options>",
	NULL,
};

76
static struct perf_event_attr cycle_attr = {
77 78
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES
79 80
};

81
static void init_cycles(void)
82
{
83
	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
84

85
	if (cycles_fd < 0 && errno == ENOSYS)
86 87
		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
	else
88
		BUG_ON(cycles_fd < 0);
89 90
}

91
static u64 get_cycles(void)
92 93 94 95
{
	int ret;
	u64 clk;

96
	ret = read(cycles_fd, &clk, sizeof(u64));
97 98 99 100 101 102 103
	BUG_ON(ret != sizeof(u64));

	return clk;
}

/* Convert a struct timeval to seconds, with the microseconds as the fraction. */
static double timeval2double(struct timeval *ts)
{
	return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
}

/*
 * Print a throughput given in bytes per second, scaled to the largest of
 * B, KB, MB or GB that keeps the value at or above one unit (steps of K).
 *
 * The argument is evaluated exactly once and may be any expression that
 * converts to double.
 */
#define print_bps(x) do {						\
		const double bps_val_ = (x);				\
									\
		if (bps_val_ < K)					\
			printf(" %14lf B/Sec\n", bps_val_);		\
		else if (bps_val_ < K * K)				\
			printf(" %14lf KB/Sec\n", bps_val_ / K);	\
		else if (bps_val_ < K * K * K)				\
			printf(" %14lf MB/Sec\n", bps_val_ / K / K);	\
		else							\
			printf(" %14lf GB/Sec\n", bps_val_ / K / K / K);	\
	} while (0)

118 119
struct bench_mem_info {
	const struct routine *routines;
120
	u64 (*do_cycles)(const struct routine *r, size_t len);
121
	double (*do_gettimeofday)(const struct routine *r, size_t len);
122 123 124
	const char *const *usage;
};

125
static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
126
{
127
	const struct routine *r = &info->routines[r_idx];
128
	double result_bps = 0.0;
129
	u64 result_cycles = 0;
130

131
	printf("Routine %s (%s)\n", r->name, r->desc);
132

133 134
	if (bench_format == BENCH_FORMAT_DEFAULT)
		printf("# Copying %s Bytes ...\n\n", length_str);
135

136 137
	if (use_cycles) {
		result_cycles = info->do_cycles(r, len);
138
	} else {
139
		result_bps = info->do_gettimeofday(r, len);
140 141 142 143
	}

	switch (bench_format) {
	case BENCH_FORMAT_DEFAULT:
144 145
		if (use_cycles) {
			printf(" %14lf cycles/Byte\n", (double)result_cycles/totallen);
146
		} else {
147
			print_bps(result_bps);
148 149
		}
		break;
150

151
	case BENCH_FORMAT_SIMPLE:
152 153
		if (use_cycles) {
			printf("%lf\n", (double)result_cycles/totallen);
154
		} else {
155
			printf("%lf\n", result_bps);
156
		}
157
		break;
158

159
	default:
160
		BUG_ON(1);
161 162
		break;
	}
163 164
}

165
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
166 167 168 169 170
{
	int i;
	size_t len;
	double totallen;

171
	argc = parse_options(argc, argv, options, info->usage, 0);
172

173 174
	if (use_cycles)
		init_cycles();
175 176 177 178 179 180 181 182 183

	len = (size_t)perf_atoll((char *)length_str);
	totallen = (double)len * iterations;

	if ((s64)len <= 0) {
		fprintf(stderr, "Invalid length:%s\n", length_str);
		return 1;
	}

184 185 186 187 188 189
	if (!strncmp(routine, "all", 3)) {
		for (i = 0; info->routines[i].name; i++)
			__bench_mem_routine(info, i, len, totallen);
		return 0;
	}

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
	for (i = 0; info->routines[i].name; i++) {
		if (!strcmp(info->routines[i].name, routine))
			break;
	}
	if (!info->routines[i].name) {
		printf("Unknown routine:%s\n", routine);
		printf("Available routines...\n");
		for (i = 0; info->routines[i].name; i++) {
			printf("\t%s ... %s\n",
			       info->routines[i].name, info->routines[i].desc);
		}
		return 1;
	}

	__bench_mem_routine(info, i, len, totallen);
205 206 207

	return 0;
}
208 209 210 211 212 213 214 215 216 217

/*
 * Allocate the destination and source buffers for a memcpy run, each
 * @length bytes, and store them in *dst and *src. An allocation failure
 * ends the program through die(). The caller releases both with free().
 */
static void memcpy_alloc_mem(void **dst, void **src, size_t length)
{
	*dst = zalloc(length);
	if (!*dst)
		die("memory allocation failed - maybe length is too large?\n");

	*src = zalloc(length);
	if (!*src)
		die("memory allocation failed - maybe length is too large?\n");

	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
	memset(*src, 0, length);
}

223
static u64 do_memcpy_cycles(const struct routine *r, size_t len)
224 225 226 227 228 229
{
	u64 cycle_start = 0ULL, cycle_end = 0ULL;
	void *src = NULL, *dst = NULL;
	memcpy_t fn = r->fn.memcpy;
	int i;

230
	memcpy_alloc_mem(&dst, &src, len);
231

232 233 234 235 236
	/*
	 * We prefault the freshly allocated memory range here,
	 * to not measure page fault overhead:
	 */
	fn(dst, src, len);
237

238
	cycle_start = get_cycles();
239 240
	for (i = 0; i < iterations; ++i)
		fn(dst, src, len);
241
	cycle_end = get_cycles();
242 243 244 245 246 247

	free(src);
	free(dst);
	return cycle_end - cycle_start;
}

248
static double do_memcpy_gettimeofday(const struct routine *r, size_t len)
249 250 251 252 253 254
{
	struct timeval tv_start, tv_end, tv_diff;
	memcpy_t fn = r->fn.memcpy;
	void *src = NULL, *dst = NULL;
	int i;

255
	memcpy_alloc_mem(&dst, &src, len);
256

257 258 259 260 261
	/*
	 * We prefault the freshly allocated memory range here,
	 * to not measure page fault overhead:
	 */
	fn(dst, src, len);
262 263 264 265 266 267 268 269 270 271

	BUG_ON(gettimeofday(&tv_start, NULL));
	for (i = 0; i < iterations; ++i)
		fn(dst, src, len);
	BUG_ON(gettimeofday(&tv_end, NULL));

	timersub(&tv_end, &tv_start, &tv_diff);

	free(src);
	free(dst);
272

273
	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
274 275
}

276
int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
277 278
{
	struct bench_mem_info info = {
279
		.routines		= memcpy_routines,
280
		.do_cycles		= do_memcpy_cycles,
281 282
		.do_gettimeofday	= do_memcpy_gettimeofday,
		.usage			= bench_mem_memcpy_usage,
283 284
	};

285
	return bench_mem_common(argc, argv, &info);
286
}
287 288 289 290 291 292 293 294

/*
 * Allocate the @length-byte buffer for a memset run and store it in *dst.
 * An allocation failure ends the program through die(). The caller
 * releases the buffer with free().
 */
static void memset_alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}

295
static u64 do_memset_cycles(const struct routine *r, size_t len)
296 297 298 299 300 301 302 303
{
	u64 cycle_start = 0ULL, cycle_end = 0ULL;
	memset_t fn = r->fn.memset;
	void *dst = NULL;
	int i;

	memset_alloc_mem(&dst, len);

304 305 306 307 308
	/*
	 * We prefault the freshly allocated memory range here,
	 * to not measure page fault overhead:
	 */
	fn(dst, -1, len);
309

310
	cycle_start = get_cycles();
311 312
	for (i = 0; i < iterations; ++i)
		fn(dst, i, len);
313
	cycle_end = get_cycles();
314 315 316 317 318

	free(dst);
	return cycle_end - cycle_start;
}

319
static double do_memset_gettimeofday(const struct routine *r, size_t len)
320 321 322 323 324 325 326 327
{
	struct timeval tv_start, tv_end, tv_diff;
	memset_t fn = r->fn.memset;
	void *dst = NULL;
	int i;

	memset_alloc_mem(&dst, len);

328 329 330 331 332
	/*
	 * We prefault the freshly allocated memory range here,
	 * to not measure page fault overhead:
	 */
	fn(dst, -1, len);
333 334 335 336 337 338 339 340 341

	BUG_ON(gettimeofday(&tv_start, NULL));
	for (i = 0; i < iterations; ++i)
		fn(dst, i, len);
	BUG_ON(gettimeofday(&tv_end, NULL));

	timersub(&tv_end, &tv_start, &tv_diff);

	free(dst);
342
	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
343 344 345 346 347 348 349 350
}

/* NULL-terminated usage lines for "perf bench mem memset". */
static const char *const bench_mem_memset_usage[] = {
	"perf bench mem memset <options>",
	NULL,
};

/* memset() implementations to benchmark, terminated by a NULL name. */
static const struct routine memset_routines[] = {
	{ .name		= "default",
	  .desc		= "Default memset() provided by glibc",
	  .fn.memset	= memset },

#ifdef HAVE_ARCH_X86_64_SUPPORT
# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif

	{ NULL, }
};

364
int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
365 366
{
	struct bench_mem_info info = {
367
		.routines		= memset_routines,
368
		.do_cycles		= do_memset_cycles,
369 370
		.do_gettimeofday	= do_memset_gettimeofday,
		.usage			= bench_mem_memset_usage,
371 372
	};

373
	return bench_mem_common(argc, argv, &info);
374
}