builtin-trace.c 15.0 KB
Newer Older
A
Arnaldo Carvalho de Melo 已提交
1
#include "builtin.h"
2
#include "util/color.h"
A
Arnaldo Carvalho de Melo 已提交
3
#include "util/evlist.h"
4 5
#include "util/machine.h"
#include "util/thread.h"
A
Arnaldo Carvalho de Melo 已提交
6 7 8 9 10 11 12 13 14
#include "util/parse-options.h"
#include "util/thread_map.h"
#include "event-parse.h"

#include <libaudit.h>
#include <stdlib.h>

static struct syscall_fmt {
	const char *name;
15
	const char *alias;
A
Arnaldo Carvalho de Melo 已提交
16 17 18
	bool	   errmsg;
	bool	   timeout;
} syscall_fmts[] = {
19
	{ .name	    = "access",	    .errmsg = true, },
20 21 22 23
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "futex",	    .errmsg = true, },
24
	{ .name	    = "open",	    .errmsg = true, },
25 26 27 28 29
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
30
	{ .name	    = "socket",	    .errmsg = true, },
31
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
A
Arnaldo Carvalho de Melo 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
};

static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}

static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}

struct syscall {
	struct event_format *tp_format;
	const char	    *name;
	struct syscall_fmt  *fmt;
};

52 53 54 55 56 57 58 59 60 61 62 63 64 65
static size_t fprintf_duration(unsigned long t, FILE *fp)
{
	double duration = (double)t / NSEC_PER_MSEC;
	size_t printed = fprintf(fp, "(");

	if (duration >= 1.0)
		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
	else if (duration >= 0.01)
		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
	else
		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
	return printed + fprintf(stdout, "): ");
}

66 67 68 69
struct thread_trace {
	u64		  entry_time;
	u64		  exit_time;
	bool		  entry_pending;
70
	unsigned long	  nr_events;
71 72 73 74 75 76 77 78 79 80
	char		  *entry_str;
};

static struct thread_trace *thread_trace__new(void)
{
	return zalloc(sizeof(struct thread_trace));
}

static struct thread_trace *thread__trace(struct thread *thread)
{
81 82
	struct thread_trace *ttrace;

83 84 85 86 87
	if (thread == NULL)
		goto fail;

	if (thread->priv == NULL)
		thread->priv = thread_trace__new();
88
		
89 90 91
	if (thread->priv == NULL)
		goto fail;

92 93 94 95
	ttrace = thread->priv;
	++ttrace->nr_events;

	return ttrace;
96 97 98 99 100 101
fail:
	color_fprintf(stdout, PERF_COLOR_RED,
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}

A
Arnaldo Carvalho de Melo 已提交
102 103 104 105 106 107 108
struct trace {
	int			audit_machine;
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct perf_record_opts opts;
109 110
	struct machine		host;
	u64			base_time;
111
	unsigned long		nr_events;
112
	bool			multiple_threads;
113
	double			duration_filter;
A
Arnaldo Carvalho de Melo 已提交
114 115
};

116 117 118 119 120
static bool trace__filter_duration(struct trace *trace, double t)
{
	return t < (trace->duration_filter * NSEC_PER_MSEC);
}

121 122 123 124
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;

125
	return fprintf(fp, "%10.3f ", ts);
126 127
}

128 129 130 131 132 133 134
static bool done = false;

static void sig_handler(int sig __maybe_unused)
{
	done = true;
}

135
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
136
					u64 duration, u64 tstamp, FILE *fp)
137 138
{
	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
139
	printed += fprintf_duration(duration, fp);
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196

	if (trace->multiple_threads)
		printed += fprintf(fp, "%d ", thread->pid);

	return printed;
}

static int trace__process_event(struct machine *machine, union perf_event *event)
{
	int ret = 0;

	switch (event->header.type) {
	case PERF_RECORD_LOST:
		color_fprintf(stdout, PERF_COLOR_RED,
			      "LOST %" PRIu64 " events!\n", event->lost.lost);
		ret = machine__process_lost_event(machine, event);
	default:
		ret = machine__process_event(machine, event);
		break;
	}

	return ret;
}

static int trace__tool_process(struct perf_tool *tool __maybe_unused,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine)
{
	return trace__process_event(machine, event);
}

static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init();

	if (err)
		return err;

	machine__init(&trace->host, "", HOST_KERNEL_ID);
	machine__create_kernel_maps(&trace->host);

	if (perf_target__has_task(&trace->opts.target)) {
		err = perf_event__synthesize_thread_map(NULL, evlist->threads,
							trace__tool_process,
							&trace->host);
	} else {
		err = perf_event__synthesize_threads(NULL, trace__tool_process,
						     &trace->host);
	}

	if (err)
		symbol__exit();

	return err;
}

A
Arnaldo Carvalho de Melo 已提交
197 198 199 200
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
201 202 203 204
	const char *name = audit_syscall_to_name(id, trace->audit_machine);

	if (name == NULL)
		return -1;
A
Arnaldo Carvalho de Melo 已提交
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
224 225
	sc->name = name;
	sc->fmt  = syscall_fmt__find(sc->name);
A
Arnaldo Carvalho de Melo 已提交
226

227
	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
A
Arnaldo Carvalho de Melo 已提交
228
	sc->tp_format = event_format__new("syscalls", tp_name);
229 230 231 232 233

	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = event_format__new("syscalls", tp_name);
	}
A
Arnaldo Carvalho de Melo 已提交
234 235 236 237

	return sc->tp_format != NULL ? 0 : -1;
}

238 239
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned long *args)
A
Arnaldo Carvalho de Melo 已提交
240 241 242 243 244 245 246 247
{
	int i = 0;
	size_t printed = 0;

	if (sc->tp_format != NULL) {
		struct format_field *field;

		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
248 249 250
			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: %ld", printed ? ", " : "",
					     field->name, args[i++]);
A
Arnaldo Carvalho de Melo 已提交
251 252 253
		}
	} else {
		while (i < 6) {
254 255 256
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, args[i]);
A
Arnaldo Carvalho de Melo 已提交
257 258 259 260 261 262 263
			++i;
		}
	}

	return printed;
}

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);

static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	int id = perf_evsel__intval(evsel, sample, "id");

	if (id < 0) {
		printf("Invalid syscall %d id, skipping...\n", id);
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
288 289 290 291
	printf("Problems reading syscall %d", id);
	if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
		printf("(%s)", trace->syscalls.table[id].name);
	puts(" information");
292 293 294 295 296 297
	return NULL;
}

static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    struct perf_sample *sample)
{
298
	char *msg;
299
	void *args;
300 301
	size_t printed = 0;
	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
302
	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
303
	struct thread_trace *ttrace = thread__trace(thread);
304

305
	if (ttrace == NULL || sc == NULL)
306 307 308 309 310 311 312 313
		return -1;

	args = perf_evsel__rawptr(evsel, sample, "args");
	if (args == NULL) {
		printf("Problems reading syscall arguments\n");
		return -1;
	}

314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
	ttrace = thread->priv;

	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(1024);
		if (!ttrace->entry_str)
			return -1;
	}

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);

	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
329 330 331 332
		if (!trace->duration_filter) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout);
			printf("%-70s\n", ttrace->entry_str);
		}
333 334
	} else
		ttrace->entry_pending = true;
335 336 337 338 339 340 341 342

	return 0;
}

static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   struct perf_sample *sample)
{
	int ret;
343
	u64 duration = 0;
344 345
	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
	struct thread_trace *ttrace = thread__trace(thread);
346 347
	struct syscall *sc = trace__syscall_info(trace, evsel, sample);

348
	if (ttrace == NULL || sc == NULL)
349 350 351 352
		return -1;

	ret = perf_evsel__intval(evsel, sample, "ret");

353 354 355 356
	ttrace = thread->priv;

	ttrace->exit_time = sample->time;

357
	if (ttrace->entry_time) {
358
		duration = sample->time - ttrace->entry_time;
359 360 361 362
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;
363 364

	trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout);
365 366 367 368 369 370 371 372 373

	if (ttrace->entry_pending) {
		printf("%-70s", ttrace->entry_str);
	} else {
		printf(" ... [");
		color_fprintf(stdout, PERF_COLOR_YELLOW, "continued");
		printf("]: %s()", sc->name);
	}

374 375 376 377 378 379 380 381 382 383 384 385
	if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
		char bf[256];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		printf(") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt && sc->fmt->timeout)
		printf(") = 0 Timeout");
	else
		printf(") = %d", ret);

	putchar('\n');
386
out:
387 388
	ttrace->entry_pending = false;

389 390 391
	return 0;
}

392
static int trace__run(struct trace *trace, int argc, const char **argv)
A
Arnaldo Carvalho de Melo 已提交
393 394
{
	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
395
	struct perf_evsel *evsel;
396 397
	int err = -1, i;
	unsigned long before;
398
	const bool forks = argc > 0;
A
Arnaldo Carvalho de Melo 已提交
399 400 401 402 403 404

	if (evlist == NULL) {
		printf("Not enough memory to run!\n");
		goto out;
	}

405 406 407
	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
		printf("Couldn't read the raw_syscalls tracepoints information!\n");
A
Arnaldo Carvalho de Melo 已提交
408 409 410 411 412 413 414 415 416
		goto out_delete_evlist;
	}

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		printf("Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

417 418 419 420 421 422
	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		printf("Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

A
Arnaldo Carvalho de Melo 已提交
423 424
	perf_evlist__config_attrs(evlist, &trace->opts);

425 426 427 428 429 430 431 432 433 434 435
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts, argv);
		if (err < 0) {
			printf("Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

A
Arnaldo Carvalho de Melo 已提交
436 437 438 439 440 441 442 443 444 445 446 447 448
	err = perf_evlist__open(evlist);
	if (err < 0) {
		printf("Couldn't create the events: %s\n", strerror(errno));
		goto out_delete_evlist;
	}

	err = perf_evlist__mmap(evlist, UINT_MAX, false);
	if (err < 0) {
		printf("Couldn't mmap the events: %s\n", strerror(errno));
		goto out_delete_evlist;
	}

	perf_evlist__enable(evlist);
449 450 451 452

	if (forks)
		perf_evlist__start_workload(evlist);

453
	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
A
Arnaldo Carvalho de Melo 已提交
454
again:
455
	before = trace->nr_events;
A
Arnaldo Carvalho de Melo 已提交
456 457 458 459 460 461

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			const u32 type = event->header.type;
462
			tracepoint_handler handler;
A
Arnaldo Carvalho de Melo 已提交
463 464
			struct perf_sample sample;

465
			++trace->nr_events;
A
Arnaldo Carvalho de Melo 已提交
466 467 468 469 470 471 472

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				printf("Can't parse sample, err = %d, skipping...\n", err);
				continue;
			}

473 474 475 476 477 478 479 480
			if (trace->base_time == 0)
				trace->base_time = sample.time;

			if (type != PERF_RECORD_SAMPLE) {
				trace__process_event(&trace->host, event);
				continue;
			}

A
Arnaldo Carvalho de Melo 已提交
481 482 483 484 485 486
			evsel = perf_evlist__id2evsel(evlist, sample.id);
			if (evsel == NULL) {
				printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
				continue;
			}

487 488 489 490 491 492
			if (sample.raw_data == NULL) {
				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				continue;
			}
A
Arnaldo Carvalho de Melo 已提交
493

494 495 496 497 498 499 500
			if (sample.raw_data == NULL) {
				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				continue;
			}

501 502
			handler = evsel->handler.func;
			handler(trace, evsel, &sample);
A
Arnaldo Carvalho de Melo 已提交
503 504 505
		}
	}

506
	if (trace->nr_events == before) {
507 508 509
		if (done)
			goto out_delete_evlist;

A
Arnaldo Carvalho de Melo 已提交
510
		poll(evlist->pollfd, evlist->nr_fds, -1);
511 512 513 514
	}

	if (done)
		perf_evlist__disable(evlist);
A
Arnaldo Carvalho de Melo 已提交
515 516 517 518 519 520 521 522 523

	goto again;

out_delete_evlist:
	perf_evlist__delete(evlist);
out:
	return err;
}

524 525 526 527 528 529 530 531 532
static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;

	trace->duration_filter = atof(str);
	return 0;
}

A
Arnaldo Carvalho de Melo 已提交
533 534 535
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const trace_usage[] = {
536 537
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
A
Arnaldo Carvalho de Melo 已提交
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
		NULL
	};
	struct trace trace = {
		.audit_machine = audit_detect_machine(),
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_delay      = true,
			.mmap_pages    = 1024,
		},
	};
	const struct option trace_options[] = {
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
571 572 573
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
A
Arnaldo Carvalho de Melo 已提交
574 575 576
	OPT_END()
	};
	int err;
577
	char bf[BUFSIZ];
A
Arnaldo Carvalho de Melo 已提交
578 579 580

	argc = parse_options(argc, argv, trace_options, trace_usage, 0);

581 582 583 584 585 586 587
	err = perf_target__validate(&trace.opts.target);
	if (err) {
		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		printf("%s", bf);
		return err;
	}

A
Arnaldo Carvalho de Melo 已提交
588 589 590 591 592 593 594
	err = perf_target__parse_uid(&trace.opts.target);
	if (err) {
		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		printf("%s", bf);
		return err;
	}

595
	if (!argc && perf_target__none(&trace.opts.target))
596 597
		trace.opts.target.system_wide = true;

598
	return trace__run(&trace, argc, argv);
A
Arnaldo Carvalho de Melo 已提交
599
}