builtin-trace.c 21.9 KB
Newer Older
1
#include <traceevent/event-parse.h>
A
Arnaldo Carvalho de Melo 已提交
2
#include "builtin.h"
3
#include "util/color.h"
4
#include "util/debug.h"
A
Arnaldo Carvalho de Melo 已提交
5
#include "util/evlist.h"
6 7
#include "util/machine.h"
#include "util/thread.h"
A
Arnaldo Carvalho de Melo 已提交
8
#include "util/parse-options.h"
9
#include "util/strlist.h"
A
Arnaldo Carvalho de Melo 已提交
10 11 12 13 14
#include "util/thread_map.h"

#include <libaudit.h>
#include <stdlib.h>

15 16 17 18 19
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg)
{
	return scnprintf(bf, size, "%#lx", arg);
}

A
Arnaldo Carvalho de Melo 已提交
20 21
static struct syscall_fmt {
	const char *name;
22
	const char *alias;
A
Arnaldo Carvalho de Melo 已提交
23 24
	bool	   errmsg;
	bool	   timeout;
25
	bool	   hexret;
A
Arnaldo Carvalho de Melo 已提交
26
} syscall_fmts[] = {
27
	{ .name	    = "access",	    .errmsg = true, },
28
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
29 30
	{ .name	    = "brk",	    .hexret = true, },
	{ .name	    = "mmap",	    .hexret = true, },
31
	{ .name	    = "connect",    .errmsg = true, },
32 33 34
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "futex",	    .errmsg = true, },
35
	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
36 37
	{ .name	    = "mmap",	    .hexret = true, },
	{ .name	    = "mremap",	    .hexret = true, },
38
	{ .name	    = "open",	    .errmsg = true, },
39 40
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
41 42
	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
43 44 45
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
46
	{ .name	    = "socket",	    .errmsg = true, },
47
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
48
	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
A
Arnaldo Carvalho de Melo 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
};

static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}

static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}

struct syscall {
	struct event_format *tp_format;
	const char	    *name;
66
	bool		    filtered;
A
Arnaldo Carvalho de Melo 已提交
67
	struct syscall_fmt  *fmt;
68
	size_t		    (**arg_scnprintf)(char *bf, size_t size, unsigned long arg);
A
Arnaldo Carvalho de Melo 已提交
69 70
};

71 72 73 74 75 76 77 78 79 80 81
static size_t fprintf_duration(unsigned long t, FILE *fp)
{
	double duration = (double)t / NSEC_PER_MSEC;
	size_t printed = fprintf(fp, "(");

	if (duration >= 1.0)
		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
	else if (duration >= 0.01)
		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
	else
		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
82
	return printed + fprintf(fp, "): ");
83 84
}

85 86 87 88
struct thread_trace {
	u64		  entry_time;
	u64		  exit_time;
	bool		  entry_pending;
89
	unsigned long	  nr_events;
90
	char		  *entry_str;
91
	double		  runtime_ms;
92 93 94 95 96 97 98
};

static struct thread_trace *thread_trace__new(void)
{
	return zalloc(sizeof(struct thread_trace));
}

99
static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
100
{
101 102
	struct thread_trace *ttrace;

103 104 105 106 107
	if (thread == NULL)
		goto fail;

	if (thread->priv == NULL)
		thread->priv = thread_trace__new();
108
		
109 110 111
	if (thread->priv == NULL)
		goto fail;

112 113 114 115
	ttrace = thread->priv;
	++ttrace->nr_events;

	return ttrace;
116
fail:
117
	color_fprintf(fp, PERF_COLOR_RED,
118 119 120 121
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}

A
Arnaldo Carvalho de Melo 已提交
122
struct trace {
123
	struct perf_tool	tool;
A
Arnaldo Carvalho de Melo 已提交
124 125 126 127 128 129
	int			audit_machine;
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct perf_record_opts opts;
130 131
	struct machine		host;
	u64			base_time;
132
	FILE			*output;
133
	unsigned long		nr_events;
134 135
	struct strlist		*ev_qualifier;
	bool			not_ev_qualifier;
136
	bool			sched;
137
	bool			multiple_threads;
138
	double			duration_filter;
139
	double			runtime_ms;
A
Arnaldo Carvalho de Melo 已提交
140 141
};

142 143 144 145 146
static bool trace__filter_duration(struct trace *trace, double t)
{
	return t < (trace->duration_filter * NSEC_PER_MSEC);
}

147 148 149 150
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;

151
	return fprintf(fp, "%10.3f ", ts);
152 153
}

154 155 156 157 158 159 160
static bool done = false;

static void sig_handler(int sig __maybe_unused)
{
	done = true;
}

161
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
162
					u64 duration, u64 tstamp, FILE *fp)
163 164
{
	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
165
	printed += fprintf_duration(duration, fp);
166 167

	if (trace->multiple_threads)
168
		printed += fprintf(fp, "%d ", thread->tid);
169 170 171 172

	return printed;
}

173 174
static int trace__process_event(struct trace *trace, struct machine *machine,
				union perf_event *event)
175 176 177 178 179
{
	int ret = 0;

	switch (event->header.type) {
	case PERF_RECORD_LOST:
180
		color_fprintf(trace->output, PERF_COLOR_RED,
181 182 183 184 185 186 187 188 189 190
			      "LOST %" PRIu64 " events!\n", event->lost.lost);
		ret = machine__process_lost_event(machine, event);
	default:
		ret = machine__process_event(machine, event);
		break;
	}

	return ret;
}

191
static int trace__tool_process(struct perf_tool *tool,
192 193 194 195
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine)
{
196 197
	struct trace *trace = container_of(tool, struct trace, tool);
	return trace__process_event(trace, machine, event);
198 199 200 201 202 203 204 205 206 207 208 209 210
}

static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init();

	if (err)
		return err;

	machine__init(&trace->host, "", HOST_KERNEL_ID);
	machine__create_kernel_maps(&trace->host);

	if (perf_target__has_task(&trace->opts.target)) {
211
		err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
212 213 214
							trace__tool_process,
							&trace->host);
	} else {
215
		err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
216 217 218 219 220 221 222 223 224
						     &trace->host);
	}

	if (err)
		symbol__exit();

	return err;
}

225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0;

	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
	if (sc->arg_scnprintf == NULL)
		return -1;

	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
		if (field->flags & FIELD_IS_POINTER)
			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
		++idx;
	}

	return 0;
}

A
Arnaldo Carvalho de Melo 已提交
243 244 245 246
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
247 248 249 250
	const char *name = audit_syscall_to_name(id, trace->audit_machine);

	if (name == NULL)
		return -1;
A
Arnaldo Carvalho de Melo 已提交
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
270
	sc->name = name;
271

272 273 274 275 276 277 278 279 280 281 282
	if (trace->ev_qualifier) {
		bool in = strlist__find(trace->ev_qualifier, name) != NULL;

		if (!(in ^ trace->not_ev_qualifier)) {
			sc->filtered = true;
			/*
			 * No need to do read tracepoint information since this will be
			 * filtered out.
			 */
			return 0;
		}
283 284
	}

285
	sc->fmt  = syscall_fmt__find(sc->name);
A
Arnaldo Carvalho de Melo 已提交
286

287
	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
A
Arnaldo Carvalho de Melo 已提交
288
	sc->tp_format = event_format__new("syscalls", tp_name);
289 290 291 292 293

	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = event_format__new("syscalls", tp_name);
	}
A
Arnaldo Carvalho de Melo 已提交
294

295 296 297 298
	if (sc->tp_format == NULL)
		return -1;

	return syscall__set_arg_fmts(sc);
A
Arnaldo Carvalho de Melo 已提交
299 300
}

301 302
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned long *args)
A
Arnaldo Carvalho de Melo 已提交
303 304 305 306 307 308 309 310
{
	int i = 0;
	size_t printed = 0;

	if (sc->tp_format != NULL) {
		struct format_field *field;

		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
311
			printed += scnprintf(bf + printed, size - printed,
312 313 314 315 316 317 318 319
					     "%s%s: ", printed ? ", " : "", field->name);

			if (sc->arg_scnprintf && sc->arg_scnprintf[i])
				printed += sc->arg_scnprintf[i](bf + printed, size - printed, args[i]);
			else
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", args[i]);
                       ++i;
A
Arnaldo Carvalho de Melo 已提交
320 321 322
		}
	} else {
		while (i < 6) {
323 324 325
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, args[i]);
A
Arnaldo Carvalho de Melo 已提交
326 327 328 329 330 331 332
			++i;
		}
	}

	return printed;
}

333 334 335 336 337 338 339 340 341 342
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);

static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	int id = perf_evsel__intval(evsel, sample, "id");

	if (id < 0) {
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
 		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
359 360 361 362 363 364 365 366 367 368 369 370 371
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
372 373 374 375 376 377
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
378 379 380 381 382 383
	return NULL;
}

static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    struct perf_sample *sample)
{
384
	char *msg;
385
	void *args;
386
	size_t printed = 0;
387
	struct thread *thread;
388
	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
389 390 391 392
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;
393

394 395 396 397
	if (sc->filtered)
		return 0;

	thread = machine__findnew_thread(&trace->host, sample->tid);
398
	ttrace = thread__trace(thread, trace->output);
399
	if (ttrace == NULL)
400 401 402 403
		return -1;

	args = perf_evsel__rawptr(evsel, sample, "args");
	if (args == NULL) {
404
		fprintf(trace->output, "Problems reading syscall arguments\n");
405 406 407
		return -1;
	}

408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
	ttrace = thread->priv;

	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(1024);
		if (!ttrace->entry_str)
			return -1;
	}

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);

	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
423
		if (!trace->duration_filter) {
424 425
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
426
		}
427 428
	} else
		ttrace->entry_pending = true;
429 430 431 432 433 434 435 436

	return 0;
}

static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   struct perf_sample *sample)
{
	int ret;
437
	u64 duration = 0;
438
	struct thread *thread;
439
	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
440 441 442 443
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;
444

445 446 447 448
	if (sc->filtered)
		return 0;

	thread = machine__findnew_thread(&trace->host, sample->tid);
449
	ttrace = thread__trace(thread, trace->output);
450
	if (ttrace == NULL)
451 452 453 454
		return -1;

	ret = perf_evsel__intval(evsel, sample, "ret");

455 456 457 458
	ttrace = thread->priv;

	ttrace->exit_time = sample->time;

459
	if (ttrace->entry_time) {
460
		duration = sample->time - ttrace->entry_time;
461 462 463 464
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;
465

466
	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
467 468

	if (ttrace->entry_pending) {
469
		fprintf(trace->output, "%-70s", ttrace->entry_str);
470
	} else {
471 472 473
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
474 475
	}

476 477 478 479
	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %d", ret);
	} else if (ret < 0 && sc->fmt->errmsg) {
480 481 482 483
		char bf[256];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

484
		fprintf(trace->output, ") = -1 %s %s", e, emsg);
485
	} else if (ret == 0 && sc->fmt->timeout)
486
		fprintf(trace->output, ") = 0 Timeout");
487 488
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#x", ret);
489
	else
490
		goto signed_print;
491

492
	fputc('\n', trace->output);
493
out:
494 495
	ttrace->entry_pending = false;

496 497 498
	return 0;
}

499 500 501 502 503 504
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     struct perf_sample *sample)
{
        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
505
	struct thread_trace *ttrace = thread__trace(thread, trace->output);
506 507 508 509 510 511 512 513 514

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
	return 0;

out_dump:
515
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
516 517 518 519 520 521 522 523
	       evsel->name,
	       perf_evsel__strval(evsel, sample, "comm"),
	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
	       runtime,
	       perf_evsel__intval(evsel, sample, "vruntime"));
	return 0;
}

524
static int trace__run(struct trace *trace, int argc, const char **argv)
A
Arnaldo Carvalho de Melo 已提交
525
{
526
	struct perf_evlist *evlist = perf_evlist__new();
527
	struct perf_evsel *evsel;
528 529
	int err = -1, i;
	unsigned long before;
530
	const bool forks = argc > 0;
A
Arnaldo Carvalho de Melo 已提交
531 532

	if (evlist == NULL) {
533
		fprintf(trace->output, "Not enough memory to run!\n");
A
Arnaldo Carvalho de Melo 已提交
534 535 536
		goto out;
	}

537 538
	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
539
		fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
A
Arnaldo Carvalho de Melo 已提交
540 541 542
		goto out_delete_evlist;
	}

543 544 545
	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime)) {
546
		fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
547 548 549
		goto out_delete_evlist;
	}

A
Arnaldo Carvalho de Melo 已提交
550 551
	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
552
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
A
Arnaldo Carvalho de Melo 已提交
553 554 555
		goto out_delete_evlist;
	}

556 557
	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
558
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
559
		goto out_delete_maps;
560 561
	}

562
	perf_evlist__config(evlist, &trace->opts);
A
Arnaldo Carvalho de Melo 已提交
563

564 565 566 567
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
568
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
569
						    argv, false, false);
570
		if (err < 0) {
571
			fprintf(trace->output, "Couldn't run the workload!\n");
572
			goto out_delete_maps;
573 574 575
		}
	}

A
Arnaldo Carvalho de Melo 已提交
576 577
	err = perf_evlist__open(evlist);
	if (err < 0) {
578
		fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
579
		goto out_delete_maps;
A
Arnaldo Carvalho de Melo 已提交
580 581 582 583
	}

	err = perf_evlist__mmap(evlist, UINT_MAX, false);
	if (err < 0) {
584
		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
585
		goto out_close_evlist;
A
Arnaldo Carvalho de Melo 已提交
586 587 588
	}

	perf_evlist__enable(evlist);
589 590 591 592

	if (forks)
		perf_evlist__start_workload(evlist);

593
	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
A
Arnaldo Carvalho de Melo 已提交
594
again:
595
	before = trace->nr_events;
A
Arnaldo Carvalho de Melo 已提交
596 597 598 599 600 601

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			const u32 type = event->header.type;
602
			tracepoint_handler handler;
A
Arnaldo Carvalho de Melo 已提交
603 604
			struct perf_sample sample;

605
			++trace->nr_events;
A
Arnaldo Carvalho de Melo 已提交
606 607 608

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
609
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
A
Arnaldo Carvalho de Melo 已提交
610 611 612
				continue;
			}

613 614 615 616
			if (trace->base_time == 0)
				trace->base_time = sample.time;

			if (type != PERF_RECORD_SAMPLE) {
617
				trace__process_event(trace, &trace->host, event);
618 619 620
				continue;
			}

A
Arnaldo Carvalho de Melo 已提交
621 622
			evsel = perf_evlist__id2evsel(evlist, sample.id);
			if (evsel == NULL) {
623
				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
A
Arnaldo Carvalho de Melo 已提交
624 625 626
				continue;
			}

627
			if (sample.raw_data == NULL) {
628
				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
629 630 631 632 633
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				continue;
			}

634 635
			handler = evsel->handler.func;
			handler(trace, evsel, &sample);
A
Arnaldo Carvalho de Melo 已提交
636 637 638
		}
	}

639
	if (trace->nr_events == before) {
640
		if (done)
641
			goto out_unmap_evlist;
642

A
Arnaldo Carvalho de Melo 已提交
643
		poll(evlist->pollfd, evlist->nr_fds, -1);
644 645 646 647
	}

	if (done)
		perf_evlist__disable(evlist);
A
Arnaldo Carvalho de Melo 已提交
648 649 650

	goto again;

651 652 653 654 655 656
out_unmap_evlist:
	perf_evlist__munmap(evlist);
out_close_evlist:
	perf_evlist__close(evlist);
out_delete_maps:
	perf_evlist__delete_maps(evlist);
A
Arnaldo Carvalho de Melo 已提交
657 658 659 660 661 662
out_delete_evlist:
	perf_evlist__delete(evlist);
out:
	return err;
}

663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed  = fprintf(fp, "\n _____________________________________________________________________\n");
	printed += fprintf(fp," __)    Summary of events    (__\n\n");
	printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
	printed += fprintf(fp," _____________________________________________________________________\n\n");

	return printed;
}

static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
		struct thread *thread = rb_entry(nd, struct thread, rb_node);
		struct thread_trace *ttrace = thread->priv;
		const char *color;
		double ratio;

		if (ttrace == NULL)
			continue;

		ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

		color = PERF_COLOR_NORMAL;
		if (ratio > 50.0)
			color = PERF_COLOR_RED;
		else if (ratio > 25.0)
			color = PERF_COLOR_GREEN;
		else if (ratio > 5.0)
			color = PERF_COLOR_YELLOW;

		printed += color_fprintf(fp, color, "%20s", thread->comm);
700
		printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
701 702 703 704 705 706 707
		printed += color_fprintf(fp, color, "%5.1f%%", ratio);
		printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
	}

	return printed;
}

708 709 710 711 712 713 714 715 716
static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;

	trace->duration_filter = atof(str);
	return 0;
}

717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
static int trace__open_output(struct trace *trace, const char *filename)
{
	struct stat st;

	if (!stat(filename, &st) && st.st_size) {
		char oldname[PATH_MAX];

		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
		unlink(oldname);
		rename(filename, oldname);
	}

	trace->output = fopen(filename, "w");

	return trace->output == NULL ? -errno : 0;
}

A
Arnaldo Carvalho de Melo 已提交
734 735 736
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const trace_usage[] = {
737 738
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
A
Arnaldo Carvalho de Melo 已提交
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
		NULL
	};
	struct trace trace = {
		.audit_machine = audit_detect_machine(),
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_delay      = true,
			.mmap_pages    = 1024,
		},
756
		.output = stdout,
A
Arnaldo Carvalho de Melo 已提交
757
	};
758
	const char *output_name = NULL;
759
	const char *ev_qualifier_str = NULL;
A
Arnaldo Carvalho de Melo 已提交
760
	const struct option trace_options[] = {
761 762
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
		    "list of events to trace"),
763
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
A
Arnaldo Carvalho de Melo 已提交
764 765
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
766
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
A
Arnaldo Carvalho de Melo 已提交
767
		    "trace events on existing thread id"),
768
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
A
Arnaldo Carvalho de Melo 已提交
769
		    "system-wide collection from all CPUs"),
770
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
A
Arnaldo Carvalho de Melo 已提交
771
		    "list of cpus to monitor"),
772
	OPT_BOOLEAN('i', "no-inherit", &trace.opts.no_inherit,
A
Arnaldo Carvalho de Melo 已提交
773
		    "child tasks do not inherit counters"),
774
	OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
A
Arnaldo Carvalho de Melo 已提交
775
		     "number of mmap data pages"),
776
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
A
Arnaldo Carvalho de Melo 已提交
777
		   "user to profile"),
778 779 780
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
781
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
782
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
A
Arnaldo Carvalho de Melo 已提交
783 784 785
	OPT_END()
	};
	int err;
786
	char bf[BUFSIZ];
A
Arnaldo Carvalho de Melo 已提交
787 788 789

	argc = parse_options(argc, argv, trace_options, trace_usage, 0);

790 791 792 793 794 795 796 797
	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

798
	if (ev_qualifier_str != NULL) {
799 800 801 802 803 804
		const char *s = ev_qualifier_str;

		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(true, s);
805
		if (trace.ev_qualifier == NULL) {
806 807 808 809
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
810 811 812
		}
	}

813 814 815
	err = perf_target__validate(&trace.opts.target);
	if (err) {
		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
816 817
		fprintf(trace.output, "%s", bf);
		goto out_close;
818 819
	}

A
Arnaldo Carvalho de Melo 已提交
820 821 822
	err = perf_target__parse_uid(&trace.opts.target);
	if (err) {
		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
823 824
		fprintf(trace.output, "%s", bf);
		goto out_close;
A
Arnaldo Carvalho de Melo 已提交
825 826
	}

827
	if (!argc && perf_target__none(&trace.opts.target))
828 829
		trace.opts.target.system_wide = true;

830 831 832
	err = trace__run(&trace, argc, argv);

	if (trace.sched && !err)
833
		trace__fprintf_thread_summary(&trace, trace.output);
834

835 836 837 838
out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
839
	return err;
A
Arnaldo Carvalho de Melo 已提交
840
}