builtin-trace.c 14.0 KB
Newer Older
A
Arnaldo Carvalho de Melo 已提交
1
#include "builtin.h"
2
#include "util/color.h"
A
Arnaldo Carvalho de Melo 已提交
3
#include "util/evlist.h"
4 5
#include "util/machine.h"
#include "util/thread.h"
A
Arnaldo Carvalho de Melo 已提交
6 7 8 9 10 11 12 13 14
#include "util/parse-options.h"
#include "util/thread_map.h"
#include "event-parse.h"

#include <libaudit.h>
#include <stdlib.h>

static struct syscall_fmt {
	const char *name;
15
	const char *alias;
A
Arnaldo Carvalho de Melo 已提交
16 17 18
	bool	   errmsg;
	bool	   timeout;
} syscall_fmts[] = {
19 20 21 22 23 24 25 26 27 28
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "futex",	    .errmsg = true, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
A
Arnaldo Carvalho de Melo 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
};

static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}

static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}

struct syscall {
	struct event_format *tp_format;
	const char	    *name;
	struct syscall_fmt  *fmt;
};

49 50 51 52 53 54 55 56 57 58 59 60 61 62
static size_t fprintf_duration(unsigned long t, FILE *fp)
{
	double duration = (double)t / NSEC_PER_MSEC;
	size_t printed = fprintf(fp, "(");

	if (duration >= 1.0)
		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
	else if (duration >= 0.01)
		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
	else
		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
	return printed + fprintf(stdout, "): ");
}

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
struct thread_trace {
	u64		  entry_time;
	u64		  exit_time;
	bool		  entry_pending;
	char		  *entry_str;
};

static struct thread_trace *thread_trace__new(void)
{
	return zalloc(sizeof(struct thread_trace));
}

static struct thread_trace *thread__trace(struct thread *thread)
{
	if (thread == NULL)
		goto fail;

	if (thread->priv == NULL)
		thread->priv = thread_trace__new();

	if (thread->priv == NULL)
		goto fail;

	return thread->priv;
fail:
	color_fprintf(stdout, PERF_COLOR_RED,
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}

A
Arnaldo Carvalho de Melo 已提交
93 94 95 96 97 98 99
struct trace {
	int			audit_machine;
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct perf_record_opts opts;
100 101 102
	struct machine		host;
	u64			base_time;
	bool			multiple_threads;
A
Arnaldo Carvalho de Melo 已提交
103 104
};

105 106 107 108
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;

109
	return fprintf(fp, "%10.3f ", ts);
110 111
}

112 113 114 115 116 117 118
static bool done = false;

static void sig_handler(int sig __maybe_unused)
{
	done = true;
}

119
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
120
					u64 duration, u64 tstamp, FILE *fp)
121 122
{
	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
123
	printed += fprintf_duration(duration, fp);
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180

	if (trace->multiple_threads)
		printed += fprintf(fp, "%d ", thread->pid);

	return printed;
}

static int trace__process_event(struct machine *machine, union perf_event *event)
{
	int ret = 0;

	switch (event->header.type) {
	case PERF_RECORD_LOST:
		color_fprintf(stdout, PERF_COLOR_RED,
			      "LOST %" PRIu64 " events!\n", event->lost.lost);
		ret = machine__process_lost_event(machine, event);
	default:
		ret = machine__process_event(machine, event);
		break;
	}

	return ret;
}

static int trace__tool_process(struct perf_tool *tool __maybe_unused,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine)
{
	return trace__process_event(machine, event);
}

static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init();

	if (err)
		return err;

	machine__init(&trace->host, "", HOST_KERNEL_ID);
	machine__create_kernel_maps(&trace->host);

	if (perf_target__has_task(&trace->opts.target)) {
		err = perf_event__synthesize_thread_map(NULL, evlist->threads,
							trace__tool_process,
							&trace->host);
	} else {
		err = perf_event__synthesize_threads(NULL, trace__tool_process,
						     &trace->host);
	}

	if (err)
		symbol__exit();

	return err;
}

A
Arnaldo Carvalho de Melo 已提交
181 182 183 184
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
185 186 187 188
	const char *name = audit_syscall_to_name(id, trace->audit_machine);

	if (name == NULL)
		return -1;
A
Arnaldo Carvalho de Melo 已提交
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
208 209
	sc->name = name;
	sc->fmt  = syscall_fmt__find(sc->name);
A
Arnaldo Carvalho de Melo 已提交
210

211
	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
A
Arnaldo Carvalho de Melo 已提交
212
	sc->tp_format = event_format__new("syscalls", tp_name);
213 214 215 216 217

	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = event_format__new("syscalls", tp_name);
	}
A
Arnaldo Carvalho de Melo 已提交
218 219 220 221

	return sc->tp_format != NULL ? 0 : -1;
}

222 223
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned long *args)
A
Arnaldo Carvalho de Melo 已提交
224 225 226 227 228 229 230 231
{
	int i = 0;
	size_t printed = 0;

	if (sc->tp_format != NULL) {
		struct format_field *field;

		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
232 233 234
			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: %ld", printed ? ", " : "",
					     field->name, args[i++]);
A
Arnaldo Carvalho de Melo 已提交
235 236 237
		}
	} else {
		while (i < 6) {
238 239 240
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, args[i]);
A
Arnaldo Carvalho de Melo 已提交
241 242 243 244 245 246 247
			++i;
		}
	}

	return printed;
}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);

static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	int id = perf_evsel__intval(evsel, sample, "id");

	if (id < 0) {
		printf("Invalid syscall %d id, skipping...\n", id);
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	printf("Problems reading syscall %d information\n", id);
	return NULL;
}

static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    struct perf_sample *sample)
{
279
	char *msg;
280
	void *args;
281 282
	size_t printed = 0;
	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
283
	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
284
	struct thread_trace *ttrace = thread__trace(thread);
285

286
	if (ttrace == NULL || sc == NULL)
287 288 289 290 291 292 293 294
		return -1;

	args = perf_evsel__rawptr(evsel, sample, "args");
	if (args == NULL) {
		printf("Problems reading syscall arguments\n");
		return -1;
	}

295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
	ttrace = thread->priv;

	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(1024);
		if (!ttrace->entry_str)
			return -1;
	}

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);

	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
310
		trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout);
311 312 313
		printf("%-70s\n", ttrace->entry_str);
	} else
		ttrace->entry_pending = true;
314 315 316 317 318 319 320 321

	return 0;
}

static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   struct perf_sample *sample)
{
	int ret;
322
	u64 duration = 0;
323 324
	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
	struct thread_trace *ttrace = thread__trace(thread);
325 326
	struct syscall *sc = trace__syscall_info(trace, evsel, sample);

327
	if (ttrace == NULL || sc == NULL)
328 329 330 331
		return -1;

	ret = perf_evsel__intval(evsel, sample, "ret");

332 333 334 335
	ttrace = thread->priv;

	ttrace->exit_time = sample->time;

336 337 338 339
	if (ttrace->entry_time)
		duration = sample->time - ttrace->entry_time;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout);
340 341 342 343 344 345 346 347 348

	if (ttrace->entry_pending) {
		printf("%-70s", ttrace->entry_str);
	} else {
		printf(" ... [");
		color_fprintf(stdout, PERF_COLOR_YELLOW, "continued");
		printf("]: %s()", sc->name);
	}

349 350 351 352 353 354 355 356 357 358 359 360
	if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
		char bf[256];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		printf(") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt && sc->fmt->timeout)
		printf(") = 0 Timeout");
	else
		printf(") = %d", ret);

	putchar('\n');
361 362 363

	ttrace->entry_pending = false;

364 365 366
	return 0;
}

367
static int trace__run(struct trace *trace, int argc, const char **argv)
A
Arnaldo Carvalho de Melo 已提交
368 369
{
	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
370
	struct perf_evsel *evsel;
A
Arnaldo Carvalho de Melo 已提交
371
	int err = -1, i, nr_events = 0, before;
372
	const bool forks = argc > 0;
A
Arnaldo Carvalho de Melo 已提交
373 374 375 376 377 378

	if (evlist == NULL) {
		printf("Not enough memory to run!\n");
		goto out;
	}

379 380 381
	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
		printf("Couldn't read the raw_syscalls tracepoints information!\n");
A
Arnaldo Carvalho de Melo 已提交
382 383 384 385 386 387 388 389 390
		goto out_delete_evlist;
	}

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		printf("Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

391 392 393 394 395 396
	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		printf("Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

A
Arnaldo Carvalho de Melo 已提交
397 398
	perf_evlist__config_attrs(evlist, &trace->opts);

399 400 401 402 403 404 405 406 407 408 409
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts, argv);
		if (err < 0) {
			printf("Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

A
Arnaldo Carvalho de Melo 已提交
410 411 412 413 414 415 416 417 418 419 420 421 422
	err = perf_evlist__open(evlist);
	if (err < 0) {
		printf("Couldn't create the events: %s\n", strerror(errno));
		goto out_delete_evlist;
	}

	err = perf_evlist__mmap(evlist, UINT_MAX, false);
	if (err < 0) {
		printf("Couldn't mmap the events: %s\n", strerror(errno));
		goto out_delete_evlist;
	}

	perf_evlist__enable(evlist);
423 424 425 426

	if (forks)
		perf_evlist__start_workload(evlist);

427
	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
A
Arnaldo Carvalho de Melo 已提交
428 429 430 431 432 433 434 435
again:
	before = nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			const u32 type = event->header.type;
436
			tracepoint_handler handler;
A
Arnaldo Carvalho de Melo 已提交
437 438 439 440 441 442 443 444 445 446
			struct perf_sample sample;

			++nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				printf("Can't parse sample, err = %d, skipping...\n", err);
				continue;
			}

447 448 449 450 451 452 453 454
			if (trace->base_time == 0)
				trace->base_time = sample.time;

			if (type != PERF_RECORD_SAMPLE) {
				trace__process_event(&trace->host, event);
				continue;
			}

A
Arnaldo Carvalho de Melo 已提交
455 456 457 458 459 460
			evsel = perf_evlist__id2evsel(evlist, sample.id);
			if (evsel == NULL) {
				printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
				continue;
			}

461 462 463 464 465 466
			if (sample.raw_data == NULL) {
				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				continue;
			}
A
Arnaldo Carvalho de Melo 已提交
467

468 469 470 471 472 473 474
			if (sample.raw_data == NULL) {
				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				continue;
			}

475 476
			handler = evsel->handler.func;
			handler(trace, evsel, &sample);
A
Arnaldo Carvalho de Melo 已提交
477 478 479
		}
	}

480 481 482 483
	if (nr_events == before) {
		if (done)
			goto out_delete_evlist;

A
Arnaldo Carvalho de Melo 已提交
484
		poll(evlist->pollfd, evlist->nr_fds, -1);
485 486 487 488
	}

	if (done)
		perf_evlist__disable(evlist);
A
Arnaldo Carvalho de Melo 已提交
489 490 491 492 493 494 495 496 497 498 499 500

	goto again;

out_delete_evlist:
	perf_evlist__delete(evlist);
out:
	return err;
}

int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const trace_usage[] = {
501 502
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
A
Arnaldo Carvalho de Melo 已提交
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
		NULL
	};
	struct trace trace = {
		.audit_machine = audit_detect_machine(),
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_delay      = true,
			.mmap_pages    = 1024,
		},
	};
	const struct option trace_options[] = {
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_END()
	};
	int err;
539
	char bf[BUFSIZ];
A
Arnaldo Carvalho de Melo 已提交
540 541 542

	argc = parse_options(argc, argv, trace_options, trace_usage, 0);

543 544 545 546 547 548 549
	err = perf_target__validate(&trace.opts.target);
	if (err) {
		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		printf("%s", bf);
		return err;
	}

A
Arnaldo Carvalho de Melo 已提交
550 551 552 553 554 555 556
	err = perf_target__parse_uid(&trace.opts.target);
	if (err) {
		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		printf("%s", bf);
		return err;
	}

557
	if (!argc && perf_target__none(&trace.opts.target))
558 559
		trace.opts.target.system_wide = true;

560
	return trace__run(&trace, argc, argv);
A
Arnaldo Carvalho de Melo 已提交
561
}