diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3a2ae37310a99da3579b847b3c79395631e780af
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -0,0 +1,53 @@
+perf-trace(1)
+=============
+
+NAME
+----
+perf-trace - strace inspired tool
+
+SYNOPSIS
+--------
+[verse]
+'perf trace'
+
+DESCRIPTION
+-----------
+This command will show the events associated with the target, initially
+syscalls, but other system events like pagefaults, task lifetime events,
+scheduling events, etc.
+
+Initially this is a live mode only tool, but eventually will work with
+perf.data files like the other tools, allowing a detached 'record' from
+analysis phases.
+
+OPTIONS
+-------
+
+--all-cpus::
+	System-wide collection from all CPUs.
+
+-p::
+--pid=::
+	Record events on existing process ID (comma separated list).
+
+--tid=::
+	Record events on existing thread ID (comma separated list).
+
+--uid=::
+	Record events in threads owned by uid. Name or number.
+
+--no-inherit::
+	Child tasks do not inherit counters.
+
+--mmap-pages=::
+	Number of mmap data pages. Must be a power of two.
+
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), events are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 251dcd7fb5acbb90f9206f4707509bfba9778f8a..6958ba4f5dcbc8c6cc7ed99b3909d0d0ef48b751 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -102,7 +102,7 @@ ifdef PARSER_DEBUG
 endif
 
 CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS)
-EXTLIBS = -lpthread -lrt -lelf -lm
+EXTLIBS = -lpthread -lrt -lelf -lm -laudit
 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
@@ -442,6 +442,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
 BUILTIN_OBJS += $(OUTPUT)builtin-test.o
+BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
 BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
 
 PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
new file mode 100644
index 0000000000000000000000000000000000000000..5fa1820cc969ae187dca75de4dbb21e9a076f3e1
--- /dev/null
+++ b/tools/perf/builtin-trace.c
@@ -0,0 +1,328 @@
+#include "builtin.h"
+#include "util/evlist.h"
+#include "util/parse-options.h"
+#include "util/thread_map.h"
+#include "event-parse.h"
+
+#include <libaudit.h>
+#include <stdlib.h>
+
+/*
+ * Per-syscall output tweaks.  Keep the table sorted by name:
+ * syscall_fmt__find() looks entries up with bsearch().
+ */
+static struct syscall_fmt {
+	const char *name;
+	bool	   errmsg;	/* negative return: print errno name and message */
+	bool	   timeout;	/* zero return: print "Timeout" */
+} syscall_fmts[] = {
+	{ .name = "futex",    .errmsg = true, },
+	{ .name = "poll",     .errmsg = true, .timeout = true, },
+	{ .name = "ppoll",    .errmsg = true, .timeout = true, },
+	{ .name = "read",     .errmsg = true, },
+	{ .name = "recvfrom", .errmsg = true, },
+	{ .name = "select",   .errmsg = true, .timeout = true, },
+};
+
+/* bsearch() comparator: the key is a syscall name, the element a syscall_fmt. */
+static int syscall_fmt__cmp(const void *name, const void *fmtp)
+{
+	const struct syscall_fmt *fmt = fmtp;
+	return strcmp(name, fmt->name);
+}
+
+/* Return the syscall_fmts[] entry for @name, or NULL when there is none. */
+static struct syscall_fmt *syscall_fmt__find(const char *name)
+{
+	const int nmemb = ARRAY_SIZE(syscall_fmts);
+	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
+}
+
+/* Cached description of one syscall; trace.syscalls.table is indexed by id. */
+struct syscall {
+	struct event_format *tp_format;
+	const char	    *name;
+	struct syscall_fmt  *fmt;
+};
+
+/* State of one 'perf trace' run. */
+struct trace {
+	int			audit_machine;
+	struct {
+		int		max;	/* highest valid table index, -1 when empty */
+		struct syscall	*table;
+	} syscalls;
+	struct perf_record_opts opts;
+};
+
+/*
+ * Fill in the syscall table slot for @id, growing (and zeroing the new part
+ * of) the table when @id is beyond its current end.  Returns 0 on success,
+ * -1 when realloc() fails, audit_syscall_to_name() returns NULL or
+ * event_format__new() returns NULL for the sys_enter_<name> tracepoint.
+ */
+static int trace__read_syscall_info(struct trace *trace, int id)
+{
+	char tp_name[128];
+	struct syscall *sc;
+
+	if (id > trace->syscalls.max) {
+		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+		if (nsyscalls == NULL)
+			return -1;
+
+		if (trace->syscalls.max != -1) {
+			memset(nsyscalls + trace->syscalls.max + 1, 0,
+			       (id - trace->syscalls.max) * sizeof(*sc));
+		} else {
+			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
+		}
+
+		trace->syscalls.table = nsyscalls;
+		trace->syscalls.max   = id;
+	}
+
+	sc = trace->syscalls.table + id;
+	sc->name = audit_syscall_to_name(id, trace->audit_machine);
+	if (sc->name == NULL)
+		return -1;
+
+	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
+
+	sc->tp_format = event_format__new("syscalls", tp_name);
+	sc->fmt = syscall_fmt__find(sc->name);
+
+	return sc->tp_format != NULL ? 0 : -1;
+}
+
+/*
+ * Print the syscall arguments as "name: value" pairs separated by ", ".
+ * With a tracepoint format the names come from its fields, skipping the
+ * first one; otherwise six raw values are printed as arg0..arg5.
+ * Returns the sum of the fprintf() return values.
+ */
+static size_t syscall__fprintf_args(struct syscall *sc, unsigned long *args, FILE *fp)
+{
+	int i = 0;
+	size_t printed = 0;
+
+	if (sc->tp_format != NULL) {
+		struct format_field *field;
+
+		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
+			printed += fprintf(fp, "%s%s: %ld", printed ? ", " : "",
+					   field->name, args[i++]);
+		}
+	} else {
+		while (i < 6) {
+			printed += fprintf(fp, "%sarg%d: %ld", printed ? ", " : "", i, args[i]);
+			++i;
+		}
+	}
+
+	return printed;
+}
+
+/*
+ * Set up the raw_syscalls:sys_enter and raw_syscalls:sys_exit tracepoints
+ * for the configured target and print the syscalls as they are read.  The
+ * read loop has no exit condition: this only returns when setup fails.
+ */
+static int trace__run(struct trace *trace)
+{
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+	struct perf_evsel *evsel, *evsel_enter, *evsel_exit;
+	int err = -1, i, nr_events = 0, before;
+
+	if (evlist == NULL) {
+		printf("Not enough memory to run!\n");
+		goto out;
+	}
+
+	evsel_enter = perf_evsel__newtp("raw_syscalls", "sys_enter", 0);
+	if (evsel_enter == NULL) {
+		printf("Couldn't read the raw_syscalls:sys_enter tracepoint information!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__add(evlist, evsel_enter);
+
+	evsel_exit = perf_evsel__newtp("raw_syscalls", "sys_exit", 1);
+	if (evsel_exit == NULL) {
+		printf("Couldn't read the raw_syscalls:sys_exit tracepoint information!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__add(evlist, evsel_exit);
+
+	err = perf_evlist__create_maps(evlist, &trace->opts.target);
+	if (err < 0) {
+		printf("Problems parsing the target to trace, check your options!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__config_attrs(evlist, &trace->opts);
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		printf("Couldn't create the events: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, UINT_MAX, false);
+	if (err < 0) {
+		printf("Couldn't mmap the events: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+again:
+	before = nr_events;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		union perf_event *event;
+
+		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			const u32 type = event->header.type;
+			struct syscall *sc;
+			struct perf_sample sample;
+			int id;
+
+			++nr_events;
+
+			switch (type) {
+			case PERF_RECORD_SAMPLE:
+				break;
+			case PERF_RECORD_LOST:
+				printf("LOST %" PRIu64 " events!\n", event->lost.lost);
+				continue;
+			default:
+				printf("Unexpected %s event, skipping...\n",
+					perf_event__name(type));
+				continue;
+			}
+
+			err = perf_evlist__parse_sample(evlist, event, &sample);
+			if (err) {
+				printf("Can't parse sample, err = %d, skipping...\n", err);
+				continue;
+			}
+
+			evsel = perf_evlist__id2evsel(evlist, sample.id);
+			if (evsel == NULL) {
+				printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
+				continue;
+			}
+
+			id = perf_evsel__intval(evsel, &sample, "id");
+			if (id < 0) {
+				printf("Invalid syscall %d id, skipping...\n", id);
+				continue;
+			}
+
+			/* No named table entry for this id yet: try to read its info. */
+			if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
+			    trace__read_syscall_info(trace, id))
+				continue;
+
+			if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
+				continue;
+
+			sc = &trace->syscalls.table[id];
+
+			if (evlist->threads->map[0] == -1 || evlist->threads->nr > 1)
+				printf("%d ", sample.tid);
+
+			if (evsel == evsel_enter) {
+				void *args = perf_evsel__rawptr(evsel, &sample, "args");
+
+				printf("%s(", sc->name);
+				syscall__fprintf_args(sc, args, stdout);
+			} else if (evsel == evsel_exit) {
+				int ret = perf_evsel__intval(evsel, &sample, "ret");
+
+				if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
+					char bf[256];
+					const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
+						   *e = audit_errno_to_name(-ret);
+
+					printf(") = -1 %s %s", e, emsg);
+				} else if (ret == 0 && sc->fmt && sc->fmt->timeout)
+					printf(") = 0 Timeout");
+				else
+					printf(") = %d", ret);
+
+				putchar('\n');
+			}
+		}
+	}
+
+	/* Nothing was read on this pass: wait in poll() on the evlist fds. */
+	if (nr_events == before)
+		poll(evlist->pollfd, evlist->nr_fds, -1);
+
+	goto again;
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return err;
+}
+
+/* 'perf trace' entry point: parse the options, resolve the uid, run the tracer. */
+int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	const char * const trace_usage[] = {
+		"perf trace [<options>]",
+		NULL
+	};
+	struct trace trace = {
+		.audit_machine = audit_detect_machine(),
+		.syscalls = {
+			.max = -1,
+		},
+		.opts = {
+			.target = {
+				.uid	   = UINT_MAX,
+				.uses_mmap = true,
+			},
+			.user_freq     = UINT_MAX,
+			.user_interval = ULLONG_MAX,
+			.no_delay      = true,
+			.mmap_pages    = 1024,
+		},
+	};
+	const struct option trace_options[] = {
+	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
+		    "trace events on existing process id"),
+	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
+		    "trace events on existing thread id"),
+	OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu",
+		    "list of cpus to monitor"),
+	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
+		    "child tasks do not inherit counters"),
+	OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages,
+		     "number of mmap data pages"),
+	OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
+		   "user to profile"),
+	OPT_END()
+	};
+	int err;
+
+	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
+	if (argc)
+		usage_with_options(trace_usage, trace_options);
+
+	err = perf_target__parse_uid(&trace.opts.target);
+	if (err) {
+		char bf[BUFSIZ];
+		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+		printf("%s", bf);
+		return err;
+	}
+
+	return trace__run(&trace);
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3ea74ed1b26b799620e53484dcf4e60c3bad6019..08143bd854c7db8b09369ee9e108d9dd09714518 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -34,6 +34,7 @@ extern int cmd_kmem(int argc, const char **argv, const char *prefix);
 extern int cmd_lock(int argc, const char **argv, const char *prefix);
 extern int cmd_kvm(int argc, const char **argv, const char *prefix);
 extern int cmd_test(int argc, const char **argv, const char *prefix);
+extern int cmd_trace(int argc, const char **argv, const char *prefix);
 extern int cmd_inject(int argc, const char **argv, const char *prefix);
 
 extern int find_scripts(char **scripts_array, char **scripts_path_array);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 0303ec69227490ae630c8265be5001b8a51a1422..3e86bbd8c2d5136cdbb756b708bd5021fed1599d 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -17,6 +17,7 @@ perf-report			mainporcelain common
 perf-stat			mainporcelain common
 perf-timechart			mainporcelain common
 perf-top			mainporcelain common
+perf-trace			mainporcelain common
 perf-script			mainporcelain common
 perf-probe			mainporcelain full
 perf-kmem			mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index fb8578cfa03cec98044a3a9d997c791760961698..3fb052c9a27f4b801ac234e544774502205255fb 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -55,6 +55,7 @@ static struct cmd_struct commands[] = {
 	{ "lock",	cmd_lock,	0 },
 	{ "kvm",	cmd_kvm,	0 },
 	{ "test",	cmd_test,	0 },
+	{ "trace",	cmd_trace,	0 },
 	{ "inject",	cmd_inject,	0 },
 };
 