提交 d0713d4c 编写于 作者: N Nicholas Fraser 提交者: Arnaldo Carvalho de Melo

perf data: Add JSON export

This adds a feature to export perf data to JSON.

The resolved symbols are exported into the JSON so that external tools
don't need to load the dsos themselves (or even have access to them at
all.) This makes it easy to load and analyze perf data with standalone
tools where direct perf or libbabeltrace integration is impractical.

The exporter uses a minimal inline JSON encoding without any external
dependencies. Currently it only outputs some headers and sample metadata
but it's easily extensible.

Use it like this:

  $ perf data convert --to-json out.json

Committer notes:

Fixup a __printf() bug that broke the build:

  util/data-convert-json.c:103:11: error: expected ‘)’ before numeric constant
    103 | __(printf, 5, 6)
        |           ^~
        |           )
  util/data-convert-json.c: In function ‘output_sample_callchain_entry’:
  util/data-convert-json.c:124:2: error: implicit declaration of function ‘output_json_key_format’; did you mean ‘output_json_format’? [-Werror=implicit-function-declaration]
    124 |  output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
        |  ^~~~~~~~~~~~~~~~~~~~~~
        |  output_json_format

Also had to add this patch to fix errors reported by various versions of
clang:

  -       if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) {
  +       if (al && al->sym && al->sym->namelen) {

al->sym->name is a zero sized array, to avoid one extra alloc in the
symbol__new() constructor, sym->namelen carries its strlen.

Committer testing:

  $ ls -la out.json
  ls: cannot access 'out.json': No such file or directory
  $ perf record sleep 0.1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ]
  $ perf report --stats | grep -w SAMPLE
            SAMPLE events:          8
  $ perf data convert --to-json out.json
  [ perf data convert: Converted 'perf.data' into JSON data 'out.json' ]
  [ perf data convert: Converted and wrote 0.002 MB (8 samples) ]
  $ ls -la out.json
  -rw-rw-r--. 1 acme acme 2017 Apr 26 17:29 out.json
  $ cat out.json
  {
  	"linux-perf-json-version": 1,
  	"headers": {
  		"header-version": 1,
  		"captured-on": "2021-04-26T20:28:57Z",
  		"data-offset": 432,
  		"data-size": 1016,
  		"feat-offset": 1448,
  		"hostname": "five",
  		"os-release": "5.11.14-200.fc33.x86_64",
  		"arch": "x86_64",
  		"cpu-desc": "AMD Ryzen 9 3900X 12-Core Processor",
  		"cpuid": "AuthenticAMD,23,113,0",
  		"nrcpus-online": 24,
  		"nrcpus-avail": 24,
  		"perf-version": "5.12.gee134f3189bd",
  		"cmdline": [
  			"/home/acme/bin/perf",
  			"record",
  			"sleep",
  			"0.1"
  		]
  	},
  	"samples": [
  		{
  			"timestamp": 170517539043684,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa6268827"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539048443,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa661359d"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539051018,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa6311e18"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539053652,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0x7fdb77b4812b",
  					"symbol": "_dl_start",
  					"dso": "ld-2.32.so"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539055306,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa6269286"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539057590,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa62abd8b"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539067559,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0x7fdb77b5e9e9",
  					"symbol": "__GI___tunables_init",
  					"dso": "ld-2.32.so"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539282452,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0x7fdb779978d2",
  					"symbol": "getenv",
  					"dso": "libc-2.32.so"
  				}
  			]
  		}
  	]
  }
  $
Signed-off-by: Nicholas Fraser <nfraser@codeweavers.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Changbin Du <changbin.du@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tan Xiaojun <tanxiaojun@huawei.com>
Cc: Ulrich Czekalla <uczekalla@codeweavers.com>
Link: http://lore.kernel.org/lkml/3884969f-804d-2f53-c648-e2b0bd85edff@codeweavers.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
上级 5508c9da
......@@ -17,7 +17,7 @@ Data file related processing.
COMMANDS
--------
convert::
Converts perf data file into another format (only CTF [1] format is support by now).
Converts perf data file into another format.
It's possible to set data-convert debug variable to get debug messages from conversion,
like:
perf --debug data-convert data convert ...
......@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.
--to-json::
Triggers JSON conversion. Specify the JSON filename to output.
--tod::
Convert time to wall clock time.
......
......@@ -7,7 +7,6 @@
#include "debug.h"
#include <subcmd/parse-options.h>
#include "data-convert.h"
#include "data-convert-bt.h"
typedef int (*data_cmd_fn_t)(int argc, const char **argv);
......@@ -55,6 +54,7 @@ static const char * const data_convert_usage[] = {
static int cmd_data_convert(int argc, const char **argv)
{
const char *to_json = NULL;
const char *to_ctf = NULL;
struct perf_data_convert_opts opts = {
.force = false,
......@@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv)
const struct option options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
......@@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_END()
};
#ifndef HAVE_LIBBABELTRACE_SUPPORT
pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
argc = parse_options(argc, argv, options,
data_convert_usage, 0);
if (argc) {
......@@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv)
return -1;
}
if (to_json && to_ctf) {
pr_err("You cannot specify both --to-ctf and --to-json.\n");
return -1;
}
if (!to_json && !to_ctf) {
pr_err("You must specify one of --to-ctf or --to-json.\n");
return -1;
}
if (to_json)
return bt_convert__perf2json(input_name, to_json, &opts);
if (to_ctf) {
#ifdef HAVE_LIBBABELTRACE_SUPPORT
return bt_convert__perf2ctf(input_name, to_ctf, &opts);
#else
pr_err("The libbabeltrace support is not compiled in.\n");
pr_err("The libbabeltrace support is not compiled in. perf should be "
"compiled with environment variables LIBBABELTRACE=1 and "
"LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
}
......
......@@ -165,6 +165,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
perf-y += data-convert-json.o
perf-y += scripting-engines/
......
......@@ -21,7 +21,7 @@
#include <babeltrace/ctf/events.h>
#include <traceevent/event-parse.h>
#include "asm/bug.h"
#include "data-convert-bt.h"
#include "data-convert.h"
#include "session.h"
#include "debug.h"
#include "tool.h"
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __DATA_CONVERT_BT_H
#define __DATA_CONVERT_BT_H
#include "data-convert.h"
#ifdef HAVE_LIBBABELTRACE_SUPPORT
int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
struct perf_data_convert_opts *opts);
#endif /* HAVE_LIBBABELTRACE_SUPPORT */
#endif /* __DATA_CONVERT_BT_H */
// SPDX-License-Identifier: GPL-2.0-only
/*
* JSON export.
*
* Copyright (C) 2021, CodeWeavers Inc. <nfraser@codeweavers.com>
*/
#include "data-convert.h"
#include <fcntl.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <unistd.h>
#include "linux/compiler.h"
#include "linux/err.h"
#include "util/auxtrace.h"
#include "util/debug.h"
#include "util/dso.h"
#include "util/event.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/header.h"
#include "util/map.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/tool.h"
// State shared by the JSON export callbacks in this file.
struct convert_json {
// Embedded tool; callbacks receive a pointer to it and recover the enclosing
// convert_json with container_of().
struct perf_tool tool;
// Stream the JSON document is written to.
FILE *out;
// True until the first sample has been written; used to decide whether a
// comma must precede the next sample object.
bool first;
// Number of samples processed so far (reported in the final summary).
u64 events_count;
};
// Outputs a JSON-encoded string surrounded by quotes with characters escaped.
//
// Every byte is classified as an unsigned char. Plain char may be signed, in
// which case bytes >= 0x80 (e.g. the bytes of a UTF-8 sequence) would compare
// as negative, be mistaken for control characters and be printed as a
// sign-extended, invalid \u escape. Such bytes are passed through unchanged.
//
// out: stream to write to.
// s:   NUL-terminated string to encode; must not be NULL.
static void output_json_string(FILE *out, const char *s)
{
	fputc('"', out);

	for (; *s; ++s) {
		unsigned char ch = (unsigned char)*s;

		switch (ch) {
		// required escapes with special forms as per RFC 8259
		case '"':  fputs("\\\"", out); break;
		case '\\': fputs("\\\\", out); break;
		case '\b': fputs("\\b",  out); break;
		case '\f': fputs("\\f",  out); break;
		case '\n': fputs("\\n",  out); break;
		case '\r': fputs("\\r",  out); break;
		case '\t': fputs("\\t",  out); break;

		default:
			// all other control characters must be escaped by hex code
			if (ch <= 0x1f)
				fprintf(out, "\\u%04x", (unsigned int)ch);
			else
				fputc(ch, out);
			break;
		}
	}

	fputc('"', out);
}
// Begins a new line for the next value of a JSON object or array: writes a
// separating comma when requested, then a newline and one tab per nesting
// level given by depth.
static void output_json_delimiters(FILE *out, bool comma, int depth)
{
	int remaining;

	if (comma)
		fputc(',', out);

	fputc('\n', out);

	for (remaining = depth; remaining > 0; --remaining)
		fputc('\t', out);
}
// Writes the delimiters for a new value, then the printf-style formatted text
// as-is. No JSON escaping is applied to the formatted output.
__printf(4, 5)
static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
{
	va_list ap;

	output_json_delimiters(out, comma, depth);

	va_start(ap, format);
	vfprintf(out, format, ap);
	va_end(ap);
}
// Writes one object member of the form "key": "value" on a new line, with
// both the key and the value emitted as escaped JSON strings.
static void output_json_key_string(FILE *out, bool comma, int depth,
				   const char *key, const char *value)
{
	output_json_delimiters(out, comma, depth);

	output_json_string(out, key);
	fputc(':', out);
	fputc(' ', out);
	output_json_string(out, value);
}
// Writes one object member on a new line whose key is an escaped JSON string
// and whose value is the printf-style formatted text, emitted without escaping.
__printf(5, 6)
static void output_json_key_format(FILE *out, bool comma, int depth,
				   const char *key, const char *format, ...)
{
	va_list ap;

	output_json_delimiters(out, comma, depth);

	output_json_string(out, key);
	fputc(':', out);
	fputc(' ', out);

	va_start(ap, format);
	vfprintf(out, format, ap);
	va_end(ap);
}
// Writes one callchain entry object: always the "ip" as a hex string, plus
// "symbol" when al carries a symbol with a non-empty name, plus "dso" when
// that location also has a map whose dso has a non-empty short name.
// al may be NULL, in which case only the ip is written.
static void output_sample_callchain_entry(struct perf_tool *tool,
					  u64 ip, struct addr_location *al)
{
	struct convert_json *c = container_of(tool, struct convert_json, tool);
	FILE *out = c->out;
	bool have_symbol = al && al->sym && al->sym->namelen;
	const char *dso_name = NULL;

	output_json_format(out, false, 4, "{");
	output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);

	if (have_symbol) {
		output_json_key_string(out, true, 5, "symbol", al->sym->name);

		// the dso is only reported alongside a resolved symbol
		if (al->map && al->map->dso)
			dso_name = al->map->dso->short_name;
		if (dso_name && dso_name[0] != '\0')
			output_json_key_string(out, true, 5, "dso", dso_name);
	}

	output_json_format(out, false, 4, "}");
}
// Sample callback: resolves the sample and appends one JSON object for it to
// the output, holding timestamp, pid, tid, cpu (only when non-negative), comm
// and a callchain array. Without a recorded callchain the array holds a single
// entry for the sample ip.
// Returns 0 on success, -1 when machine__resolve() fails.
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct evsel *evsel __maybe_unused,
				struct machine *machine)
{
	struct convert_json *c = container_of(tool, struct convert_json, tool);
	FILE *out = c->out;
	struct addr_location al, tal;
	struct thread *thread;
	u8 cpumode = PERF_RECORD_MISC_USER;

	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("Sample resolution failed!\n");
		return -1;
	}
	thread = al.thread;

	++c->events_count;

	// every sample object except the first is preceded by a comma
	if (!c->first)
		fputc(',', out);
	c->first = false;

	output_json_format(out, false, 2, "{");

	output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
	output_json_key_format(out, true, 3, "pid", "%i", thread->pid_);
	output_json_key_format(out, true, 3, "tid", "%i", thread->tid);
	if (thread->cpu >= 0)
		output_json_key_format(out, true, 3, "cpu", "%i", thread->cpu);
	output_json_key_string(out, true, 3, "comm", thread__comm_str(thread));

	output_json_key_format(out, true, 3, "callchain", "[");

	if (!sample->callchain) {
		output_sample_callchain_entry(tool, sample->ip, &al);
	} else {
		bool need_comma = false;
		unsigned int idx;

		for (idx = 0; idx < sample->callchain->nr; ++idx) {
			u64 ip = sample->callchain->ips[idx];

			// context markers only switch the cpumode used for the
			// entries that follow; they produce no output themselves
			if (ip >= PERF_CONTEXT_MAX) {
				if (ip == PERF_CONTEXT_HV)
					cpumode = PERF_RECORD_MISC_HYPERVISOR;
				else if (ip == PERF_CONTEXT_KERNEL)
					cpumode = PERF_RECORD_MISC_KERNEL;
				else if (ip == PERF_CONTEXT_USER)
					cpumode = PERF_RECORD_MISC_USER;
				else
					pr_debug("invalid callchain context: %"
						 PRId64 "\n", (s64) ip);
				continue;
			}

			if (need_comma)
				fputc(',', out);
			need_comma = true;

			if (thread__find_symbol(thread, cpumode, ip, &tal))
				output_sample_callchain_entry(tool, ip, &tal);
			else
				output_sample_callchain_entry(tool, ip, NULL);
		}
	}

	output_json_format(out, false, 3, "]");
	output_json_format(out, false, 2, "}");

	return 0;
}
// Writes "key": "value" as a further member of the headers object, or nothing
// at all when value is NULL. NOTE(review): presumably a field is NULL when the
// matching feature section is absent from the input file; confirm.
static void output_header_string(FILE *out, const char *key, const char *value)
{
	if (value)
		output_json_key_string(out, true, 2, key, value);
}

// Writes the members of the "headers" object: header version, capture time
// (taken from the mtime of the input file), data/feature offsets, host and CPU
// description, optional clock data, perf version and the recorded command line.
// The caller writes the surrounding braces.
//
// String fields that are not set are omitted instead of being dereferenced,
// and "captured-on" is omitted when the time cannot be obtained or formatted.
static void output_headers(struct perf_session *session, struct convert_json *c)
{
	struct stat st;
	struct perf_header *header = &session->header;
	int fd = perf_data__fd(session->data);
	int i;
	FILE *out = c->out;

	output_json_key_format(out, false, 2, "header-version", "%u", header->version);

	if (fstat(fd, &st) >= 0) {
		time_t stctime = st.st_mtime;
		struct tm *tm = gmtime(&stctime);
		char buf[256];

		// gmtime() may fail and strftime() returns 0 when nothing usable
		// was produced; never emit an indeterminate buffer
		if (tm && strftime(buf, sizeof(buf), "%FT%TZ", tm) > 0)
			output_json_key_string(out, true, 2, "captured-on", buf);
		else
			pr_debug("Failed to format mtime of source file, not writing captured-on\n");
	} else {
		pr_debug("Failed to get mtime of source file, not writing captured-on\n");
	}

	output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
	output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
	output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);

	output_header_string(out, "hostname", header->env.hostname);
	output_header_string(out, "os-release", header->env.os_release);
	output_header_string(out, "arch", header->env.arch);
	output_header_string(out, "cpu-desc", header->env.cpu_desc);
	output_header_string(out, "cpuid", header->env.cpuid);

	output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
	output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);

	if (header->env.clock.enabled) {
		output_json_key_format(out, true, 2, "clockid",
				"%u", header->env.clock.clockid);
		output_json_key_format(out, true, 2, "clock-time",
				"%" PRIu64, header->env.clock.clockid_ns);
		output_json_key_format(out, true, 2, "real-time",
				"%" PRIu64, header->env.clock.tod_ns);
	}

	output_header_string(out, "perf-version", header->env.version);

	output_json_key_format(out, true, 2, "cmdline", "[");
	for (i = 0; i < header->env.nr_cmdline; i++) {
		// elements after the first are separated by a comma
		output_json_delimiters(out, i != 0, 3);
		output_json_string(out, header->env.cmdline_argv[i]);
	}
	output_json_format(out, false, 2, "]");
}
int bt_convert__perf2json(const char *input_name, const char *output_name,
struct perf_data_convert_opts *opts __maybe_unused)
{
struct perf_session *session;
int fd;
int ret = -1;
struct convert_json c = {
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.cgroup = perf_event__process_cgroup,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.event_update = perf_event__process_event_update,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
.first = true,
.events_count = 0,
};
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
.path = input_name,
.force = opts->force,
};
if (opts->all) {
pr_err("--all is currently unsupported for JSON output.\n");
goto err;
}
if (opts->tod) {
pr_err("--tod is currently unsupported for JSON output.\n");
goto err;
}
fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
if (fd == -1) {
if (errno == EEXIST)
pr_err("Output file exists. Use --force to overwrite it.\n");
else
pr_err("Error opening output file!\n");
goto err;
}
c.out = fdopen(fd, "w");
if (!c.out) {
fprintf(stderr, "Error opening output file!\n");
close(fd);
goto err;
}
session = perf_session__new(&data, false, &c.tool);
if (IS_ERR(session)) {
fprintf(stderr, "Error creating perf session!\n");
goto err_fclose;
}
if (symbol__init(&session->header.env) < 0) {
fprintf(stderr, "Symbol init error!\n");
goto err_session_delete;
}
// The opening brace is printed manually because it isn't delimited from a
// previous value (i.e. we don't want a leading newline)
fputc('{', c.out);
// Version number for future-proofing. Most additions should be able to be
// done in a backwards-compatible way so this should only need to be bumped
// if some major breaking change must be made.
output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
// Output headers
output_json_format(c.out, true, 1, "\"headers\": {");
output_headers(session, &c);
output_json_format(c.out, false, 1, "}");
// Output samples
output_json_format(c.out, true, 1, "\"samples\": [");
perf_session__process_events(session);
output_json_format(c.out, false, 1, "]");
output_json_format(c.out, false, 0, "}");
fputc('\n', c.out);
fprintf(stderr,
"[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
data.path, output_name);
fprintf(stderr,
"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
(ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
ret = 0;
err_session_delete:
perf_session__delete(session);
err_fclose:
fclose(c.out);
err:
return ret;
}
......@@ -2,10 +2,20 @@
#ifndef __DATA_CONVERT_H
#define __DATA_CONVERT_H
#include <stdbool.h>
/* Options passed from 'perf data convert' to the conversion back ends. */
struct perf_data_convert_opts {
/*
 * Overwrite an existing output: the JSON exporter truncates the output file
 * instead of failing, and also forwards the flag to the input perf_data.
 */
bool force;
/*
 * Rejected as unsupported by the JSON exporter.
 * NOTE(review): its meaning for the CTF back end is not visible here.
 */
bool all;
/*
 * Convert time to wall clock time (--tod); rejected as unsupported by the
 * JSON exporter.
 */
bool tod;
};
/* CTF conversion; only declared when perf is built with libbabeltrace support. */
#ifdef HAVE_LIBBABELTRACE_SUPPORT
int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
struct perf_data_convert_opts *opts);
#endif /* HAVE_LIBBABELTRACE_SUPPORT */
/*
 * JSON conversion; always available.
 * NOTE(review): the second parameter is the JSON output file name even though
 * this prototype calls it to_ctf (the definition names it output_name).
 */
int bt_convert__perf2json(const char *input_name, const char *to_ctf,
struct perf_data_convert_opts *opts);
#endif /* __DATA_CONVERT_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册