提交 fbe96f29 编写于 作者: S Stephane Eranian 提交者: Arnaldo Carvalho de Melo

perf tools: Make perf.data more self-descriptive (v8)

The goal of this patch is to include more information about the host
environment into the perf.data so it is more self-descriptive. Over time,
profiles are captured on various machines and it becomes hard to track
what was recorded, on what machine and when.

This patch provides a way to solve this by extending the perf.data file
with basic information about the host machine. To add those extensions,
we leverage the feature bits capabilities of the perf.data format.  The
change is backward compatible with existing perf.data files.

We define the following useful new extensions:
 - HEADER_HOSTNAME: the hostname
 - HEADER_OSRELEASE: the kernel release number
 - HEADER_ARCH: the hw architecture
 - HEADER_CPUDESC: generic CPU description
 - HEADER_NRCPUS: number of online/avail cpus
 - HEADER_CMDLINE: perf command line
 - HEADER_VERSION: perf version
 - HEADER_TOPOLOGY: cpu topology
 - HEADER_EVENT_DESC: full event description (attrs)
 - HEADER_CPUID: easy-to-parse low level CPU identification

The small granularity for the entries is to make it easier to extend
without breaking backward compatibility. Many entries are provided as
ASCII strings.

Perf report/script have been modified to print the basic information as
easy-to-parse ASCII strings. Extended information about CPU and NUMA
topology may be requested with the -I option.

Thanks to David Ahern for reviewing and testing the many versions of
this patch.

 $ perf report --stdio
 # ========
 # captured on : Mon Sep 26 15:22:14 2011
 # hostname : quad
 # os release : 3.1.0-rc4-tip
 # perf version : 3.1.0-rc4
 # arch : x86_64
 # nrcpus online : 4
 # nrcpus avail : 4
 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz
 # cpuid : GenuineIntel,6,15,11
 # total memory : 8105360 kB
 # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date
 # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31,
 # HEADER_CPU_TOPOLOGY info available, use -I to display
 # HEADER_NUMA_TOPOLOGY info available, use -I to display
 # ========
 #
 ...

 $ perf report --stdio -I
 # ========
 # captured on : Mon Sep 26 15:22:14 2011
 # hostname : quad
 # os release : 3.1.0-rc4-tip
 # perf version : 3.1.0-rc4
 # arch : x86_64
 # nrcpus online : 4
 # nrcpus avail : 4
 # cpudesc : Intel(R) Core(TM)2 Quad CPU Q6600 @ 2.40GHz
 # cpuid : GenuineIntel,6,15,11
 # total memory : 8105360 kB
 # cmdline : /home/eranian/perfmon/official/tip/build/tools/perf/perf record date
 # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, id = { 29, 30, 31,
 # sibling cores   : 0-3
 # sibling threads : 0
 # sibling threads : 1
 # sibling threads : 2
 # sibling threads : 3
 # node0 meminfo  : total = 8320608 kB, free = 7571024 kB
 # node0 cpu list : 0-3
 # ========
 #
 ...
Reviewed-by: David Ahern <dsahern@gmail.com>
Tested-by: David Ahern <dsahern@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20110930134040.GA5575@quad
Signed-off-by: Stephane Eranian <eranian@google.com>
[ committer notes: Use --show-info in the tools as was in the docs, rename
  perf_header_fprintf_info to perf_file_section__fprintf_info, fixup
  conflict with f69b64f7 "perf: Support setting the disassembler style" ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
上级 be83f5ed
......@@ -139,6 +139,12 @@ OPTIONS
--show-total-period:: Show a column with the sum of periods.
-I::
--show-info::
Display extended information about the perf.data file. This adds
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
SEE ALSO
--------
linkperf:perf-stat[1]
......@@ -188,6 +188,13 @@ OPTIONS
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
-I::
--show-info::
Display extended information about the perf.data file. This adds
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
It can only be used with the perf script report mode.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
......
......@@ -2,3 +2,4 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../util/header.h"
/*
 * Two-level stringification: __stringify() expands its argument first and
 * then __stringify_1() turns the expanded text into a string literal.
 */
#define __stringify_1(x) #x
#define __stringify(x) __stringify_1(x)
/*
 * Read special purpose register 'rn' with the "mfspr" instruction and yield
 * its value as an unsigned long (statement-expression macro).
 */
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
: "=r" (rval)); rval; })
#define SPRN_PVR 0x11F /* Processor Version Register */
/* Bits 31-16 of the PVR value */
#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
/* Bits 15-0 of the PVR value */
#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */
/*
 * Build the PowerPC CPU identification string "<version>,<revision>" from
 * the Processor Version Register and store it in buffer.
 *
 * buffer: destination for the NUL-terminated string
 * sz:     size of buffer in bytes
 *
 * Returns 0 on success, -1 if buffer is NULL, sz is 0, formatting failed or
 * the complete string does not fit into sz bytes.
 */
int
get_cpuid(char *buffer, size_t sz)
{
	unsigned long pvr;
	int nb;

	/* snprintf() stores nothing when sz is 0, so there is nothing to return */
	if (buffer == NULL || sz == 0)
		return -1;

	pvr = mfspr(SPRN_PVR);

	nb = snprintf(buffer, sz, "%lu,%lu", PVR_VER(pvr), PVR_REV(pvr));

	/*
	 * snprintf() returns the length the full string needs; anything
	 * >= sz means the output was truncated.
	 */
	if (nb < 0 || (size_t)nb >= sz)
		return -1;

	return 0;
}
......@@ -2,3 +2,4 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../util/header.h"
/*
 * Execute the x86 CPUID instruction for leaf 'op'.
 *
 * op: value loaded into EAX before CPUID runs
 * a:  receives EAX after CPUID
 * b:  receives the EBX result (delivered through ESI, see below)
 * c:  receives ECX after CPUID
 * d:  receives EDX after CPUID
 *
 * ECX is not set on input, so leaves that take a sub-leaf index in ECX
 * see whatever value happens to be in the register.
 */
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
unsigned int *d)
{
/*
 * The EBX result is copied to ESI and returned through the "=S" output;
 * EBX itself is never named in the constraints.
 * NOTE(review): the raw bytes 0x53 / 0x5b look like the one-byte
 * "push %ebx" / "pop %ebx" opcodes, i.e. EBX is saved and restored around
 * CPUID (presumably because EBX may be reserved, e.g. for PIC) - confirm
 * against the x86 opcode map.
 */
__asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
"movl %%ebx, %%esi\n\t.byte 0x5b"
: "=a" (*a),
"=S" (*b),
"=c" (*c),
"=d" (*d)
: "a" (op));
}
/*
 * Build the x86 CPU identification string "<vendor>,<family>,<model>,<step>"
 * (e.g. "GenuineIntel,6,15,11") and store it in buffer.
 *
 * buffer: destination for the NUL-terminated string
 * sz:     size of buffer in bytes
 *
 * family, model and step are reported as -1 when CPUID leaf 1 is not
 * available.
 *
 * Returns 0 on success, -1 if buffer is NULL, sz is 0, formatting failed or
 * the complete string does not fit into sz bytes.
 */
int
get_cpuid(char *buffer, size_t sz)
{
	unsigned int a, b, c, d, lvl;
	int family = -1, model = -1, step = -1;
	int nb;
	char vendor[16];

	/* snprintf() stores nothing when sz is 0, so there is nothing to return */
	if (buffer == NULL || sz == 0)
		return -1;

	/* leaf 0: highest supported leaf in EAX, vendor text in EBX, EDX, ECX */
	cpuid(0, &lvl, &b, &c, &d);
	/* raw register bytes, not C strings: copy them verbatim */
	memcpy(&vendor[0], &b, 4);
	memcpy(&vendor[4], &d, 4);
	memcpy(&vendor[8], &c, 4);
	vendor[12] = '\0';

	if (lvl >= 1) {
		cpuid(1, &a, &b, &c, &d);

		family = (a >> 8) & 0xf;  /* bits 11 - 8 */
		model  = (a >> 4) & 0xf;  /* bits 7 - 4 */
		step   = a & 0xf;         /* bits 3 - 0 */

		/* extended family */
		if (family == 0xf)
			family += (a >> 20) & 0xff;

		/* extended model */
		if (family >= 0x6)
			model += ((a >> 16) & 0xf) << 4;
	}

	nb = snprintf(buffer, sz, "%s,%d,%d,%d", vendor, family, model, step);

	/*
	 * snprintf() returns the length the full string needs; anything
	 * >= sz means the output was truncated. Checking the return value
	 * (instead of searching for an end marker) stays correct even if the
	 * vendor text happens to contain the marker character.
	 */
	if (nb < 0 || (size_t)nb >= sz)
		return -1;

	return 0;
}
......@@ -529,6 +529,19 @@ static int __cmd_record(int argc, const char **argv)
if (have_tracepoints(&evsel_list->entries))
perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
perf_header__set_feat(&session->header, HEADER_HOSTNAME);
perf_header__set_feat(&session->header, HEADER_OSRELEASE);
perf_header__set_feat(&session->header, HEADER_ARCH);
perf_header__set_feat(&session->header, HEADER_CPUDESC);
perf_header__set_feat(&session->header, HEADER_NRCPUS);
perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
perf_header__set_feat(&session->header, HEADER_CMDLINE);
perf_header__set_feat(&session->header, HEADER_VERSION);
perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
perf_header__set_feat(&session->header, HEADER_CPUID);
/* 512 kiB: default amount of unprivileged mlocked memory */
if (mmap_pages == UINT_MAX)
mmap_pages = (512 * 1024) / page_size;
......@@ -800,6 +813,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
int err = -ENOMEM;
struct perf_evsel *pos;
perf_header__set_cmdline(argc, argv);
evsel_list = perf_evlist__new(NULL, NULL);
if (evsel_list == NULL)
return -ENOMEM;
......
......@@ -40,6 +40,7 @@ static char const *input_name = "perf.data";
static bool force, use_tui, use_stdio;
static bool hide_unresolved;
static bool dont_use_callchains;
static bool show_full_info;
static bool show_threads;
static struct perf_read_values show_threads_values;
......@@ -273,6 +274,9 @@ static int __cmd_report(void)
goto out_delete;
}
if (use_browser <= 0)
perf_session__fprintf_info(session, stdout, show_full_info);
if (show_threads)
perf_read_values_init(&show_threads_values);
......@@ -485,6 +489,8 @@ static const struct option options[] = {
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"Display extended information about perf.data file"),
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
......
......@@ -22,6 +22,7 @@ static u64 last_timestamp;
static u64 nr_unordered;
extern const struct option record_options[];
static bool no_callchain;
static bool show_full_info;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
......@@ -1083,7 +1084,8 @@ static const struct option options[] = {
"comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
parse_output_fields),
OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_END()
};
......@@ -1268,6 +1270,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
return -1;
}
perf_session__fprintf_info(session, stdout, show_full_info);
if (!no_callchain)
symbol_conf.use_callchain = true;
else
......
......@@ -4,7 +4,6 @@
#include "util/util.h"
#include "util/strbuf.h"
extern const char perf_version_string[];
extern const char perf_usage_string[];
extern const char perf_more_info_string[];
......
......@@ -9,18 +9,21 @@ void get_term_dimensions(struct winsize *ws);
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#endif
#if defined(__x86_64__)
#include "../../arch/x86/include/asm/unistd.h"
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#endif
#ifdef __powerpc__
#include "../../arch/powerpc/include/asm/unistd.h"
#define rmb() asm volatile ("sync" ::: "memory")
#define cpu_relax() asm volatile ("" ::: "memory");
#define CPUINFO_PROC "cpu"
#endif
#ifdef __s390__
......@@ -37,30 +40,35 @@ void get_term_dimensions(struct winsize *ws);
# define rmb() asm volatile("" ::: "memory")
#endif
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu type"
#endif
#ifdef __hppa__
#include "../../arch/parisc/include/asm/unistd.h"
#define rmb() asm volatile("" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory");
#define CPUINFO_PROC "cpu"
#endif
#ifdef __sparc__
#include "../../arch/sparc/include/asm/unistd.h"
#define rmb() asm volatile("":::"memory")
#define cpu_relax() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __alpha__
#include "../../arch/alpha/include/asm/unistd.h"
#define rmb() asm volatile("mb" ::: "memory")
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __ia64__
#include "../../arch/ia64/include/asm/unistd.h"
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#ifdef __arm__
......@@ -71,6 +79,7 @@ void get_term_dimensions(struct winsize *ws);
*/
#define rmb() ((void(*)(void))0xffff0fa0)()
#define cpu_relax() asm volatile("":::"memory")
#define CPUINFO_PROC "Processor"
#endif
#ifdef __mips__
......@@ -83,6 +92,7 @@ void get_term_dimensions(struct winsize *ws);
: /* no input */ \
: "memory")
#define cpu_relax() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#include <time.h>
......@@ -171,5 +181,6 @@ struct ip_callchain {
};
extern bool perf_host, perf_guest;
extern const char perf_version_string[];
#endif
此差异已折叠。
......@@ -12,6 +12,20 @@
/*
 * Feature identifiers for the optional perf.data header sections; these are
 * the values passed to perf_header__set_feat() and friends.
 * NOTE(review): the numeric values appear to be feature-bit positions stored
 * in perf.data files, so new entries should only be appended right before
 * HEADER_LAST_FEATURE - confirm against the header read/write code.
 */
enum {
HEADER_TRACE_INFO = 1,
HEADER_BUILD_ID,
/* hostname */
HEADER_HOSTNAME,
/* kernel release number */
HEADER_OSRELEASE,
/* perf version */
HEADER_VERSION,
/* hw architecture */
HEADER_ARCH,
/* number of online/avail cpus */
HEADER_NRCPUS,
/* generic CPU description */
HEADER_CPUDESC,
/* easy-to-parse low level CPU identification */
HEADER_CPUID,
/* total memory */
HEADER_TOTAL_MEM,
/* perf command line */
HEADER_CMDLINE,
/* full event description (attrs) */
HEADER_EVENT_DESC,
/* cpu topology */
HEADER_CPU_TOPOLOGY,
/* numa topology */
HEADER_NUMA_TOPOLOGY,
/* end marker, not a feature itself; must stay last */
HEADER_LAST_FEATURE,
};
......@@ -68,10 +82,15 @@ void perf_header__set_feat(struct perf_header *header, int feat);
void perf_header__clear_feat(struct perf_header *header, int feat);
bool perf_header__has_feat(const struct perf_header *header, int feat);
int perf_header__set_cmdline(int argc, const char **argv);
int perf_header__process_sections(struct perf_header *header, int fd,
void *data,
int (*process)(struct perf_file_section *section,
struct perf_header *ph,
int feat, int fd));
struct perf_header *ph,
int feat, int fd, void *data));
int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
const char *name, bool is_kallsyms);
......@@ -104,4 +123,10 @@ int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
struct perf_session *session);
int perf_event__process_build_id(union perf_event *event,
struct perf_session *session);
/*
* arch specific callback
*/
int get_cpuid(char *buffer, size_t sz);
#endif /* __PERF_HEADER_H */
......@@ -1326,3 +1326,22 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return 0;
}
/*
 * Print the descriptive banner of a perf.data session to fp.
 *
 * session: session whose file descriptor and header are inspected
 * fp:      output stream
 * full:    forwarded to perf_header__fprintf_info() to select the amount of
 *          detail printed
 *
 * The banner is framed by "# ========" lines and starts with a
 * "captured on" line derived from the st_ctime of the session file.
 * Nothing is printed when session or fp is NULL or when fstat() fails.
 */
void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
				bool full)
{
	struct stat file_stat;

	if (!session || !fp)
		return;

	if (fstat(session->fd, &file_stat) == -1)
		return;

	fprintf(fp, "# ========\n");
	/* no "\n" in the format: the string returned by ctime() supplies it */
	fprintf(fp, "# captured on: %s", ctime(&file_stat.st_ctime));
	perf_header__fprintf_info(session, fp, full);
	fprintf(fp, "# ========\n#\n");
}
......@@ -177,4 +177,5 @@ void perf_session__print_ip(union perf_event *event,
int perf_session__cpu_bitmap(struct perf_session *session,
const char *cpu_list, unsigned long *cpu_bitmap);
void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
#endif /* __PERF_SESSION_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册