Commit 7104f194, authored by Ingo Molnar

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

 - Make 'perf record' collect CPU cache info in the perf.data file header:

  $ perf record usleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
  $ perf report --header-only -I | tail -10 | head -8
  # CPU cache info:
  #  L1 Data                 32K [0-1]
  #  L1 Instruction          32K [0-1]
  #  L1 Data                 32K [2-3]
  #  L1 Instruction          32K [2-3]
  #  L2 Unified             256K [0-1]
  #  L2 Unified             256K [2-3]
  #  L3 Unified            4096K [0-3]
  $

  Will be used in 'perf c2c' and eventually in 'perf diff' to allow, for
  instance, running the same workload on multiple machines and then having
  'diff' show the hardware differences. (Jiri Olsa)

 - 'perf stat' now shows shadow metrics (insn per cycle, etc) in
   interval mode too. E.g:

    # perf stat -I 1000 -e instructions,cycles sleep 1
    #         time   counts unit events
       1.000215928  519,620      instructions     #  0.69 insn per cycle
       1.000215928  752,003      cycles
    <SNIP>

Infrastructure changes:

 - libapi now can also use pr_{warning,info,debug}() and that can be
   set by tools using it (Jiri Olsa)

 - libapi adopts filename__read_str() from perf, adds sysfs__read_str() (Jiri Olsa)

 - Add check for java alternatives cmd in jvmti Makefile, so that it manages
   to automatically find the right path for the JDK devel files on Ubuntu-like
   systems in addition to Fedora-like ones (Stephane Eranian)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
libapi-y += fd/
libapi-y += fs/
libapi-y += cpu.o
libapi-y += debug.o
......@@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
CFLAGS += -I$(srctree)/tools/lib/api
RM = rm -f
......
/*
 * Internal logging helpers for libapi.
 *
 * pr_warning(), pr_info() and pr_debug() forward to the matching __pr_*
 * callback and prefix every message with "libapi: ".
 */
#ifndef __API_DEBUG_INTERNAL_H__
#define __API_DEBUG_INTERNAL_H__
#include "debug.h"
/*
 * Invoke 'func' with the prefixed format string, but only when a callback
 * is installed (non-NULL); otherwise the message is silently dropped.
 * 'fmt' must be a string literal because it is concatenated with the prefix.
 */
#define __pr(func, fmt, ...) \
do { \
if ((func)) \
(func)("libapi: " fmt, ##__VA_ARGS__); \
} while (0)
/* Currently installed print callbacks, one per severity. */
extern libapi_print_fn_t __pr_warning;
extern libapi_print_fn_t __pr_info;
extern libapi_print_fn_t __pr_debug;
/* printf-style logging entry points used inside libapi. */
#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
#endif /* __API_DEBUG_INTERNAL_H__ */
#include <stdio.h>
#include <stdarg.h>
#include "debug.h"
#include "debug-internal.h"
/*
 * Default print callback: format the message printf-style and write it
 * to stderr. The return value is passed through from vfprintf().
 */
static int __base_pr(const char *format, ...)
{
	int written;
	va_list ap;

	va_start(ap, format);
	written = vfprintf(stderr, format, ap);
	va_end(ap);

	return written;
}
/*
 * Installed print callbacks. Warnings and info messages go to stderr by
 * default; the debug callback starts out unset (NULL), which the __pr()
 * macro treats as "do not print".
 */
libapi_print_fn_t __pr_warning = __base_pr;
libapi_print_fn_t __pr_info = __base_pr;
libapi_print_fn_t __pr_debug;

/*
 * Replace all three print callbacks at once. Passing NULL for a callback
 * silences that severity.
 */
void libapi_set_print(libapi_print_fn_t warn,
		      libapi_print_fn_t info,
		      libapi_print_fn_t debug)
{
	__pr_debug   = debug;
	__pr_info    = info;
	__pr_warning = warn;
}
/* Public interface for redirecting libapi's diagnostic output. */
#ifndef __API_DEBUG_H__
#define __API_DEBUG_H__
/* printf-style print callback; returns an int like the printf family. */
typedef int (*libapi_print_fn_t)(const char *, ...);
/* Install the callbacks used for warning, info and debug messages. */
void libapi_set_print(libapi_print_fn_t warn,
libapi_print_fn_t info,
libapi_print_fn_t debug);
#endif /* __API_DEBUG_H__ */
......@@ -13,6 +13,7 @@
#include <sys/mount.h>
#include "fs.h"
#include "debug-internal.h"
#define _STR(x) #x
#define STR(x) _STR(x)
......@@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long *value)
return err;
}
#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */

/*
 * filename__read_str - read the whole contents of a file into a heap buffer.
 * @filename: path of the file to read
 * @buf:      on success receives the malloc'ed buffer; the caller frees it
 * @sizep:    on success receives the number of bytes read
 *
 * Returns 0 on success or a negative errno value on failure (-ENOMEM when
 * the buffer cannot be grown). *buf and *sizep are only written on success.
 *
 * A read() error after some data has already been read is reported with
 * pr_warning() and treated as success, returning the partial data.
 *
 * The data is NOT NUL-terminated. Because the buffer is grown before every
 * read(), at least one allocated byte follows the data on success, so a
 * caller may store a terminator at (*buf)[*sizep].
 */
int filename__read_str(const char *filename, char **buf, size_t *sizep)
{
	size_t size = 0, alloc_size = 0;
	/* char * rather than void *: arithmetic on void * is a GNU extension */
	char *bf = NULL, *nbf;
	char sbuf[STRERR_BUFSIZE];
	/* read() returns ssize_t; do not narrow it to int */
	ssize_t n;
	int fd, err = 0;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return -errno;

	do {
		if (size == alloc_size) {
			alloc_size += BUFSIZ;
			nbf = realloc(bf, alloc_size);
			if (!nbf) {
				err = -ENOMEM;
				break;
			}
			bf = nbf;
		}

		n = read(fd, bf + size, alloc_size - size);
		if (n < 0) {
			if (size) {
				pr_warning("read failed %d: %s\n", errno,
					   strerror_r(errno, sbuf, sizeof(sbuf)));
				err = 0;
			} else
				err = -errno;
			break;
		}

		size += n;
	} while (n > 0);

	if (!err) {
		*sizep = size;
		*buf = bf;
	} else
		free(bf);

	close(fd);
	return err;
}
int sysfs__read_ull(const char *entry, unsigned long long *value)
{
char path[PATH_MAX];
......@@ -326,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value)
return filename__read_int(path, value);
}
/*
 * Read the sysfs file @entry (a path relative to the sysfs mount point)
 * into a newly allocated buffer via filename__read_str().
 *
 * Returns -1 when no sysfs mount point is known, otherwise whatever
 * filename__read_str() returns.
 */
int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
{
	const char *mnt = sysfs__mountpoint();
	char path[PATH_MAX];

	if (mnt == NULL)
		return -1;

	snprintf(path, sizeof(path), "%s/%s", mnt, entry);

	return filename__read_str(path, buf, sizep);
}
int sysctl__read_int(const char *sysctl, int *value)
{
char path[PATH_MAX];
......
......@@ -2,6 +2,7 @@
#define __API_FS__
#include <stdbool.h>
#include <unistd.h>
/*
* On most systems <limits.h> would have given us this, but not on some systems
......@@ -26,8 +27,10 @@ FS(tracefs)
int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
#endif /* __API_FS__ */
......@@ -735,6 +735,60 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
}
}
/* Output state shared by the "std" metric printing callbacks below. */
struct outstate {
FILE *fh; /* stream the metrics are written to */
bool newline; /* set by new_line_std(); consumed by print_metric_std() */
const char *prefix; /* text re-emitted at the start of a continuation line */
};
/* Total column width reserved for one printed metric. */
#define METRIC_LEN 35
/*
 * new_line callback: only records that a line break is wanted; the break
 * itself is emitted lazily by the next print_metric_std() call.
 */
static void new_line_std(void *ctx)
{
	((struct outstate *)ctx)->newline = true;
}
/*
 * Start a continuation line: emit a newline, repeat the prefix, then pad
 * so the next metric lines up under the previous one. An extra pad is
 * written for the aggregation modes that print an id column.
 */
static void do_new_line_std(struct outstate *os)
{
fputc('\n', os->fh);
fputs(os->prefix, os->fh);
/* mode-specific padding for the leading cpu/core/socket column */
if (stat_config.aggr_mode == AGGR_NONE)
fprintf(os->fh, " ");
if (stat_config.aggr_mode == AGGR_CORE)
fprintf(os->fh, " ");
if (stat_config.aggr_mode == AGGR_SOCKET)
fprintf(os->fh, " ");
/* padding common to every mode */
fprintf(os->fh, " ");
}
/*
 * print_metric callback for the standard (non-CSV) output.
 *
 * ctx:   struct outstate describing the output stream
 * color: optional color passed to color_fprintf(); NULL for plain output
 * fmt:   printf format for 'val'; NULL means "no value to print"
 * unit:  label printed after the value; NULL means "no metric"
 * val:   metric value
 *
 * When either fmt or unit is NULL only METRIC_LEN blanks are written, so
 * the columns that follow stay aligned.
 */
static void print_metric_std(void *ctx, const char *color, const char *fmt,
const char *unit, double val)
{
struct outstate *os = ctx;
FILE *out = os->fh;
int n;
bool newline = os->newline;
/* the pending line-break request is cleared even if nothing is printed */
os->newline = false;
if (unit == NULL || fmt == NULL) {
fprintf(out, "%-*s", METRIC_LEN, "");
return;
}
if (newline)
do_new_line_std(os);
/* n accumulates the characters used so far within this metric column */
n = fprintf(out, " # ");
if (color)
n += color_fprintf(out, color, fmt, val);
else
n += fprintf(out, fmt, val);
/* pad the unit label so the whole metric occupies METRIC_LEN columns */
fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
}
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
FILE *output = stat_config.output;
......@@ -793,22 +847,60 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}
static void printout(int id, int nr, struct perf_evsel *counter, double uval)
static void printout(int id, int nr, struct perf_evsel *counter, double uval,
char *prefix, u64 run, u64 ena, double noise)
{
int cpu = cpu_map__id_to_cpu(id);
struct perf_stat_output_ctx out;
struct outstate os = {
.fh = stat_config.output,
.prefix = prefix ? prefix : ""
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
nl = new_line_std;
if (run == 0 || ena == 0) {
aggr_printout(counter, id, nr);
fprintf(stat_config.output, "%*s%s",
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep);
if (stat_config.aggr_mode == AGGR_GLOBAL)
cpu = 0;
fprintf(stat_config.output, "%-*s%s",
csv_output ? 0 : unit_width,
counter->unit, csv_sep);
fprintf(stat_config.output, "%*s",
csv_output ? 0 : -25,
perf_evsel__name(counter));
if (counter->cgrp)
fprintf(stat_config.output, "%s%s",
csv_sep, counter->cgrp->name);
print_running(run, ena);
return;
}
if (nsec_counter(counter))
nsec_printout(id, nr, counter, uval);
else
abs_printout(id, nr, counter, uval);
if (!csv_output && !stat_config.interval)
perf_stat__print_shadow_stats(stat_config.output, counter,
uval, cpu,
stat_config.aggr_mode);
out.print_metric = pm;
out.new_line = nl;
out.ctx = &os;
if (!csv_output)
perf_stat__print_shadow_stats(counter, uval,
stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
cpu_map__id_to_cpu(id),
&out);
print_noise(counter, noise);
print_running(run, ena);
}
static void print_aggr(char *prefix)
......@@ -839,36 +931,8 @@ static void print_aggr(char *prefix)
if (prefix)
fprintf(output, "%s", prefix);
if (run == 0 || ena == 0) {
aggr_printout(counter, id, nr);
fprintf(output, "%*s%s",
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep);
fprintf(output, "%-*s%s",
csv_output ? 0 : unit_width,
counter->unit, csv_sep);
fprintf(output, "%*s",
csv_output ? 0 : -25,
perf_evsel__name(counter));
if (counter->cgrp)
fprintf(output, "%s%s",
csv_sep, counter->cgrp->name);
print_running(run, ena);
fputc('\n', output);
continue;
}
uval = val * counter->scale;
printout(id, nr, counter, uval);
if (!csv_output)
print_noise(counter, 1.0);
print_running(run, ena);
printout(id, nr, counter, uval, prefix, run, ena, 1.0);
fputc('\n', output);
}
}
......@@ -895,12 +959,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
fprintf(output, "%s", prefix);
uval = val * counter->scale;
printout(thread, 0, counter, uval);
if (!csv_output)
print_noise(counter, 1.0);
print_running(run, ena);
printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
fputc('\n', output);
}
}
......@@ -914,7 +973,6 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
FILE *output = stat_config.output;
struct perf_stat_evsel *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
double uval;
double avg_enabled, avg_running;
......@@ -924,32 +982,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
if (prefix)
fprintf(output, "%s", prefix);
if (scaled == -1 || !counter->supported) {
fprintf(output, "%*s%s",
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep);
fprintf(output, "%-*s%s",
csv_output ? 0 : unit_width,
counter->unit, csv_sep);
fprintf(output, "%*s",
csv_output ? 0 : -25,
perf_evsel__name(counter));
if (counter->cgrp)
fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
print_running(avg_running, avg_enabled);
fputc('\n', output);
return;
}
uval = avg * counter->scale;
printout(-1, 0, counter, uval);
print_noise(counter, avg);
print_running(avg_running, avg_enabled);
printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
fprintf(output, "\n");
}
......@@ -972,36 +1006,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
if (prefix)
fprintf(output, "%s", prefix);
if (run == 0 || ena == 0) {
fprintf(output, "CPU%*d%s%*s%s",
csv_output ? 0 : -4,
perf_evsel__cpus(counter)->map[cpu], csv_sep,
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep);
fprintf(output, "%-*s%s",
csv_output ? 0 : unit_width,
counter->unit, csv_sep);
fprintf(output, "%*s",
csv_output ? 0 : -25,
perf_evsel__name(counter));
if (counter->cgrp)
fprintf(output, "%s%s",
csv_sep, counter->cgrp->name);
print_running(run, ena);
fputc('\n', output);
continue;
}
uval = val * counter->scale;
printout(cpu, 0, counter, uval);
if (!csv_output)
print_noise(counter, 1.0);
print_running(run, ena);
printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
fputc('\n', output);
}
......
......@@ -35,8 +35,12 @@ SOLIBEXT=so
# The following works at least on fedora 23, you may need the next
# line for other distros.
ifeq (,$(wildcard /usr/sbin/update-java-alternatives))
JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
#JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
else
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
endif
# -lrt required in 32-bit mode for clock_gettime()
LIBS=-lelf -lrt
INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
......
......@@ -613,6 +613,8 @@ int main(int argc, const char **argv)
*/
pthread__block_sigwinch();
perf_debug_setup();
while (1) {
static int done_help;
int was_alias = run_argv(&argc, &argv);
......
......@@ -5,6 +5,7 @@
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <api/debug.h>
#include "cache.h"
#include "color.h"
......@@ -22,7 +23,7 @@ int debug_ordered_events;
static int redirect_to_stderr;
int debug_data_convert;
static int _eprintf(int level, int var, const char *fmt, va_list args)
int veprintf(int level, int var, const char *fmt, va_list args)
{
int ret = 0;
......@@ -36,24 +37,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
return ret;
}
int veprintf(int level, int var, const char *fmt, va_list args)
{
return _eprintf(level, var, fmt, args);
}
int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;
int ret;
va_start(args, fmt);
ret = _eprintf(level, var, fmt, args);
ret = veprintf(level, var, fmt, args);
va_end(args);
return ret;
}
static int __eprintf_time(u64 t, const char *fmt, va_list args)
static int veprintf_time(u64 t, const char *fmt, va_list args)
{
int ret = 0;
u64 secs, usecs, nsecs = t;
......@@ -75,7 +71,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
if (var >= level) {
va_start(args, fmt);
ret = __eprintf_time(t, fmt, args);
ret = veprintf_time(t, fmt, args);
va_end(args);
}
......@@ -91,7 +87,7 @@ void pr_stat(const char *fmt, ...)
va_list args;
va_start(args, fmt);
_eprintf(1, verbose, fmt, args);
veprintf(1, verbose, fmt, args);
va_end(args);
eprintf(1, verbose, "\n");
}
......@@ -192,3 +188,23 @@ int perf_debug_option(const char *str)
free(s);
return 0;
}
/*
 * DEBUG_WRAPPER(name, level) defines pr_<name>_wrapper(), a printf-style
 * function that forwards its arguments to veprintf() at the given level,
 * using the global 'verbose' setting, and returns veprintf()'s result.
 */
#define DEBUG_WRAPPER(__n, __l) \
static int pr_ ## __n ## _wrapper(const char *fmt, ...) \
{ \
va_list args; \
int ret; \
\
va_start(args, fmt); \
ret = veprintf(__l, verbose, fmt, args); \
va_end(args); \
return ret; \
}
/* level 0 wrapper for warnings, level 1 wrapper for debug output */
DEBUG_WRAPPER(warning, 0);
DEBUG_WRAPPER(debug, 1);
/*
 * Route libapi's messages through perf's own printing: the warning wrapper
 * serves both the warning and the info callbacks, the debug wrapper serves
 * the debug callback.
 */
void perf_debug_setup(void)
{
libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
}
......@@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__(
int veprintf(int level, int var, const char *fmt, va_list args);
int perf_debug_option(const char *str);
void perf_debug_setup(void);
#endif /* __PERF_DEBUG_H */
......@@ -6,6 +6,8 @@ struct perf_env perf_env;
void perf_env__exit(struct perf_env *env)
{
int i;
zfree(&env->hostname);
zfree(&env->os_release);
zfree(&env->version);
......@@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env)
zfree(&env->numa_nodes);
zfree(&env->pmu_mappings);
zfree(&env->cpu);
for (i = 0; i < env->caches_cnt; i++)
cpu_cache_level__free(&env->caches[i]);
zfree(&env->caches);
}
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
......@@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
env->nr_cpus_avail = nr_cpus;
return 0;
}
/*
 * Release the heap strings owned by @cache (not the struct itself).
 *
 * zfree() also resets each pointer to NULL, so a second call on the same
 * entry, or a later read of a stale field, cannot touch freed memory.
 */
void cpu_cache_level__free(struct cpu_cache_level *cache)
{
	zfree(&cache->type);
	zfree(&cache->map);
	zfree(&cache->size);
}
#ifndef __PERF_ENV_H
#define __PERF_ENV_H
#include <linux/types.h>
struct cpu_topology_map {
int socket_id;
int core_id;
};
/*
 * Description of one CPU cache as stored in the perf.data header.
 * The three strings are heap allocated and released by
 * cpu_cache_level__free().
 */
struct cpu_cache_level {
u32 level; /* cache level number, printed as "L<level>" */
u32 line_size; /* read from sysfs coherency_line_size */
u32 sets; /* read from sysfs number_of_sets */
u32 ways; /* read from sysfs ways_of_associativity */
char *type; /* sysfs "type" string */
char *size; /* sysfs "size" string */
char *map; /* sysfs "shared_cpu_list" string */
};
struct perf_env {
char *hostname;
char *os_release;
......@@ -31,6 +43,8 @@ struct perf_env {
char *numa_nodes;
char *pmu_mappings;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
};
extern struct perf_env perf_env;
......@@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
#endif /* __PERF_ENV_H */
......@@ -23,6 +23,8 @@
#include "strbuf.h"
#include "build-id.h"
#include "data.h"
#include <api/fs/fs.h>
#include "asm/bug.h"
/*
* magic2 = "PERFILE2"
......@@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h,
return err;
}
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
return cache_a->level - cache_b->level;
}
/*
 * Returns true when @a and @b describe the same cache, i.e. all numeric
 * fields and all three strings are equal. Note the polarity: despite the
 * "cmp" name, true means "equal".
 */
static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
{
	return a->level == b->level &&
	       a->line_size == b->line_size &&
	       a->sets == b->sets &&
	       a->ways == b->ways &&
	       !strcmp(a->type, b->type) &&
	       !strcmp(a->size, b->size) &&
	       !strcmp(a->map, b->map);
}
/*
 * Fill @cache from the sysfs description of cache index @level of @cpu
 * (devices/system/cpu/cpu<cpu>/cache/index<level>/).
 *
 * Returns 0 on success, 1 when that cache index directory does not exist
 * (stat() fails), and -1 when one of the attribute files cannot be read.
 *
 * On success cache->type, cache->size and cache->map are heap strings owned
 * by the caller (release them with cpu_cache_level__free()). On failure
 * every string that was already read is released here and the contents of
 * @cache must not be used.
 */
static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
{
	char path[PATH_MAX], file[PATH_MAX];
	struct stat st;
	size_t len;

	scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
	scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);

	/* a missing index directory just means there are no more caches */
	if (stat(file, &st))
		return 1;

	scnprintf(file, PATH_MAX, "%s/level", path);
	if (sysfs__read_int(file, (int *) &cache->level))
		return -1;

	scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
	if (sysfs__read_int(file, (int *) &cache->line_size))
		return -1;

	scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
	if (sysfs__read_int(file, (int *) &cache->sets))
		return -1;

	scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
	if (sysfs__read_int(file, (int *) &cache->ways))
		return -1;

	scnprintf(file, PATH_MAX, "%s/type", path);
	if (sysfs__read_str(file, &cache->type, &len))
		return -1;

	/* the buffer is not NUL-terminated by the reader: terminate, then trim */
	cache->type[len] = 0;
	cache->type = rtrim(cache->type);

	scnprintf(file, PATH_MAX, "%s/size", path);
	if (sysfs__read_str(file, &cache->size, &len)) {
		free(cache->type);
		return -1;
	}

	cache->size[len] = 0;
	cache->size = rtrim(cache->size);

	scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
	if (sysfs__read_str(file, &cache->map, &len)) {
		/*
		 * cache->map was never assigned on this path, so it must not
		 * be freed; the strings that do exist are size and type.
		 */
		free(cache->size);
		free(cache->type);
		return -1;
	}

	cache->map[len] = 0;
	cache->map = rtrim(cache->map);
	return 0;
}
/*
 * Print one cache description as "L<level> <type> <size> [<cpu list>]".
 * The level is a u32, so it is printed with %u rather than %d.
 */
static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
{
	fprintf(out, "L%u %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
}
/*
 * Collect the distinct CPU caches of the machine into @caches.
 *
 * Every configured CPU is probed for cache indexes 0..9; a cache already
 * present in @caches (see cpu_cache_level__cmp()) is dropped, the others
 * are appended until @size entries have been stored.
 *
 * Returns 0 on success, -1 when the number of CPUs cannot be determined,
 * or the negative value returned by cpu_cache_level__read().
 *
 * *cntp is ALWAYS set to the number of valid entries in @caches, also on
 * failure, so that the caller can release the entries collected before the
 * error instead of leaking them.
 */
static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
{
	u32 i, cnt = 0;
	long ncpus;
	u32 nr, cpu;
	u16 level;
	int ret = 0;

	ncpus = sysconf(_SC_NPROCESSORS_CONF);
	if (ncpus < 0) {
		ret = -1;
		goto out;
	}

	nr = (u32)(ncpus & UINT_MAX);

	for (cpu = 0; cpu < nr; cpu++) {
		for (level = 0; level < 10; level++) {
			struct cpu_cache_level c;
			int err;

			err = cpu_cache_level__read(&c, cpu, level);
			if (err < 0) {
				ret = err;
				goto out;
			}

			/* no such index on this cpu: move on to the next cpu */
			if (err == 1)
				break;

			for (i = 0; i < cnt; i++) {
				if (cpu_cache_level__cmp(&c, &caches[i]))
					break;
			}

			/* keep new caches, drop the strings of duplicates */
			if (i == cnt)
				caches[cnt++] = c;
			else
				cpu_cache_level__free(&c);

			if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
				goto out;
		}
	}
out:
	*cntp = cnt;
	return ret;
}
/* Capacity of the on-stack cache table used by write_cache(). */
#define MAX_CACHES 2000
/*
 * Write the CPU cache feature section to @fd.
 *
 * Layout: u32 version (1), u32 number of entries, then for every entry the
 * four u32 fields (level, line_size, sets, ways) followed by the three
 * strings (type, size, map). Entries are sorted with cpu_cache_level__sort()
 * before being written.
 *
 * Returns the result of build_caches() when that fails, otherwise the
 * result of the last do_write()/do_write_string() call. The collected
 * entries are released on every path.
 */
static int write_cache(int fd, struct perf_header *h __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
struct cpu_cache_level caches[MAX_CACHES];
u32 cnt = 0, i, version = 1;
int ret;
ret = build_caches(caches, MAX_CACHES, &cnt);
if (ret)
goto out;
qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
ret = do_write(fd, &version, sizeof(u32));
if (ret < 0)
goto out;
ret = do_write(fd, &cnt, sizeof(u32));
if (ret < 0)
goto out;
for (i = 0; i < cnt; i++) {
struct cpu_cache_level *c = &caches[i];
/* first the numeric fields, each written as a raw u32 */
#define _W(v) \
ret = do_write(fd, &c->v, sizeof(u32)); \
if (ret < 0) \
goto out;
_W(level)
_W(line_size)
_W(sets)
_W(ways)
#undef _W
/* then the string fields */
#define _W(v) \
ret = do_write_string(fd, (const char *) c->v); \
if (ret < 0) \
goto out;
_W(type)
_W(size)
_W(map)
#undef _W
}
out:
/* release the strings of every collected entry, success or not */
for (i = 0; i < cnt; i++)
cpu_cache_level__free(&caches[i]);
return ret;
}
static int write_stat(int fd __maybe_unused,
struct perf_header *h __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
......@@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused,
fprintf(fp, "# contains stat data\n");
}
/*
 * Print the CPU cache section of the header: a title line followed by one
 * "# "-prefixed line per cache stored in ph->env.caches.
 */
static void print_cache(struct perf_header *ph __maybe_unused,
			int fd __maybe_unused, FILE *fp __maybe_unused)
{
	int idx = 0;

	fprintf(fp, "# CPU cache info:\n");

	while (idx < ph->env.caches_cnt) {
		fprintf(fp, "# ");
		cpu_cache_level__fprintf(fp, &ph->env.caches[idx]);
		idx++;
	}
}
static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
......@@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section,
return err;
}
/*
 * Parse the CPU cache feature section from @fd into ph->env.caches.
 *
 * Layout: u32 version (must be 1), u32 entry count, then per entry four u32
 * fields (level, line_size, sets, ways) and three strings (type, size, map).
 * Numeric fields are byte-swapped when ph->needs_swap is set.
 *
 * Returns 0 on success and -1 on a short read, an unsupported version or an
 * allocation failure. On failure nothing is stored in ph->env and every
 * string read so far is released.
 */
static int process_cache(struct perf_file_section *section __maybe_unused,
			 struct perf_header *ph __maybe_unused, int fd __maybe_unused,
			 void *data __maybe_unused)
{
	struct cpu_cache_level *caches;
	u32 cnt, i, version;

	if (readn(fd, &version, sizeof(version)) != sizeof(version))
		return -1;

	if (ph->needs_swap)
		version = bswap_32(version);

	if (version != 1)
		return -1;

	if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt))
		return -1;

	if (ph->needs_swap)
		cnt = bswap_32(cnt);

	/*
	 * cnt comes from the file: calloc() checks the size multiplication
	 * for overflow and zeroes the table, so that the error path below
	 * can free every slot, including the ones never filled in.
	 */
	caches = calloc(cnt, sizeof(*caches));
	if (!caches)
		return -1;

	for (i = 0; i < cnt; i++) {
		/* read straight into the table so partial entries get freed too */
		struct cpu_cache_level *c = &caches[i];

		#define _R(v)						\
			if (readn(fd, &c->v, sizeof(u32)) != sizeof(u32))\
				goto out_free_caches;			\
			if (ph->needs_swap)				\
				c->v = bswap_32(c->v);

		_R(level)
		_R(line_size)
		_R(sets)
		_R(ways)
		#undef _R

		#define _R(v)				\
			c->v = do_read_string(fd, ph);	\
			if (!c->v)			\
				goto out_free_caches;

		_R(type)
		_R(size)
		_R(map)
		#undef _R
	}

	ph->env.caches = caches;
	ph->env.caches_cnt = cnt;
	return 0;

out_free_caches:
	/* unread slots hold NULL pointers, which free() ignores */
	for (i = 0; i < cnt; i++)
		cpu_cache_level__free(&caches[i]);
	free(caches);
	return -1;
}
struct feature_ops {
int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
void (*print)(struct perf_header *h, int fd, FILE *fp);
......@@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
FEAT_OPP(HEADER_AUXTRACE, auxtrace),
FEAT_OPA(HEADER_STAT, stat),
FEAT_OPF(HEADER_CACHE, cache),
};
struct header_print_data {
......
......@@ -32,6 +32,7 @@ enum {
HEADER_GROUP_DESC,
HEADER_AUXTRACE,
HEADER_STAT,
HEADER_CACHE,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
......
......@@ -137,9 +137,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
return color;
}
static void print_stalled_cycles_frontend(FILE *out, int cpu,
static void print_stalled_cycles_frontend(int cpu,
struct perf_evsel *evsel
__maybe_unused, double avg)
__maybe_unused, double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -152,14 +153,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu,
color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " frontend cycles idle ");
if (ratio)
out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
ratio);
else
out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}
static void print_stalled_cycles_backend(FILE *out, int cpu,
static void print_stalled_cycles_backend(int cpu,
struct perf_evsel *evsel
__maybe_unused, double avg)
__maybe_unused, double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -172,14 +176,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu,
color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " backend cycles idle ");
out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
}
static void print_branch_misses(FILE *out, int cpu,
static void print_branch_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -192,14 +195,13 @@ static void print_branch_misses(FILE *out, int cpu,
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all branches ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}
static void print_l1_dcache_misses(FILE *out, int cpu,
static void print_l1_dcache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -212,14 +214,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu,
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all L1-dcache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}
static void print_l1_icache_misses(FILE *out, int cpu,
static void print_l1_icache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -231,15 +232,13 @@ static void print_l1_icache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all L1-icache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}
static void print_dtlb_cache_misses(FILE *out, int cpu,
static void print_dtlb_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -251,15 +250,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all dTLB cache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}
static void print_itlb_cache_misses(FILE *out, int cpu,
static void print_itlb_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -271,15 +268,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all iTLB cache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}
static void print_ll_cache_misses(FILE *out, int cpu,
static void print_ll_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
const char *color;
......@@ -291,15 +286,15 @@ static void print_ll_cache_misses(FILE *out, int cpu,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(out, " # ");
color_fprintf(out, color, "%6.2f%%", ratio);
fprintf(out, " of all LL-cache hits ");
out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
double avg, int cpu, enum aggr_mode aggr)
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
{
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
int ctx = evsel_context(evsel);
......@@ -307,119 +302,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
if (total) {
ratio = avg / total;
fprintf(out, " # %5.2f insns per cycle ", ratio);
print_metric(ctxp, NULL, "%7.2f ",
"insn per cycle", ratio);
} else {
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
}
total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
out->new_line(ctxp);
if (total && avg) {
ratio = total / avg;
fprintf(out, "\n");
if (aggr == AGGR_NONE)
fprintf(out, " ");
fprintf(out, " # %5.2f stalled cycles per insn", ratio);
print_metric(ctxp, NULL, "%7.2f ",
"stalled cycles per insn",
ratio);
} else {
print_metric(ctxp, NULL, NULL,
"stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[ctx][cpu].n != 0) {
print_branch_misses(out, cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_branches_stats[ctx][cpu].n != 0)
print_branch_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all branches", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_dcache_stats[ctx][cpu].n != 0) {
print_l1_dcache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
print_l1_dcache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_icache_stats[ctx][cpu].n != 0) {
print_l1_icache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_l1_icache_stats[ctx][cpu].n != 0)
print_l1_icache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
print_dtlb_cache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
print_dtlb_cache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_itlb_cache_stats[ctx][cpu].n != 0) {
print_itlb_cache_misses(out, cpu, evsel, avg);
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
print_itlb_cache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_ll_cache_stats[ctx][cpu].n != 0) {
print_ll_cache_misses(out, cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[ctx][cpu].n != 0) {
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
if (runtime_ll_cache_stats[ctx][cpu].n != 0)
print_ll_cache_misses(cpu, evsel, avg, out);
else
print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
if (total)
ratio = avg * 100 / total;
fprintf(out, " # %8.3f %% of all cache refs ", ratio);
if (runtime_cacherefs_stats[ctx][cpu].n != 0)
print_metric(ctxp, NULL, "%8.3f %%",
"of all cache refs", ratio);
else
print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(out, cpu, evsel, avg);
print_stalled_cycles_frontend(cpu, evsel, avg, out);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(out, cpu, evsel, avg);
print_stalled_cycles_backend(cpu, evsel, avg, out);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
if (total) {
ratio = avg / total;
fprintf(out, " # %8.3f GHz ", ratio);
print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
} else {
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, "Ghz", 0);
}
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
if (total)
fprintf(out,
" # %5.2f%% transactional cycles ",
100.0 * (avg / total));
print_metric(ctxp, NULL,
"%7.2f%%", "transactional cycles",
100.0 * (avg / total));
else
print_metric(ctxp, NULL, NULL, "transactional cycles",
0);
} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
if (total2 < avg)
total2 = avg;
if (total)
fprintf(out,
" # %5.2f%% aborted cycles ",
print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
100.0 * ((total2-avg) / total));
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
else
print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
if (avg)
ratio = total / avg;
fprintf(out, " # %8.0f cycles / transaction ", ratio);
} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
print_metric(ctxp, NULL, "%8.0f",
"cycles / transaction", ratio);
else
print_metric(ctxp, NULL, NULL, "cycles / transaction",
0);
} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
if (avg)
ratio = total / avg;
fprintf(out, " # %8.0f cycles / elision ", ratio);
print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
fprintf(out, " # %8.3f CPUs utilized ", avg / ratio);
print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
avg / ratio);
else
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];
total = avg_stats(&runtime_nsecs_stats[cpu]);
......@@ -429,9 +450,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
ratio *= 1000;
unit = 'K';
}
fprintf(out, " # %8.3f %c/sec ", ratio, unit);
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
} else {
fprintf(out, " ");
print_metric(ctxp, NULL, NULL, NULL, 0);
}
}
......@@ -68,11 +68,22 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel);
extern struct stats walltime_nsecs_stats;
/*
 * Callback type used to emit one metric value.  It takes an opaque context
 * pointer, a color string, two further strings and the numeric value.
 *
 * NOTE(review): the callers visible alongside this declaration pass the
 * printf-style format (e.g. "%7.2f%%") as the third argument and the
 * human-readable label (e.g. "transactional cycles") as the fourth, which is
 * the opposite of the parameter names 'unit' and 'fmt' below -- confirm which
 * order is intended.  Those callers pass NULL for the color, and a NULL
 * format with a value of 0 when there is nothing to report.
 */
typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
const char *fmt, double val);
/*
 * Callback type that receives only the opaque context pointer.  Presumably
 * used to start a new output line -- not exercised in the visible code.
 */
typedef void (*new_line_t )(void *ctx);
void perf_stat__reset_shadow_stats(void);
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
int cpu);
void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
double avg, int cpu, enum aggr_mode aggr);
/*
 * Bundle of output callbacks plus an opaque pointer, passed by pointer as the
 * 'out' argument of perf_stat__print_shadow_stats().
 */
struct perf_stat_output_ctx {
/* Opaque pointer; presumably forwarded as the callbacks' 'ctx' argument -- confirm in the caller. */
void *ctx;
/* Callback of type print_metric_t. */
print_metric_t print_metric;
/* Callback of type new_line_t. */
new_line_t new_line;
};
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out);
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct perf_evlist *evlist);
......
......@@ -10,6 +10,7 @@
#include <linux/err.h>
#include <traceevent/event-parse.h>
#include <api/fs/tracing_path.h>
#include <api/fs/fs.h>
#include "trace-event.h"
#include "machine.h"
#include "util.h"
......
......@@ -507,54 +507,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
return ret;
}
/*
 * filename__read_str - read an entire file into a heap buffer.
 *
 * @filename: path of the file to open read-only.
 * @buf:      on success, receives the buffer holding the file content.
 * @sizep:    on success, receives the number of bytes stored in the buffer.
 *
 * The buffer is grown with realloc() in BUFSIZ steps and is not freed here on
 * success.  This function does not append a NUL terminator; the length is
 * reported only through @sizep.
 *
 * Returns 0 on success, -errno when open() fails or when the very first
 * read() fails, and -ENOMEM when the buffer cannot be grown.  If a read()
 * fails after some data was already collected, a warning is printed and the
 * partial content is returned with a 0 return value.  On any non-zero return
 * @buf and @sizep are left untouched.
 */
int filename__read_str(const char *filename, char **buf, size_t *sizep)
{
	char errbuf[STRERR_BUFSIZE];
	size_t used = 0;
	size_t capacity = 0;
	void *data = NULL;
	int ret = 0;
	int nread;
	int fd;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return -errno;

	for (;;) {
		/* Buffer exhausted (or not allocated yet): add one more BUFSIZ. */
		if (used == capacity) {
			void *grown;

			capacity += BUFSIZ;
			grown = realloc(data, capacity);
			if (grown == NULL) {
				ret = -ENOMEM;
				break;
			}
			data = grown;
		}

		nread = read(fd, (char *)data + used, capacity - used);
		if (nread < 0) {
			if (used == 0) {
				/* Nothing was read at all: report the failure. */
				ret = -errno;
			} else {
				/* Keep what was read so far; only warn. */
				pr_warning("read failed %d: %s\n", errno,
					   strerror_r(errno, errbuf, sizeof(errbuf)));
				ret = 0;
			}
			break;
		}

		/* A zero-length read marks end of file. */
		if (nread == 0)
			break;

		used += nread;
	}

	if (ret != 0) {
		free(data);
	} else {
		*sizep = used;
		*buf = data;
	}

	close(fd);
	return ret;
}
const char *get_filename_for_perf_kvm(void)
{
const char *filename;
......
......@@ -303,7 +303,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
bool show_sym, bool unwind_inlines);
void free_srcline(char *srcline);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
int perf_event_paranoid(void);
void mem_bswap_64(void *src, int byte_size);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册