Commit 3d521f91 authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of...

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next

Pull perf updates from Ingo Molnar:
 "The tooling changes maintained by Jiri Olsa until Arnaldo is on
  vacation:

  User visible changes:
   - Add -F option for specifying output fields (Namhyung Kim)
   - Propagate exit status of a command line workload for record command
     (Namhyung Kim)
   - Use tid for finding thread (Namhyung Kim)
   - Clarify the output of perf sched map plus small sched command
     fixes (Dongsheng Yang)
   - Wire up perf_regs and unwind support for ARM64 (Jean Pihet)
   - Factor hists statistics counts processing which in turn also fixes
     several bugs in TUI report command (Namhyung Kim)
   - Add --percentage option to control absolute/relative percentage
     output (Namhyung Kim)
   - Add --list-cmds to 'kmem', 'mem', 'lock' and 'sched', for use by
     completion scripts (Ramkumar Ramachandra)

  Development/infrastructure changes and fixes:
   - Android related fixes for pager and map dso resolving (Michael
     Lentine)
   - Add libdw DWARF post unwind support for ARM (Jean Pihet)
   - Consolidate types.h for ARM and ARM64 (Jean Pihet)
   - Fix possible null pointer dereference in session.c (Masanari Iida)
   - Cleanup, remove unused variables in map_switch_event() (Dongsheng
     Yang)
   - Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)
   - Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)
   - Cleanups for perf.h header (Jiri Olsa)
   - Consolidate types.h and export.h within tools (Borislav Petkov)
   - Move u64_swap union to its single user's header, evsel.h (Borislav
     Petkov)
   - Fix for s390 to properly parse tracepoints plus test code
     (Alexander Yarygin)
   - Handle EINTR error for readn/writen (Namhyung Kim); a sketch of
     the retry loop follows this list
   - Add a test case for hists filtering (Namhyung Kim)
   - Share map_groups among threads of the same group (Arnaldo Carvalho
     de Melo, Jiri Olsa)
   - Making some code (cpu node map and report parse callchain callback)
     global to be usable by upcoming changes (Don Zickus)
   - Fix pmu object compilation error (Jiri Olsa)
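
  The EINTR fix above is the classic restart-on-signal loop around
  read()/write(). A minimal userspace sketch of the read side (the same
  shape as, but not a verbatim copy of, the tools/perf helper):

  #include <errno.h>
  #include <unistd.h>

  /* Read exactly n bytes, restarting when a signal interrupts the
   * syscall; returns n, 0 on EOF, or -1 on error. */
  static ssize_t readn(int fd, void *buf, size_t n)
  {
      char *p = buf;
      size_t left = n;

      while (left) {
          ssize_t ret = read(fd, p, left);

          if (ret < 0 && errno == EINTR)
              continue;           /* interrupted, retry */
          if (ret <= 0)
              return ret;         /* error or EOF */
          p += ret;
          left -= ret;
      }
      return n;
  }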

  Kernel side changes:
   - intrusive uprobes fixes from Oleg Nesterov.  Since the interface is
     admin-only, and the bug only affects user-space ("any probed
     jmp/call can kill the application"), we queued these fixes via the
     development tree, as a special exception.
   - more fuzzer motivated race fixes and related refactoring and
     robustization.
   - allow PMU drivers to be built as modules; see the refcounting
     sketch after this list.  (No actual module yet, because the x86
     Intel uncore module wasn't ready in time for this)"
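
The PMU-as-module item is implemented by pinning the PMU's module for
the lifetime of every event bound to it; the kernel/events/core.c hunks
below add the try_module_get()/module_put() pairs. A condensed sketch of
the pattern (lookup_pmu() stands in for the idr/list lookup and is not a
real kernel function):

/* Pin the PMU's module before handing the event to it; the pin is
 * dropped in __free_event() (or here, on event_init() failure). */
struct pmu *perf_init_event_sketch(struct perf_event *event)
{
    struct pmu *pmu = lookup_pmu(event);    /* hypothetical lookup */
    int ret;

    if (!try_module_get(pmu->module))
        return ERR_PTR(-ENODEV);    /* module is unloading */

    event->pmu = pmu;
    ret = pmu->event_init(event);
    if (ret) {
        module_put(pmu->module);    /* undo the pin on failure */
        return ERR_PTR(ret);
    }
    return pmu;
}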

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  perf tools: Add automatic remapping of Android libraries
  perf tools: Add cat as fallback pager
  perf tests: Add a testcase for histogram output sorting
  perf tests: Factor out print_hists_*()
  perf tools: Introduce reset_output_field()
  perf tools: Get rid of obsolete hist_entry__sort_list
  perf hists: Reset width of output fields with header length
  perf tools: Skip elided sort entries
  perf top: Add --fields option to specify output fields
  perf report/tui: Fix a bug when --fields/sort is given
  perf tools: Add ->sort() member to struct sort_entry
  perf report: Add -F option to specify output fields
  perf tools: Call perf_hpp__init() before setting up GUI browsers
  perf tools: Consolidate management of default sort orders
  perf tools: Allow hpp fields to be sort keys
  perf ui: Get rid of callback from __hpp__fmt()
  perf tools: Consolidate output field handling to hpp format routines
  perf tools: Use hpp formats to sort final output
  perf tools: Support event grouping in hpp ->sort()
  perf tools: Use hpp formats to sort hist entries
  ...
......@@ -33,15 +33,27 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN 0xcc
#define UPROBE_SWBP_INSN_SIZE 1
struct uprobe_xol_ops;
struct arch_uprobe {
u16 fixups;
union {
u8 insn[MAX_UINSN_BYTES];
u8 ixol[MAX_UINSN_BYTES];
};
u16 fixups;
const struct uprobe_xol_ops *ops;
union {
#ifdef CONFIG_X86_64
unsigned long rip_rela_target_address;
unsigned long rip_rela_target_address;
#endif
struct {
s32 offs;
u8 ilen;
u8 opc1;
} branch;
};
};
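
The new branch member caches the displacement, instruction length and
primary opcode of a probed jmp/call so the uprobes core can emulate it
instead of single-stepping it out of line; this is the machinery behind
the "any probed jmp/call can kill the application" fixes mentioned in
the changelog. Roughly, for a relative branch (a sketch; the real code
is branch_emulate_op() in arch/x86/kernel/uprobes.c, and branch_taken()
here is an invented stand-in for the opc1 condition check):

static bool emulate_branch_sketch(struct arch_uprobe *auprobe,
                                  struct pt_regs *regs)
{
    /* ip of the instruction following the probed branch */
    unsigned long next_ip = regs->ip + auprobe->branch.ilen;

    if (branch_taken(auprobe, regs))    /* decided by branch.opc1 */
        regs->ip = next_ip + auprobe->branch.offs;
    else
        regs->ip = next_ip;

    return true;    /* emulated, no single-step needed */
}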
struct arch_uprobe_task {
......
......@@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n,
return sched.state.unassigned;
}
EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
......
......@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
return val;
}
static u64 precise_store_data_hsw(u64 status)
static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
{
union perf_mem_data_src dse;
u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
dse.mem_op = PERF_MEM_OP_STORE;
dse.mem_lvl = PERF_MEM_LVL_NA;
/*
* L1 info only valid for following events:
*
* MEM_UOPS_RETIRED.STLB_MISS_STORES
* MEM_UOPS_RETIRED.LOCK_STORES
* MEM_UOPS_RETIRED.SPLIT_STORES
* MEM_UOPS_RETIRED.ALL_STORES
*/
if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
return dse.mem_lvl;
if (status & 1)
dse.mem_lvl = PERF_MEM_LVL_L1;
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
else
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
/* Nothing else supported. Sorry. */
return dse.val;
}
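
A consumer of PERF_SAMPLE_DATA_SRC samples picks those bits back out of
the packed value; a minimal userspace decoder for the L1 hit/miss
distinction introduced above, using only the uapi definitions:

#include <stdio.h>
#include <linux/perf_event.h>

static void print_store_level(__u64 data_src)
{
    union perf_mem_data_src dse = { .val = data_src };

    if (dse.mem_op & PERF_MEM_OP_STORE)
        printf("store: L1 %s\n",
               (dse.mem_lvl & PERF_MEM_LVL_HIT) ? "hit" : "miss");
}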
......@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
data.data_src.val = load_latency_data(pebs->dse);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
data.data_src.val =
precise_store_data_hsw(pebs->dse);
precise_store_data_hsw(event, pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
......
This diff is collapsed.
......@@ -172,6 +172,7 @@ struct perf_event;
struct pmu {
struct list_head entry;
struct module *module;
struct device *dev;
const struct attribute_group **attr_groups;
const char *name;
......
......@@ -722,10 +722,10 @@ enum perf_callchain_context {
PERF_CONTEXT_MAX = (__u64)-4095,
};
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
#define PERF_FLAG_FD_CLOEXEC (1U << 3) /* O_CLOEXEC */
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
union perf_mem_data_src {
__u64 val;
......
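
The switch from 1U to 1UL matches the type of the syscall's flags
argument, which is an unsigned long. Callers pass these bits directly;
a minimal perf_event_open() user exercising the recently added
close-on-exec flag (needs a uapi header new enough to define
PERF_FLAG_FD_CLOEXEC):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Count task-clock on the calling thread; the returned fd is
 * close-on-exec thanks to PERF_FLAG_FD_CLOEXEC. */
static int open_task_clock(void)
{
    struct perf_event_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_SOFTWARE;
    attr.config = PERF_COUNT_SW_TASK_CLOCK;

    /* pid = 0 (self), cpu = -1 (any), group_fd = -1 */
    return syscall(__NR_perf_event_open, &attr, 0, -1, -1,
                   PERF_FLAG_FD_CLOEXEC);
}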
......@@ -39,6 +39,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include "internal.h"
......@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
u64 tstamp = perf_event_time(event);
int ret = 0;
lockdep_assert_held(&ctx->lock);
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
......@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
if (event->ctx)
put_ctx(event->ctx);
if (event->pmu)
module_put(event->pmu->module);
call_rcu(&event->rcu_head, free_event_rcu);
}
static void free_event(struct perf_event *event)
static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending);
......@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
if (is_cgroup_event(event))
perf_detach_cgroup(event);
__free_event(event);
}
int perf_event_release_kernel(struct perf_event *event)
/*
* Used to free events which have a known refcount of 1, such as in error paths
* where the event isn't exposed yet and inherited events.
*/
static void free_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
*
* 1) there is a lock recursion from perf_event_exit_task
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
* perf_event_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
free_event(event);
if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
"unexpected event refcount: %ld; ptr=%p\n",
atomic_long_read(&event->refcount), event)) {
/* leak to avoid use-after-free */
return;
}
return 0;
_free_event(event);
}
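
free_event() now documents its precondition with an atomic 1 -> 0
transition and deliberately leaks the event if anyone else still holds
a reference, since freeing anyway would set up a use-after-free. The
guard is easy to reproduce as a standalone C11 analogue:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
    atomic_long refcount;
    /* payload elided */
};

/* Free only if the refcount moves atomically from 1 to 0; otherwise
 * warn and leak, mirroring the free_event() guard above. */
static void obj_free_single_ref(struct obj *o)
{
    long expected = 1;

    if (!atomic_compare_exchange_strong(&o->refcount, &expected, 0)) {
        fprintf(stderr, "unexpected refcount: %ld; ptr=%p\n",
                expected, (void *)o);
        return;     /* leak to avoid use-after-free */
    }
    free(o);
}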
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
/*
* Called when the last reference to the file is gone.
*/
static void put_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *owner;
if (!atomic_long_dec_and_test(&event->refcount))
......@@ -3341,9 +3337,33 @@ static void put_event(struct perf_event *event)
put_task_struct(owner);
}
perf_event_release_kernel(event);
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
*
* 1) there is a lock recursion from perf_event_exit_task
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
* perf_event_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
_free_event(event);
}
int perf_event_release_kernel(struct perf_event *event)
{
put_event(event);
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
static int perf_release(struct inode *inode, struct file *file)
{
put_event(file->private_data);
......@@ -6578,6 +6598,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
free_percpu(pmu->pmu_disable_count);
goto unlock;
}
EXPORT_SYMBOL_GPL(perf_pmu_register);
void perf_pmu_unregister(struct pmu *pmu)
{
......@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
put_device(pmu->dev);
free_pmu_context(pmu);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
struct pmu *perf_init_event(struct perf_event *event)
{
......@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
if (!try_module_get(pmu->module)) {
pmu = ERR_PTR(-ENODEV);
goto unlock;
}
event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
......@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
if (!try_module_get(pmu->module)) {
pmu = ERR_PTR(-ENODEV);
goto unlock;
}
event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
......@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
err_pmu:
if (event->destroy)
event->destroy(event);
module_put(pmu->module);
err_ns:
if (event->ns)
put_pid_ns(event->ns);
......@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
if (task && group_leader &&
group_leader->attr.inherit != attr.inherit) {
err = -EINVAL;
goto err_task;
}
get_online_cpus();
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_task;
goto err_cpus;
}
if (flags & PERF_FLAG_PID_CGROUP) {
err = perf_cgroup_connect(pid, event, &attr, group_leader);
if (err) {
__free_event(event);
goto err_task;
goto err_cpus;
}
}
......@@ -7242,8 +7279,9 @@ SYSCALL_DEFINE5(perf_event_open,
put_ctx(ctx);
err_alloc:
free_event(event);
err_task:
err_cpus:
put_online_cpus();
err_task:
if (task)
put_task_struct(task);
err_group_fd:
......@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
struct perf_event_context *child_ctx,
struct task_struct *child)
{
perf_remove_from_context(child_event, !!child_event->parent);
perf_remove_from_context(child_event, true);
/*
* It can happen that the parent exits first, and has events
......@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
{
struct perf_event *child_event, *tmp;
struct perf_event *child_event;
struct perf_event_context *child_ctx;
unsigned long flags;
......@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
*/
mutex_lock(&child_ctx->mutex);
again:
list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
group_entry)
__perf_event_exit_task(child_event, child_ctx, child);
list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
group_entry)
list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
__perf_event_exit_task(child_event, child_ctx, child);
/*
* If the last event was a group event, it will have appended all
* its siblings to the list, but we obtained 'tmp' before that which
* will still point to the list head terminating the iteration.
*/
if (!list_empty(&child_ctx->pinned_groups) ||
!list_empty(&child_ctx->flexible_groups))
goto again;
mutex_unlock(&child_ctx->mutex);
put_ctx(child_ctx);
......
......@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
/* Can skip singlestep */
#define UPROBE_SKIP_SSTEP 1
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
......@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->offset = offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
/* For now assume that the instruction need not be single-stepped */
__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
/* add to uprobes_tree, sorted on inode:offset */
cur_uprobe = insert_uprobe(uprobe);
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
kfree(uprobe);
......@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
return true;
}
/*
* Avoid singlestepping the original instruction if the original instruction
* is a NOP or can be emulated.
*/
static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
{
if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
return true;
clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
}
return false;
}
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
struct vm_area_struct *vma;
......@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs);
if (can_skip_sstep(uprobe, regs))
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
goto out;
if (!pre_ssout(uprobe, regs, bp_vaddr))
return;
/* can_skip_sstep() succeeded, or restart if can't singlestep */
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
out:
put_uprobe(uprobe);
}
......@@ -1886,10 +1867,11 @@ static void handle_swbp(struct pt_regs *regs)
static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
{
struct uprobe *uprobe;
int err = 0;
uprobe = utask->active_uprobe;
if (utask->state == UTASK_SSTEP_ACK)
arch_uprobe_post_xol(&uprobe->arch, regs);
err = arch_uprobe_post_xol(&uprobe->arch, regs);
else if (utask->state == UTASK_SSTEP_TRAPPED)
arch_uprobe_abort_xol(&uprobe->arch, regs);
else
......@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* see uprobe_deny_signal() */
spin_unlock_irq(&current->sighand->siglock);
if (unlikely(err)) {
uprobe_warn(current, "execute the probed insn, sending SIGILL.");
force_sig_info(SIGILL, SEND_SIG_FORCED, current);
}
}
/*
......
......@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
return ret;
}
EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
/**
* hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
......
......@@ -35,4 +35,6 @@
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif /* _TOOLS_LINUX_COMPILER_H */
#ifndef _TOOLS_LINUX_EXPORT_H_
#define _TOOLS_LINUX_EXPORT_H_
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
#define EXPORT_SYMBOL_GPL_FUTURE(sym)
#define EXPORT_UNUSED_SYMBOL(sym)
#define EXPORT_UNUSED_SYMBOL_GPL(sym)
#endif
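
These no-op stubs exist so that source files shared verbatim with the
kernel compile unchanged under tools/; the export annotation simply
vanishes. For example (shared_helper() is an illustrative name, not a
real kernel symbol):

#include <linux/export.h>   /* the tools stub above */

int shared_helper(int x)
{
    return x * 2;
}
EXPORT_SYMBOL_GPL(shared_helper);   /* expands to nothing here */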
#ifndef _LIBLOCKDEP_LINUX_TYPES_H_
#define _LIBLOCKDEP_LINUX_TYPES_H_
#ifndef _TOOLS_LINUX_TYPES_H_
#define _TOOLS_LINUX_TYPES_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#include <asm/types.h>
......@@ -10,10 +11,22 @@
struct page;
struct kmem_cache;
typedef unsigned gfp_t;
typedef enum {
GFP_KERNEL,
GFP_ATOMIC,
__GFP_HIGHMEM,
__GFP_HIGH
} gfp_t;
typedef __u64 u64;
typedef __s64 s64;
/*
* We define u64 as uint64_t for every architecture
* so that we can print it with "%"PRIx64 without getting warnings.
*
* typedef __u64 u64;
* typedef __s64 s64;
*/
typedef uint64_t u64;
typedef int64_t s64;
typedef __u32 u32;
typedef __s32 s32;
......@@ -35,6 +48,10 @@ typedef __s8 s8;
#define __bitwise
#endif
#define __force
#define __user
#define __must_check
#define __cold
typedef __u16 __bitwise __le16;
typedef __u16 __bitwise __be16;
......@@ -55,4 +72,4 @@ struct hlist_node {
struct hlist_node *next, **pprev;
};
#endif
#endif /* _TOOLS_LINUX_TYPES_H_ */
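
The uint64_t definition is what makes the inttypes.h format macros
warning-free everywhere: with __u64, u64 ends up as unsigned long on
some 64-bit architectures and unsigned long long elsewhere, so
"%"PRIx64 would trigger -Wformat on one of them. A small demonstration:

#include <inttypes.h>
#include <stdio.h>

typedef uint64_t u64;   /* as in the tools types.h above */

int main(void)
{
    u64 addr = 0xffffffff81000000ULL;

    printf("%" PRIx64 "\n", addr);  /* no format warning anywhere */
    return 0;
}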
......@@ -104,7 +104,7 @@ N =
export Q VERBOSE
INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include $(CONFIG_INCLUDES)
INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
# Set compile option CFLAGS if not set elsewhere
CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
......
#ifndef _LIBLOCKDEP_LINUX_EXPORT_H_
#define _LIBLOCKDEP_LINUX_EXPORT_H_
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
#endif
......@@ -33,21 +33,25 @@ OPTIONS
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
Sort by key(s): pid, comm, dso, symbol.
Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
Please see description of --sort in the perf-report man page.
-t::
--field-separator=::
......@@ -89,6 +93,14 @@ OPTIONS
--order::
Specify compute sorting column number.
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options.
"relative" means it's relative to filtered entries only so that the
sum of shown entries will be always 100%. "absolute" means it retains
the original value before and after the filter is applied.
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
......@@ -157,6 +169,10 @@ with:
- period_percent being the % of the hist entry period value within
single data file
- with filtering by -C, -d and/or -S, period_percent might be changed
relative to how entries are filtered. Use --percentage=absolute to
prevent such fluctuation.
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:
......@@ -187,4 +203,4 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as:
SEE ALSO
--------
linkperf:perf-record[1]
linkperf:perf-record[1], linkperf:perf-report[1]
......@@ -25,10 +25,6 @@ OPTIONS
--verbose::
Be more verbose. (show symbol address, etc)
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries.
-n::
--show-nr-samples::
Show the number of samples for each symbol
......@@ -42,11 +38,18 @@ OPTIONS
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
......@@ -76,6 +79,15 @@ OPTIONS
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
- overhead_us: Overhead percentage of sample running in user mode
- overhead_guest_sys: Overhead percentage of sample running in system mode
on guest machine
- overhead_guest_us: Overhead percentage of sample running in user mode on
guest machine
- sample: Number of sample
- period: Raw number of event count of sample
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
......@@ -95,6 +107,16 @@ OPTIONS
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
-F::
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
Also it can contain any sort key(s).
By default, every sort keys not specified in -F will be appended
automatically.
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
......@@ -237,6 +259,15 @@ OPTIONS
Do not show entries which have an overhead under that percent.
(Default: 0).
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options and
Zoom operations on the TUI (thread, dso, etc).
"relative" means it's relative to filtered entries only so that the
sum of shown entries will be always 100%. "absolute" means it retains
the original value before and after the filter is applied.
--header::
Show header information in the perf.data file. This includes
various information like hostname, OS and perf version, cpu/mem
......
......@@ -113,7 +113,17 @@ Default is to monitor all CPUS.
-s::
--sort::
Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
local_weight, abort, in_tx, transaction
local_weight, abort, in_tx, transaction, overhead, sample, period.
Please see description of --sort in the perf-report man page.
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
Also it can contain any sort key(s).
By default, every sort keys not specified in --field will be appended
automatically.
-n::
--show-nr-samples::
......@@ -123,13 +133,16 @@ Default is to monitor all CPUS.
Show a column with the sum of periods.
--dsos::
Only consider symbols in these dsos.
Only consider symbols in these dsos. This option will affect the
percentage of the overhead column. See --percentage for more info.
--comms::
Only consider symbols in these comms.
Only consider symbols in these comms. This option will affect the
percentage of the overhead column. See --percentage for more info.
--symbols::
Only consider these symbols.
Only consider these symbols. This option will affect the
percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
......@@ -165,6 +178,15 @@ Default is to monitor all CPUS.
Do not show entries which have an overhead under that percent.
(Default: 0).
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options and
Zoom operations on the TUI (thread, dso, etc).
"relative" means it's relative to filtered entries only so that the
sum of shown entries will be always 100%. "absolute" means it retains
the original value before and after the filter is applied.
INTERACTIVE PROMPTING KEYS
--------------------------
......@@ -200,4 +222,4 @@ Pressing any unmapped key displays a menu, and prompts for input.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
......@@ -7,6 +7,8 @@ tools/lib/symbol/kallsyms.h
tools/include/asm/bug.h
tools/include/linux/compiler.h
tools/include/linux/hash.h
tools/include/linux/export.h
tools/include/linux/types.h
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
......
......@@ -222,12 +222,12 @@ LIB_H += util/include/linux/const.h
LIB_H += util/include/linux/ctype.h
LIB_H += util/include/linux/kernel.h
LIB_H += util/include/linux/list.h
LIB_H += util/include/linux/export.h
LIB_H += ../include/linux/export.h
LIB_H += util/include/linux/poison.h
LIB_H += util/include/linux/rbtree.h
LIB_H += util/include/linux/rbtree_augmented.h
LIB_H += util/include/linux/string.h
LIB_H += util/include/linux/types.h
LIB_H += ../include/linux/types.h
LIB_H += util/include/linux/linkage.h
LIB_H += util/include/asm/asm-offsets.h
LIB_H += ../include/asm/bug.h
......@@ -252,7 +252,6 @@ LIB_H += util/event.h
LIB_H += util/evsel.h
LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
LIB_H += util/machine.h
LIB_H += util/map.h
......@@ -397,7 +396,10 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
LIB_OBJS += $(OUTPUT)tests/pmu.o
LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
LIB_OBJS += $(OUTPUT)tests/hists_filter.o
LIB_OBJS += $(OUTPUT)tests/hists_output.o
LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
......@@ -410,10 +412,12 @@ LIB_OBJS += $(OUTPUT)tests/code-reading.o
LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
ifndef NO_DWARF_UNWIND
ifeq ($(ARCH),x86)
ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
endif
endif
LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
......
......@@ -5,3 +5,10 @@ endif
ifndef NO_LIBUNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
endif
ifndef NO_LIBDW_DWARF_UNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
endif
ifndef NO_DWARF_UNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif
......@@ -2,10 +2,15 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include "../../util/types.h"
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
......
#include <string.h>
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "event.h"
#include "tests/tests.h"
#define STACK_SIZE 8192
static int sample_ustack(struct perf_sample *sample,
struct thread *thread, u64 *regs)
{
struct stack_dump *stack = &sample->user_stack;
struct map *map;
unsigned long sp;
u64 stack_size, *buf;
buf = malloc(STACK_SIZE);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
sp = (unsigned long) regs[PERF_REG_ARM_SP];
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}
stack_size = map->end - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
memcpy(buf, (void *) sp, stack_size);
stack->data = (char *) buf;
stack->size = stack_size;
return 0;
}
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread)
{
struct regs_dump *regs = &sample->user_regs;
u64 *buf;
buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
regs->mask = PERF_REGS_MASK;
return sample_ustack(sample, thread, buf);
}
#include <linux/linkage.h>
#define R0 0x00
#define R1 0x08
#define R2 0x10
#define R3 0x18
#define R4 0x20
#define R5 0x28
#define R6 0x30
#define R7 0x38
#define R8 0x40
#define R9 0x48
#define SL 0x50
#define FP 0x58
#define IP 0x60
#define SP 0x68
#define LR 0x70
#define PC 0x78
/*
* Implementation of void perf_regs_load(u64 *regs);
*
* This functions fills in the 'regs' buffer from the actual registers values,
* in the way the perf built-in unwinding test expects them:
* - the PC at the time at the call to this function. Since this function
* is called using a bl instruction, the PC value is taken from LR.
* The built-in unwinding test then unwinds the call stack from the dwarf
* information in unwind__get_entries.
*
* Notes:
* - the 8 bytes stride in the registers offsets comes from the fact
* that the registers are stored in an u64 array (u64 *regs),
* - the regs buffer needs to be zeroed before the call to this function,
* in this case using a calloc in dwarf-unwind.c.
*/
.text
.type perf_regs_load,%function
ENTRY(perf_regs_load)
str r0, [r0, #R0]
str r1, [r0, #R1]
str r2, [r0, #R2]
str r3, [r0, #R3]
str r4, [r0, #R4]
str r5, [r0, #R5]
str r6, [r0, #R6]
str r7, [r0, #R7]
str r8, [r0, #R8]
str r9, [r0, #R9]
str sl, [r0, #SL]
str fp, [r0, #FP]
str ip, [r0, #IP]
str sp, [r0, #SP]
str lr, [r0, #LR]
str lr, [r0, #PC] // store pc as lr in order to skip the call
// to this function
mov pc, lr
ENDPROC(perf_regs_load)
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
val; \
})
dwarf_regs[0] = REG(R0);
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(FP);
dwarf_regs[12] = REG(IP);
dwarf_regs[13] = REG(SP);
dwarf_regs[14] = REG(LR);
dwarf_regs[15] = REG(PC);
return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
dwarf_regs);
}
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
ifndef NO_LIBUNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
endif
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
static inline const char *perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
return "x0";
case PERF_REG_ARM64_X1:
return "x1";
case PERF_REG_ARM64_X2:
return "x2";
case PERF_REG_ARM64_X3:
return "x3";
case PERF_REG_ARM64_X4:
return "x4";
case PERF_REG_ARM64_X5:
return "x5";
case PERF_REG_ARM64_X6:
return "x6";
case PERF_REG_ARM64_X7:
return "x7";
case PERF_REG_ARM64_X8:
return "x8";
case PERF_REG_ARM64_X9:
return "x9";
case PERF_REG_ARM64_X10:
return "x10";
case PERF_REG_ARM64_X11:
return "x11";
case PERF_REG_ARM64_X12:
return "x12";
case PERF_REG_ARM64_X13:
return "x13";
case PERF_REG_ARM64_X14:
return "x14";
case PERF_REG_ARM64_X15:
return "x15";
case PERF_REG_ARM64_X16:
return "x16";
case PERF_REG_ARM64_X17:
return "x17";
case PERF_REG_ARM64_X18:
return "x18";
case PERF_REG_ARM64_X19:
return "x19";
case PERF_REG_ARM64_X20:
return "x20";
case PERF_REG_ARM64_X21:
return "x21";
case PERF_REG_ARM64_X22:
return "x22";
case PERF_REG_ARM64_X23:
return "x23";
case PERF_REG_ARM64_X24:
return "x24";
case PERF_REG_ARM64_X25:
return "x25";
case PERF_REG_ARM64_X26:
return "x26";
case PERF_REG_ARM64_X27:
return "x27";
case PERF_REG_ARM64_X28:
return "x28";
case PERF_REG_ARM64_X29:
return "x29";
case PERF_REG_ARM64_SP:
return "sp";
case PERF_REG_ARM64_LR:
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */
/*
* Mapping of DWARF debug register numbers into register names.
*
* Copyright (C) 2010 Will Deacon, ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <stddef.h>
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
};
#define STR(s) #s
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num) \
{.name = STR(%x##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
/*
* Reference:
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
*/
static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
GPR_DWARFNUM_NAME(0),
GPR_DWARFNUM_NAME(1),
GPR_DWARFNUM_NAME(2),
GPR_DWARFNUM_NAME(3),
GPR_DWARFNUM_NAME(4),
GPR_DWARFNUM_NAME(5),
GPR_DWARFNUM_NAME(6),
GPR_DWARFNUM_NAME(7),
GPR_DWARFNUM_NAME(8),
GPR_DWARFNUM_NAME(9),
GPR_DWARFNUM_NAME(10),
GPR_DWARFNUM_NAME(11),
GPR_DWARFNUM_NAME(12),
GPR_DWARFNUM_NAME(13),
GPR_DWARFNUM_NAME(14),
GPR_DWARFNUM_NAME(15),
GPR_DWARFNUM_NAME(16),
GPR_DWARFNUM_NAME(17),
GPR_DWARFNUM_NAME(18),
GPR_DWARFNUM_NAME(19),
GPR_DWARFNUM_NAME(20),
GPR_DWARFNUM_NAME(21),
GPR_DWARFNUM_NAME(22),
GPR_DWARFNUM_NAME(23),
GPR_DWARFNUM_NAME(24),
GPR_DWARFNUM_NAME(25),
GPR_DWARFNUM_NAME(26),
GPR_DWARFNUM_NAME(27),
GPR_DWARFNUM_NAME(28),
GPR_DWARFNUM_NAME(29),
REG_DWARFNUM_NAME("%lr", 30),
REG_DWARFNUM_NAME("%sp", 31),
REG_DWARFNUM_END,
};
/**
* get_arch_regstr() - lookup register name from it's DWARF register number
* @n: the DWARF register number
*
* get_arch_regstr() returns the name of the register in struct
* regdwarfnum_table from it's DWARF register number. If the register is not
* found in the table, this returns NULL;
*/
const char *get_arch_regstr(unsigned int n)
{
const struct pt_regs_dwarfnum *roff;
for (roff = regdwarfnum_table; roff->name != NULL; roff++)
if (roff->dwarfnum == n)
return roff->name;
return NULL;
}
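
Probe-event code resolves DWARF register numbers straight to these
names; a trivial caller, linked against the table above:

#include <stdio.h>

const char *get_arch_regstr(unsigned int n);    /* from dwarf-regs.c */

int main(void)
{
    /* 0-29 map to %x0-%x29, 30 to %lr, 31 to %sp */
    printf("%s %s %s\n", get_arch_regstr(0),
           get_arch_regstr(30), get_arch_regstr(31));
    return 0;   /* prints: %x0 %lr %sp */
}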
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
int libunwind__arch_reg_id(int regnum)
{
switch (regnum) {
case UNW_AARCH64_X0:
return PERF_REG_ARM64_X0;
case UNW_AARCH64_X1:
return PERF_REG_ARM64_X1;
case UNW_AARCH64_X2:
return PERF_REG_ARM64_X2;
case UNW_AARCH64_X3:
return PERF_REG_ARM64_X3;
case UNW_AARCH64_X4:
return PERF_REG_ARM64_X4;
case UNW_AARCH64_X5:
return PERF_REG_ARM64_X5;
case UNW_AARCH64_X6:
return PERF_REG_ARM64_X6;
case UNW_AARCH64_X7:
return PERF_REG_ARM64_X7;
case UNW_AARCH64_X8:
return PERF_REG_ARM64_X8;
case UNW_AARCH64_X9:
return PERF_REG_ARM64_X9;
case UNW_AARCH64_X10:
return PERF_REG_ARM64_X10;
case UNW_AARCH64_X11:
return PERF_REG_ARM64_X11;
case UNW_AARCH64_X12:
return PERF_REG_ARM64_X12;
case UNW_AARCH64_X13:
return PERF_REG_ARM64_X13;
case UNW_AARCH64_X14:
return PERF_REG_ARM64_X14;
case UNW_AARCH64_X15:
return PERF_REG_ARM64_X15;
case UNW_AARCH64_X16:
return PERF_REG_ARM64_X16;
case UNW_AARCH64_X17:
return PERF_REG_ARM64_X17;
case UNW_AARCH64_X18:
return PERF_REG_ARM64_X18;
case UNW_AARCH64_X19:
return PERF_REG_ARM64_X19;
case UNW_AARCH64_X20:
return PERF_REG_ARM64_X20;
case UNW_AARCH64_X21:
return PERF_REG_ARM64_X21;
case UNW_AARCH64_X22:
return PERF_REG_ARM64_X22;
case UNW_AARCH64_X23:
return PERF_REG_ARM64_X23;
case UNW_AARCH64_X24:
return PERF_REG_ARM64_X24;
case UNW_AARCH64_X25:
return PERF_REG_ARM64_X25;
case UNW_AARCH64_X26:
return PERF_REG_ARM64_X26;
case UNW_AARCH64_X27:
return PERF_REG_ARM64_X27;
case UNW_AARCH64_X28:
return PERF_REG_ARM64_X28;
case UNW_AARCH64_X29:
return PERF_REG_ARM64_X29;
case UNW_AARCH64_X30:
return PERF_REG_ARM64_LR;
case UNW_AARCH64_SP:
return PERF_REG_ARM64_SP;
case UNW_AARCH64_PC:
return PERF_REG_ARM64_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
return -EINVAL;
}
return -EINVAL;
}
......@@ -2,7 +2,7 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include "../../util/types.h"
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
......
......@@ -23,7 +23,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_X86_SP];
map = map_groups__find(&thread->mg, MAP__VARIABLE, (u64) sp);
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
......
......@@ -4,7 +4,7 @@
#include <linux/perf_event.h>
#include "../../perf.h"
#include "../../util/types.h"
#include <linux/types.h>
#include "../../util/debug.h"
#include "tsc.h"
......
#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
#include "../../util/types.h"
#include <linux/types.h>
struct perf_tsc_conversion {
u16 time_shift;
......
......@@ -46,7 +46,7 @@ struct perf_annotate {
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct perf_sample *sample __maybe_unused,
struct addr_location *al,
struct perf_annotate *ann)
{
......@@ -70,7 +70,6 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
return ret;
}
......
......@@ -60,7 +60,6 @@ static int data__files_cnt;
#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
static char diff__default_sort_order[] = "dso,symbol";
static bool force;
static bool show_period;
static bool show_formula;
......@@ -220,7 +219,8 @@ static int setup_compute(const struct option *opt, const char *str,
static double period_percent(struct hist_entry *he, u64 period)
{
u64 total = he->hists->stats.total_period;
u64 total = hists__total_period(he->hists);
return (period * 100.0) / total;
}
......@@ -259,11 +259,18 @@ static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
char *buf, size_t size)
{
u64 he_total = he->hists->stats.total_period;
u64 pair_total = pair->hists->stats.total_period;
if (symbol_conf.filter_relative) {
he_total = he->hists->stats.total_non_filtered_period;
pair_total = pair->hists->stats.total_non_filtered_period;
}
return scnprintf(buf, size,
"(%" PRIu64 " * 100 / %" PRIu64 ") - "
"(%" PRIu64 " * 100 / %" PRIu64 ")",
pair->stat.period, pair->hists->stats.total_period,
he->stat.period, he->hists->stats.total_period);
pair->stat.period, pair_total,
he->stat.period, he_total);
}
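
The two totals are exactly what the new --percentage modes divide by:
"absolute" uses the full total_period, "relative" uses only the period
that survived filtering, so the displayed entries always sum to 100%.
In miniature (names are illustrative, not the exact perf internals):

static double entry_percent(u64 period, u64 total_period,
                            u64 total_non_filtered_period,
                            bool filter_relative)
{
    u64 total = filter_relative ? total_non_filtered_period
                                : total_period;

    return total ? period * 100.0 / total : 0.0;
}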
static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
......@@ -327,16 +334,22 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
if (al.filtered)
return 0;
if (hists__add_entry(&evsel->hists, &al, sample->period,
sample->weight, sample->transaction)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
/*
* The total_period is updated here before going to the output
* tree since normally only the baseline hists will call
* hists__output_resort() and precompute needs the total
* period in order to sort entries by percentage delta.
*/
evsel->hists.stats.total_period += sample->period;
if (!al.filtered)
evsel->hists.stats.total_non_filtered_period += sample->period;
return 0;
}
......@@ -564,8 +577,7 @@ static void hists__compute_resort(struct hists *hists)
hists->entries = RB_ROOT;
next = rb_first(root);
hists->nr_entries = 0;
hists->stats.total_period = 0;
hists__reset_stats(hists);
hists__reset_col_len(hists);
while (next != NULL) {
......@@ -575,7 +587,10 @@ static void hists__compute_resort(struct hists *hists)
next = rb_next(&he->rb_node_in);
insert_hist_entry_by_compute(&hists->entries, he, compute);
hists__inc_nr_entries(hists, he);
hists__inc_stats(hists, he);
if (!he->filtered)
hists__calc_col_len(hists, he);
}
}
......@@ -725,20 +740,24 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
" Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
static double baseline_percent(struct hist_entry *he)
{
struct hists *hists = he->hists;
return 100.0 * he->stat.period / hists->stats.total_period;
u64 total = hists__total_period(he->hists);
return 100.0 * he->stat.period / total;
}
static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
......@@ -1120,7 +1139,8 @@ static int data_init(int argc, const char **argv)
int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
{
sort_order = diff__default_sort_order;
perf_config(perf_default_config, NULL);
argc = parse_options(argc, argv, options, diff_usage, 0);
if (symbol__init() < 0)
......@@ -1131,6 +1151,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
ui_init();
sort__mode = SORT_MODE__DIFF;
if (setup_sorting() < 0)
usage_with_options(diff_usage, options);
......
......@@ -209,7 +209,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
thread = machine__findnew_thread(machine, sample->pid, sample->pid);
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
......
......@@ -14,6 +14,7 @@
#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"
#include "util/debug.h"
......@@ -31,9 +32,6 @@ static int caller_lines = -1;
static bool raw_ip;
static int *cpunode_map;
static int max_cpu_num;
struct alloc_stat {
u64 call_site;
u64 ptr;
......@@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted;
static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;
#define PATH_SYS_NODE "/sys/devices/system/node"
static int init_cpunode_map(void)
{
FILE *fp;
int i, err = -1;
fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
if (!fp) {
max_cpu_num = 4096;
return 0;
}
if (fscanf(fp, "%d", &max_cpu_num) < 1) {
pr_err("Failed to read 'kernel_max' from sysfs");
goto out_close;
}
max_cpu_num++;
cpunode_map = calloc(max_cpu_num, sizeof(int));
if (!cpunode_map) {
pr_err("%s: calloc failed\n", __func__);
goto out_close;
}
for (i = 0; i < max_cpu_num; i++)
cpunode_map[i] = -1;
err = 0;
out_close:
fclose(fp);
return err;
}
static int setup_cpunode_map(void)
{
struct dirent *dent1, *dent2;
DIR *dir1, *dir2;
unsigned int cpu, mem;
char buf[PATH_MAX];
if (init_cpunode_map())
return -1;
dir1 = opendir(PATH_SYS_NODE);
if (!dir1)
return 0;
while ((dent1 = readdir(dir1)) != NULL) {
if (dent1->d_type != DT_DIR ||
sscanf(dent1->d_name, "node%u", &mem) < 1)
continue;
snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
dir2 = opendir(buf);
if (!dir2)
continue;
while ((dent2 = readdir(dir2)) != NULL) {
if (dent2->d_type != DT_LNK ||
sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
continue;
cpunode_map[cpu] = mem;
}
closedir(dir2);
}
closedir(dir1);
return 0;
}
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
int bytes_req, int bytes_alloc, int cpu)
{
......@@ -235,7 +163,7 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
int ret = perf_evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpunode_map[sample->cpu],
int node1 = cpu__get_node(sample->cpu),
node2 = perf_evsel__intval(evsel, sample, "node");
if (node1 != node2)
......@@ -307,7 +235,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct machine *machine)
{
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->pid);
sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
......@@ -756,11 +684,13 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};
const char * const kmem_usage[] = {
"perf kmem [<options>] {record|stat}",
const char *const kmem_subcommands[] = { "record", "stat", NULL };
const char *kmem_usage[] = {
NULL,
NULL
};
argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
argc = parse_options_subcommand(argc, argv, kmem_options,
kmem_subcommands, kmem_usage, 0);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
......@@ -770,7 +700,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (!strncmp(argv[0], "rec", 3)) {
return __cmd_record(argc, argv);
} else if (!strcmp(argv[0], "stat")) {
if (setup_cpunode_map())
if (cpu__setup_cpunode_map())
return -1;
if (list_empty(&caller_sort))
......
......@@ -961,8 +961,10 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
"perf lock info [<options>]",
NULL
};
const char * const lock_usage[] = {
"perf lock [<options>] {record|report|script|info}",
const char *const lock_subcommands[] = { "record", "report", "script",
"info", NULL };
const char *lock_usage[] = {
NULL,
NULL
};
const char * const report_usage[] = {
......@@ -976,8 +978,8 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < LOCKHASH_SIZE; i++)
INIT_LIST_HEAD(lockhash_table + i);
argc = parse_options(argc, argv, lock_options, lock_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(lock_usage, lock_options);
......
......@@ -21,11 +21,6 @@ struct perf_mem {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
static const char * const mem_usage[] = {
"perf mem [<options>] {record <command> |report}",
NULL
};
static int __cmd_record(int argc, const char **argv)
{
int rec_argc, i = 0, j;
......@@ -220,9 +215,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
" between columns '.' is reserved."),
OPT_END()
};
const char *const mem_subcommands[] = { "record", "report", NULL };
const char *mem_usage[] = {
NULL,
NULL
};
argc = parse_options(argc, argv, mem_options, mem_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
usage_with_options(mem_usage, mem_options);
......
......@@ -30,37 +30,6 @@
#include <sched.h>
#include <sys/mman.h>
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))
static int on_exit(on_exit_func_t function, void *arg)
{
if (__on_exit_count == ATEXIT_MAX)
return -ENOMEM;
else if (__on_exit_count == 0)
atexit(__handle_on_exit_funcs);
__on_exit_funcs[__on_exit_count] = function;
__on_exit_args[__on_exit_count++] = arg;
return 0;
}
static void __handle_on_exit_funcs(void)
{
int i;
for (i = 0; i < __on_exit_count; i++)
__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
struct record {
struct perf_tool tool;
......@@ -147,29 +116,19 @@ static void sig_handler(int sig)
{
if (sig == SIGCHLD)
child_finished = 1;
else
signr = sig;
done = 1;
signr = sig;
}
static void record__sig_exit(int exit_status __maybe_unused, void *arg)
static void record__sig_exit(void)
{
struct record *rec = arg;
int status;
if (rec->evlist->workload.pid > 0) {
if (!child_finished)
kill(rec->evlist->workload.pid, SIGTERM);
wait(&status);
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), rec->progname);
}
if (signr == -1 || signr == SIGUSR1)
if (signr == -1)
return;
signal(signr, SIG_DFL);
raise(signr);
}
static int record__open(struct record *rec)
......@@ -243,27 +202,6 @@ static int process_buildids(struct record *rec)
size, &build_id__mark_dso_hit_ops);
}
static void record__exit(int status, void *arg)
{
struct record *rec = arg;
struct perf_data_file *file = &rec->file;
if (status != 0)
return;
if (!file->is_pipe) {
rec->session->header.data_size += rec->bytes_written;
if (!rec->no_buildid)
process_buildids(rec);
perf_session__write_header(rec->session, rec->evlist,
file->fd, true);
perf_session__delete(rec->session);
perf_evlist__delete(rec->evlist);
symbol__exit();
}
}
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
......@@ -344,18 +282,19 @@ static volatile int workload_exec_errno;
* if the fork fails, since we asked by setting its
* want_signal to true.
*/
static void workload_exec_failed_signal(int signo, siginfo_t *info,
static void workload_exec_failed_signal(int signo __maybe_unused,
siginfo_t *info,
void *ucontext __maybe_unused)
{
workload_exec_errno = info->si_value.sival_int;
done = 1;
signr = signo;
child_finished = 1;
}
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
int status = 0;
unsigned long waking = 0;
const bool forks = argc > 0;
struct machine *machine;
......@@ -367,7 +306,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
rec->progname = argv[0];
on_exit(record__sig_exit, rec);
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
......@@ -388,32 +327,28 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
workload_exec_failed_signal);
if (err < 0) {
pr_err("Couldn't run the workload!\n");
status = err;
goto out_delete_session;
}
}
if (record__open(rec) != 0) {
err = -1;
goto out_delete_session;
goto out_child;
}
if (!rec->evlist->nr_groups)
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
/*
* perf_session__delete(session) will be called at record__exit()
*/
on_exit(record__exit, rec);
if (file->is_pipe) {
err = perf_header__write_pipe(file->fd);
if (err < 0)
goto out_delete_session;
goto out_child;
} else {
err = perf_session__write_header(session, rec->evlist,
file->fd, false);
if (err < 0)
goto out_delete_session;
goto out_child;
}
if (!rec->no_buildid
......@@ -421,7 +356,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
err = -1;
goto out_delete_session;
goto out_child;
}
machine = &session->machines.host;
......@@ -431,7 +366,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
process_synthesized_event);
if (err < 0) {
pr_err("Couldn't synthesize attrs.\n");
goto out_delete_session;
goto out_child;
}
if (have_tracepoints(&rec->evlist->entries)) {
......@@ -447,7 +382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
process_synthesized_event);
if (err <= 0) {
pr_err("Couldn't record tracing data.\n");
goto out_delete_session;
goto out_child;
}
rec->bytes_written += err;
}
......@@ -475,7 +410,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address);
if (err != 0)
goto out_delete_session;
goto out_child;
if (rec->realtime_prio) {
struct sched_param param;
......@@ -484,7 +419,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
pr_err("Could not set realtime priority.\n");
err = -1;
goto out_delete_session;
goto out_child;
}
}
......@@ -512,13 +447,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (record__mmap_read_all(rec) < 0) {
err = -1;
goto out_delete_session;
goto out_child;
}
if (hits == rec->samples) {
if (done)
break;
err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
if (err < 0 && errno == EINTR)
err = 0;
waking++;
}
......@@ -538,28 +475,52 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
pr_err("Workload failed: %s\n", emsg);
err = -1;
goto out_delete_session;
goto out_child;
}
if (quiet || signr == SIGUSR1)
return 0;
if (!quiet) {
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
/*
* Approximate RIP event size: 24 bytes.
*/
fprintf(stderr,
"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
(double)rec->bytes_written / 1024.0 / 1024.0,
file->path,
rec->bytes_written / 24);
}
/*
* Approximate RIP event size: 24 bytes.
*/
fprintf(stderr,
"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
(double)rec->bytes_written / 1024.0 / 1024.0,
file->path,
rec->bytes_written / 24);
out_child:
if (forks) {
int exit_status;
return 0;
if (!child_finished)
kill(rec->evlist->workload.pid, SIGTERM);
wait(&exit_status);
if (err < 0)
status = err;
else if (WIFEXITED(exit_status))
status = WEXITSTATUS(exit_status);
else if (WIFSIGNALED(exit_status))
signr = WTERMSIG(exit_status);
} else
status = err;
if (!err && !file->is_pipe) {
rec->session->header.data_size += rec->bytes_written;
if (!rec->no_buildid)
process_buildids(rec);
perf_session__write_header(rec->session, rec->evlist,
file->fd, true);
}
out_delete_session:
perf_session__delete(session);
return err;
return status;
}
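
This new out_child block is what implements the "propagate exit status
of a command line workload" item from the changelog: once recording
stops, the workload is reaped and its exit code (or fatal signal)
becomes perf record's own. The shape of that logic as a standalone
sketch:

#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>

/* Run argv as a child, return its exit status, and re-raise a fatal
 * signal so the caller terminates the same way the workload did. */
static int run_and_propagate(char **argv)
{
    int ws;
    pid_t pid = fork();

    if (pid == 0) {
        execvp(argv[0], argv);
        _exit(127);             /* exec failed */
    }
    if (pid < 0 || waitpid(pid, &ws, 0) < 0)
        return -1;

    if (WIFEXITED(ws))
        return WEXITSTATUS(ws);
    if (WIFSIGNALED(ws)) {
        signal(WTERMSIG(ws), SIG_DFL);
        raise(WTERMSIG(ws));    /* terminate like the child */
    }
    return -1;
}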
#define BRANCH_OPT(n, m) \
......@@ -988,6 +949,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
err = __cmd_record(&record, argc, argv);
out_symbol_exit:
perf_evlist__delete(rec->evlist);
symbol__exit();
return err;
}
......@@ -57,6 +57,7 @@ struct report {
const char *cpu_list;
const char *symbol_filter_str;
float min_percent;
u64 nr_entries;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
......@@ -75,6 +76,27 @@ static int report__config(const char *var, const char *value, void *cb)
return perf_default_config(var, value, cb);
}
static void report__inc_stats(struct report *rep, struct hist_entry *he)
{
/*
* The @he is either of a newly created one or an existing one
* merging current sample. We only want to count a new one so
* checking ->nr_events being 1.
*/
if (he->stat.nr_events == 1)
rep->nr_entries++;
/*
* Only counts number of samples at this stage as it's more
* natural to do it here and non-sample events are also
* counted in perf_session_deliver_event(). The dump_trace
* requires this info is ready before going to the output tree.
*/
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
{
......@@ -121,8 +143,8 @@ static int report__add_mem_hist_entry(struct report *rep, struct addr_location *
goto out;
}
evsel->hists.stats.total_period += cost;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
report__inc_stats(rep, he);
err = hist_entry__append_callchain(he, sample);
out:
return err;
......@@ -173,9 +195,7 @@ static int report__add_branch_hist_entry(struct report *rep, struct addr_locatio
if (err)
goto out;
}
evsel->hists.stats.total_period += 1;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
report__inc_stats(rep, he);
} else
goto out;
}
......@@ -208,8 +228,8 @@ static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
if (ui__has_annotation())
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
report__inc_stats(rep, he);
out:
return err;
}
......@@ -337,6 +357,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
char buf[512];
size_t size = sizeof(buf);
if (symbol_conf.filter_relative) {
nr_samples = hists->stats.nr_non_filtered_samples;
nr_events = hists->stats.total_non_filtered_period;
}
if (perf_evsel__is_group_event(evsel)) {
struct perf_evsel *pos;
......@@ -344,8 +369,13 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
evname = buf;
for_each_group_member(pos, evsel) {
nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
nr_events += pos->hists.stats.total_period;
if (symbol_conf.filter_relative) {
nr_samples += pos->hists.stats.nr_non_filtered_samples;
nr_events += pos->hists.stats.total_non_filtered_period;
} else {
nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
nr_events += pos->hists.stats.total_period;
}
}
}
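The filter_relative branch above is what the new --percentage option selects between. A small sketch of the two modes with made-up counter values (toy types, not perf's events_stats):

#include <stdio.h>

struct toy_stats {
	unsigned long long total_period;		/* all samples */
	unsigned long long total_non_filtered_period;	/* after filters */
};

/* relative: percent of what survived the filters;
 * absolute: percent of everything recorded. */
static double toy_percent(const struct toy_stats *st,
			  unsigned long long period, int filter_relative)
{
	unsigned long long total = filter_relative ?
		st->total_non_filtered_period : st->total_period;

	return total ? 100.0 * period / total : 0.0;
}

int main(void)
{
	struct toy_stats st = {
		.total_period = 1000,
		.total_non_filtered_period = 400,
	};

	printf("relative: %.1f%%\n", toy_percent(&st, 100, 1));	/* 25.0% */
	printf("absolute: %.1f%%\n", toy_percent(&st, 100, 0));	/* 10.0% */
	return 0;
}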
......@@ -470,24 +500,12 @@ static int report__browse_hists(struct report *rep)
return ret;
}
static u64 report__collapse_hists(struct report *rep)
static void report__collapse_hists(struct report *rep)
{
struct ui_progress prog;
struct perf_evsel *pos;
u64 nr_samples = 0;
/*
* Count number of histogram entries to use when showing progress,
* reusing nr_samples variable.
*/
evlist__for_each(rep->session->evlist, pos)
nr_samples += pos->hists.nr_entries;
ui_progress__init(&prog, nr_samples, "Merging related events...");
/*
* Count total number of samples, will be used to check if this
* session had any.
*/
nr_samples = 0;
ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
evlist__for_each(rep->session->evlist, pos) {
struct hists *hists = &pos->hists;
......@@ -496,7 +514,6 @@ static u64 report__collapse_hists(struct report *rep)
hists->symbol_filter_str = rep->symbol_filter_str;
hists__collapse_resort(hists, &prog);
nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
/* Non-group events are considered as leader */
if (symbol_conf.event_group &&
......@@ -509,14 +526,11 @@ static u64 report__collapse_hists(struct report *rep)
}
ui_progress__finish();
return nr_samples;
}
static int __cmd_report(struct report *rep)
{
int ret;
u64 nr_samples;
struct perf_session *session = rep->session;
struct perf_evsel *pos;
struct perf_data_file *file = session->file;
......@@ -556,12 +570,12 @@ static int __cmd_report(struct report *rep)
}
}
nr_samples = report__collapse_hists(rep);
report__collapse_hists(rep);
if (session_done())
return 0;
if (nr_samples == 0) {
if (rep->nr_entries == 0) {
ui__error("The %s file has no samples!\n", file->path);
return 0;
}
......@@ -573,11 +587,9 @@ static int __cmd_report(struct report *rep)
}
static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
struct report *rep = (struct report *)opt->value;
char *tok, *tok2;
char *endptr;
/*
* --no-call-graph
......@@ -587,80 +599,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return 0;
}
symbol_conf.use_callchain = true;
if (!arg)
return 0;
tok = strtok((char *)arg, ",");
if (!tok)
return -1;
/* get the output mode */
if (!strncmp(tok, "graph", strlen(arg)))
callchain_param.mode = CHAIN_GRAPH_ABS;
else if (!strncmp(tok, "flat", strlen(arg)))
callchain_param.mode = CHAIN_FLAT;
else if (!strncmp(tok, "fractal", strlen(arg)))
callchain_param.mode = CHAIN_GRAPH_REL;
else if (!strncmp(tok, "none", strlen(arg))) {
callchain_param.mode = CHAIN_NONE;
symbol_conf.use_callchain = false;
return 0;
}
else
return -1;
/* get the min percentage */
tok = strtok(NULL, ",");
if (!tok)
goto setup;
callchain_param.min_percent = strtod(tok, &endptr);
if (tok == endptr)
return -1;
/* get the print limit */
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
if (tok2[0] != 'c') {
callchain_param.print_limit = strtoul(tok2, &endptr, 0);
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
}
/* get the call chain order */
if (!strncmp(tok2, "caller", strlen("caller")))
callchain_param.order = ORDER_CALLER;
else if (!strncmp(tok2, "callee", strlen("callee")))
callchain_param.order = ORDER_CALLEE;
else
return -1;
/* Get the sort key */
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
if (!strncmp(tok2, "function", strlen("function")))
callchain_param.key = CCKEY_FUNCTION;
else if (!strncmp(tok2, "address", strlen("address")))
callchain_param.key = CCKEY_ADDRESS;
else
return -1;
setup:
if (callchain_register_param(&callchain_param) < 0) {
pr_err("Can't register callchain params\n");
return -1;
}
return 0;
return parse_callchain_report_opt(arg);
}
int
......@@ -760,10 +699,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "header-only", &report.header_only,
"Show only data header."),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
" dso_to, dso_from, symbol_to, symbol_from, mispredict,"
" weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
"snoop, locked, abort, in_tx, transaction"),
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
" Please refer the man page for the complete list."),
OPT_STRING('F', "fields", &field_order, "key[,keys...]",
"output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
......@@ -772,7 +711,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
......@@ -823,6 +762,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"how to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
struct perf_data_file file = {
......@@ -866,52 +807,31 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
if (branch_mode == -1 && has_br_stack)
sort__mode = SORT_MODE__BRANCH;
/* sort__mode could be NORMAL if --no-branch-stack */
if (sort__mode == SORT_MODE__BRANCH) {
/*
* if no sort_order is provided, then specify
* branch-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "comm,dso_from,symbol_from,"
"dso_to,symbol_to";
}
if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
pr_err("branch and mem mode incompatible\n");
goto error;
}
sort__mode = SORT_MODE__MEMORY;
/*
* if no sort_order is provided, then specify
* mem-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
}
if (setup_sorting() < 0) {
parse_options_usage(report_usage, options, "s", 1);
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
if (field_order)
parse_options_usage(sort_order ? NULL : report_usage,
options, "F", 1);
goto error;
}
if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0)
goto error;
}
/* Force tty output for header output. */
if (report.header || report.header_only)
use_browser = 0;
if (strcmp(input_name, "-") != 0)
setup_browser(true);
else {
else
use_browser = 0;
perf_hpp__init();
}
if (report.header || report.header_only) {
perf_session__fprintf_info(session, stdout,
......
......@@ -66,7 +66,7 @@ struct sched_atom {
struct task_desc *wakee;
};
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
enum thread_state {
THREAD_SLEEPING = 0,
......@@ -149,7 +149,6 @@ struct perf_sched {
unsigned long nr_runs;
unsigned long nr_timestamps;
unsigned long nr_unordered_timestamps;
unsigned long nr_state_machine_bugs;
unsigned long nr_context_switch_bugs;
unsigned long nr_events;
unsigned long nr_lost_chunks;
......@@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
const u32 pid = perf_evsel__intval(evsel, sample, "pid"),
success = perf_evsel__intval(evsel, sample, "success");
const u32 pid = perf_evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
u64 timestamp = sample->time;
/* Note for later, it may be interesting to observe the failing cases */
if (!success)
return 0;
wakee = machine__findnew_thread(machine, 0, pid);
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
if (!atoms) {
......@@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
atom = list_entry(atoms->work_list.prev, struct work_atom, list);
/*
* A wakeup event is not guaranteed to happen while the task is
* off the run queue; it may also fire while the task is on the
* run queue, with the wakeup merely changing ->state to
* TASK_RUNNING. We should not set ->wake_up_time when waking
* up a task that is already on the run queue.
*
* You WILL be missing events if you've recorded only
* one CPU, or are looking at only one, so don't
* make useless noise.
* skip in this case.
*/
if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
sched->nr_state_machine_bugs++;
return 0;
sched->nr_timestamps++;
if (atom->sched_out_time > timestamp) {
......@@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_out __maybe_unused, *sched_in;
const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
......@@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
return -1;
}
sched_out = machine__findnew_thread(machine, 0, prev_pid);
sched_in = machine__findnew_thread(machine, 0, next_pid);
sched->curr_thread[this_cpu] = sched_in;
......@@ -1300,17 +1298,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
new_shortname = 0;
if (!sched_in->shortname[0]) {
sched_in->shortname[0] = sched->next_shortname1;
sched_in->shortname[1] = sched->next_shortname2;
if (sched->next_shortname1 < 'Z') {
sched->next_shortname1++;
if (!strcmp(thread__comm_str(sched_in), "swapper")) {
/*
* Don't allocate a letter-number for swapper:0
* as a shortname. Instead, we use '.' for it.
*/
sched_in->shortname[0] = '.';
sched_in->shortname[1] = ' ';
} else {
sched->next_shortname1='A';
if (sched->next_shortname2 < '9') {
sched->next_shortname2++;
sched_in->shortname[0] = sched->next_shortname1;
sched_in->shortname[1] = sched->next_shortname2;
if (sched->next_shortname1 < 'Z') {
sched->next_shortname1++;
} else {
sched->next_shortname2='0';
sched->next_shortname1 = 'A';
if (sched->next_shortname2 < '9')
sched->next_shortname2++;
else
sched->next_shortname2 = '0';
}
}
new_shortname = 1;
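The rotation above hands out two-character shortnames 'A0' through 'Z9' (260 names) before wrapping back to 'A0'. A standalone sketch of the same generator, with swapper's '.' special case omitted:

#include <stdio.h>

static char next1 = 'A', next2 = '0';

/* Emit the current shortname, then advance: letter first,
 * digit only when the letter wraps past 'Z'. */
static void next_shortname(char out[3])
{
	out[0] = next1;
	out[1] = next2;
	out[2] = '\0';

	if (next1 < 'Z') {
		next1++;
	} else {
		next1 = 'A';
		next2 = (next2 < '9') ? next2 + 1 : '0';
	}
}

int main(void)
{
	char name[3];
	int i;

	for (i = 0; i < 28; i++) {	/* A0 B0 ... Z0 A1 B1 */
		next_shortname(name);
		printf("%s ", name);
	}
	printf("\n");
	return 0;
}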
......@@ -1322,12 +1328,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
else
printf("*");
if (sched->curr_thread[cpu]) {
if (sched->curr_thread[cpu]->tid)
printf("%2s ", sched->curr_thread[cpu]->shortname);
else
printf(". ");
} else
if (sched->curr_thread[cpu])
printf("%2s ", sched->curr_thread[cpu]->shortname);
else
printf(" ");
}
......@@ -1496,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
}
if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
(double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
sched->nr_state_machine_bugs, sched->nr_timestamps);
if (sched->nr_lost_events)
printf(" (due to lost events?)");
printf("\n");
}
if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
......@@ -1635,6 +1630,7 @@ static int __cmd_record(int argc, const char **argv)
"-e", "sched:sched_stat_runtime",
"-e", "sched:sched_process_fork",
"-e", "sched:sched_wakeup",
"-e", "sched:sched_wakeup_new",
"-e", "sched:sched_migrate_task",
};
......@@ -1713,8 +1709,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"perf sched replay [<options>]",
NULL
};
const char * const sched_usage[] = {
"perf sched [<options>] {record|latency|map|replay|script}",
const char *const sched_subcommands[] = { "record", "latency", "map",
"replay", "script", NULL };
const char *sched_usage[] = {
NULL,
NULL
};
struct trace_sched_handler lat_ops = {
......@@ -1736,8 +1734,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
sched.curr_pid[i] = -1;
argc = parse_options(argc, argv, sched_options, sched_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(sched_usage, sched_options);
......
......@@ -253,6 +253,9 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
return NULL;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
evsel->hists.stats.nr_non_filtered_samples++;
return he;
}
......@@ -694,8 +697,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 ||
al.filtered)
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
return;
if (!top->kptr_restrict_warned &&
......@@ -1081,8 +1083,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight,"
" abort, in_tx, transaction"),
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
" Please refer the man page for the complete list."),
OPT_STRING(0, "fields", &field_order, "key[,keys...]",
"output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
......@@ -1116,6 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
OPT_CALLBACK(0, "percent-limit", &top, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
const char * const top_usage[] = {
......@@ -1133,17 +1139,19 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(top_usage, options);
if (sort_order == default_sort_order)
sort_order = "dso,symbol";
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
sort__need_collapse = 1;
if (setup_sorting() < 0) {
parse_options_usage(top_usage, options, "s", 1);
if (sort_order)
parse_options_usage(top_usage, options, "s", 1);
if (field_order)
parse_options_usage(sort_order ? NULL : top_usage,
options, "fields", 0);
goto out_delete_evlist;
}
/* display thread wants entries to be collapsed in a different tree */
sort__need_collapse = 1;
if (top.use_stdio)
use_browser = 0;
else if (top.use_tui)
......
......@@ -29,16 +29,22 @@ ifeq ($(ARCH),x86)
endif
NO_PERF_REGS := 0
endif
ifeq ($(ARCH),arm)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
# So far there's only x86 libdw unwind support merged in perf.
ifeq ($(ARCH),arm64)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
# So far there's only x86 and arm libdw unwind support merged in perf.
# Disable it on all other architectures in case libdw unwind
# support is detected in the system. Add supported architectures
# to the check.
ifneq ($(ARCH),x86)
ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
NO_LIBDW_DWARF_UNWIND := 1
endif
......@@ -168,7 +174,6 @@ CORE_FEATURE_TESTS = \
libpython-version \
libslang \
libunwind \
on-exit \
stackprotector-all \
timerfd \
libdw-dwarf-unwind
......@@ -194,7 +199,6 @@ VF_FEATURE_TESTS = \
libelf-getphdrnum \
libelf-mmap \
libpython-version \
on-exit \
stackprotector-all \
timerfd \
libunwind-debug-frame \
......@@ -370,7 +374,7 @@ else
endif
ifndef NO_LIBUNWIND
ifeq ($(ARCH),arm)
ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
msg := $(warning No debug_frame support found in libunwind);
......@@ -565,12 +569,6 @@ ifneq ($(filter -lbfd,$(EXTLIBS)),)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
endif
ifndef NO_ON_EXIT
ifeq ($(feature-on-exit), 1)
CFLAGS += -DHAVE_ON_EXIT_SUPPORT
endif
endif
ifndef NO_BACKTRACE
ifeq ($(feature-backtrace), 1)
CFLAGS += -DHAVE_BACKTRACE_SUPPORT
......
......@@ -24,7 +24,6 @@ FILES= \
test-libslang.bin \
test-libunwind.bin \
test-libunwind-debug-frame.bin \
test-on-exit.bin \
test-stackprotector-all.bin \
test-timerfd.bin \
test-libdw-dwarf-unwind.bin
......@@ -133,9 +132,6 @@ test-liberty-z.bin:
test-cplus-demangle.bin:
$(BUILD) -liberty
test-on-exit.bin:
$(BUILD)
test-backtrace.bin:
$(BUILD)
......
......@@ -69,10 +69,6 @@
# include "test-libbfd.c"
#undef main
#define main main_test_on_exit
# include "test-on-exit.c"
#undef main
#define main main_test_backtrace
# include "test-backtrace.c"
#undef main
......@@ -110,7 +106,6 @@ int main(int argc, char *argv[])
main_test_gtk2(argc, argv);
main_test_gtk2_infobar(argc, argv);
main_test_libbfd();
main_test_on_exit();
main_test_backtrace();
main_test_libnuma();
main_test_timerfd();
......
#include <stdio.h>
#include <stdlib.h>
static void exit_fn(int status, void *__data)
{
printf("exit status: %d, data: %d\n", status, *(int *)__data);
}
static int data = 123;
int main(void)
{
on_exit(exit_fn, &data);
return 321;
}
......@@ -121,8 +121,8 @@ __perf_main ()
elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then
evts=$($cmd list --raw-dump)
__perfcomp_colon "$evts" "$cur"
# List subcommands for 'perf kvm'
elif [[ $prev == "kvm" ]]; then
# List subcommands for perf commands
elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then
subcmds=$($cmd $prev --list-cmds)
__perfcomp_colon "$subcmds" "$cur"
# List long option names
......
#ifndef _PERF_SYS_H
#define _PERF_SYS_H
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/types.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#if defined(__i386__)
#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 336
#endif
#ifndef __NR_futex
# define __NR_futex 240
#endif
#ifndef __NR_gettid
# define __NR_gettid 224
#endif
#endif
#if defined(__x86_64__)
#define mb() asm volatile("mfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 298
#endif
#ifndef __NR_futex
# define __NR_futex 202
#endif
#ifndef __NR_gettid
# define __NR_gettid 186
#endif
#endif
#ifdef __powerpc__
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
#define mb() asm volatile ("sync" ::: "memory")
#define wmb() asm volatile ("sync" ::: "memory")
#define rmb() asm volatile ("sync" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __s390__
#define mb() asm volatile("bcr 15,0" ::: "memory")
#define wmb() asm volatile("bcr 15,0" ::: "memory")
#define rmb() asm volatile("bcr 15,0" ::: "memory")
#endif
#ifdef __sh__
#if defined(__SH4A__) || defined(__SH5__)
# define mb() asm volatile("synco" ::: "memory")
# define wmb() asm volatile("synco" ::: "memory")
# define rmb() asm volatile("synco" ::: "memory")
#else
# define mb() asm volatile("" ::: "memory")
# define wmb() asm volatile("" ::: "memory")
# define rmb() asm volatile("" ::: "memory")
#endif
#define CPUINFO_PROC "cpu type"
#endif
#ifdef __hppa__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __sparc__
#ifdef __LP64__
#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
"membar #StoreLoad\n" \
"1:\n":::"memory")
#else
#define mb() asm volatile("":::"memory")
#endif
#define wmb() asm volatile("":::"memory")
#define rmb() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __alpha__
#define mb() asm volatile("mb" ::: "memory")
#define wmb() asm volatile("wmb" ::: "memory")
#define rmb() asm volatile("mb" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __ia64__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#ifdef __arm__
/*
* Use the __kuser_memory_barrier helper in the CPU helper page. See
* arch/arm/kernel/entry-armv.S in the kernel source for details.
*/
#define mb() ((void(*)(void))0xffff0fa0)()
#define wmb() ((void(*)(void))0xffff0fa0)()
#define rmb() ((void(*)(void))0xffff0fa0)()
#define CPUINFO_PROC "Processor"
#endif
#ifdef __aarch64__
#define mb() asm volatile("dmb ish" ::: "memory")
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif
#ifdef __mips__
#define mb() asm volatile( \
".set mips2\n\t" \
"sync\n\t" \
".set mips0" \
: /* no output */ \
: /* no input */ \
: "memory")
#define wmb() mb()
#define rmb() mb()
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __arc__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "Processor"
#endif
#ifdef __metag__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "CPU"
#endif
#ifdef __xtensa__
#define mb() asm volatile("memw" ::: "memory")
#define wmb() asm volatile("memw" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "core ID"
#endif
#ifdef __tile__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#define barrier() asm volatile ("" ::: "memory")
#ifndef cpu_relax
#define cpu_relax() barrier()
#endif
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
int fd;
fd = syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
#ifdef HAVE_ATTR_TEST
if (unlikely(test_attr__enabled))
test_attr__open(attr, pid, cpu, fd, group_fd, flags);
#endif
return fd;
}
#endif /* _PERF_SYS_H */
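A minimal, self-contained caller of the syscall wrapped by sys_perf_event_open() above: count user-space instructions across a busy loop (the event choice and loop size are illustrative):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	volatile long i;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	/* pid = 0 (self), cpu = -1 (any), no group leader, no flags */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (i = 0; i < 1000000; i++)
		;			/* the measured workload */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %lld\n", count);
	close(fd);
	return 0;
}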
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
#include <asm/unistd.h>
#if defined(__i386__)
#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 336
#endif
#ifndef __NR_futex
# define __NR_futex 240
#endif
#endif
#if defined(__x86_64__)
#define mb() asm volatile("mfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 298
#endif
#ifndef __NR_futex
# define __NR_futex 202
#endif
#endif
#ifdef __powerpc__
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
#define mb() asm volatile ("sync" ::: "memory")
#define wmb() asm volatile ("sync" ::: "memory")
#define rmb() asm volatile ("sync" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __s390__
#define mb() asm volatile("bcr 15,0" ::: "memory")
#define wmb() asm volatile("bcr 15,0" ::: "memory")
#define rmb() asm volatile("bcr 15,0" ::: "memory")
#endif
#ifdef __sh__
#if defined(__SH4A__) || defined(__SH5__)
# define mb() asm volatile("synco" ::: "memory")
# define wmb() asm volatile("synco" ::: "memory")
# define rmb() asm volatile("synco" ::: "memory")
#else
# define mb() asm volatile("" ::: "memory")
# define wmb() asm volatile("" ::: "memory")
# define rmb() asm volatile("" ::: "memory")
#endif
#define CPUINFO_PROC "cpu type"
#endif
#ifdef __hppa__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __sparc__
#ifdef __LP64__
#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
"membar #StoreLoad\n" \
"1:\n":::"memory")
#else
#define mb() asm volatile("":::"memory")
#endif
#define wmb() asm volatile("":::"memory")
#define rmb() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __alpha__
#define mb() asm volatile("mb" ::: "memory")
#define wmb() asm volatile("wmb" ::: "memory")
#define rmb() asm volatile("mb" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __ia64__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#ifdef __arm__
/*
* Use the __kuser_memory_barrier helper in the CPU helper page. See
* arch/arm/kernel/entry-armv.S in the kernel source for details.
*/
#define mb() ((void(*)(void))0xffff0fa0)()
#define wmb() ((void(*)(void))0xffff0fa0)()
#define rmb() ((void(*)(void))0xffff0fa0)()
#define CPUINFO_PROC "Processor"
#endif
#ifdef __aarch64__
#define mb() asm volatile("dmb ish" ::: "memory")
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif
#ifdef __mips__
#define mb() asm volatile( \
".set mips2\n\t" \
"sync\n\t" \
".set mips0" \
: /* no output */ \
: /* no input */ \
: "memory")
#define wmb() mb()
#define rmb() mb()
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __arc__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "Processor"
#endif
#ifdef __metag__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "CPU"
#endif
#ifdef __xtensa__
#define mb() asm volatile("memw" ::: "memory")
#define wmb() asm volatile("memw" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "core ID"
#endif
#ifdef __tile__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#define barrier() asm volatile ("" ::: "memory")
#ifndef cpu_relax
#define cpu_relax() barrier()
#endif
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include "util/types.h"
#include <stdbool.h>
#include <linux/types.h>
#include <linux/perf_event.h>
/*
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
* counters in the current task.
*/
#define PR_TASK_PERF_EVENTS_DISABLE 31
#define PR_TASK_PERF_EVENTS_ENABLE 32
extern bool test_attr__enabled;
void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
#define HAVE_ATTR_TEST
#include "perf-sys.h"
#ifndef NSEC_PER_SEC
# define NSEC_PER_SEC 1000000000ULL
......@@ -193,67 +29,8 @@ static inline unsigned long long rdclock(void)
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
/*
* Pick up some kernel type conventions:
*/
#define __user
#define asmlinkage
#define unlikely(x) __builtin_expect(!!(x), 0)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
extern bool test_attr__enabled;
void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
int fd;
fd = syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
if (unlikely(test_attr__enabled))
test_attr__open(attr, pid, cpu, fd, group_fd, flags);
return fd;
}
#define MAX_COUNTERS 256
#define MAX_NR_CPUS 256
struct ip_callchain {
u64 nr;
u64 ips[0];
};
struct branch_flags {
u64 mispred:1;
u64 predicted:1;
u64 in_tx:1;
u64 abort:1;
u64 reserved:60;
};
struct branch_entry {
u64 from;
u64 to;
struct branch_flags flags;
};
struct branch_stack {
u64 nr;
struct branch_entry entries[0];
};
extern const char *input_name;
extern bool perf_host, perf_guest;
extern const char perf_version_string[];
......@@ -262,13 +39,6 @@ void pthread__unblock_sigwinch(void);
#include "util/target.h"
enum perf_call_graph_mode {
CALLCHAIN_NONE,
CALLCHAIN_FP,
CALLCHAIN_DWARF,
CALLCHAIN_MAX
};
struct record_opts {
struct target target;
int call_graph;
......
/*
* The struct perf_event_attr test support.
*
......@@ -19,14 +18,8 @@
* permissions. All the event text files are stored there.
*/
/*
* Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
* 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
*/
#define __SANE_USERSPACE_TYPES__
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include "../perf.h"
......
......@@ -115,7 +115,7 @@ static struct test {
.desc = "Test parsing with no sample_id_all bit set",
.func = test__parse_no_sample_id_all,
},
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "Test dwarf unwind",
......@@ -123,6 +123,22 @@ static struct test {
},
#endif
#endif
{
.desc = "Test filtering hist entries",
.func = test__hists_filter,
},
{
.desc = "Test mmap thread lookup",
.func = test__mmap_thread_lookup,
},
{
.desc = "Test thread mg sharing",
.func = test__thread_mg_share,
},
{
.desc = "Test output sorting of hist entries",
.func = test__hists_output,
},
{
.func = NULL,
},
......
#include <sys/types.h>
#include <linux/types.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <inttypes.h>
#include <ctype.h>
#include <string.h>
......@@ -257,7 +256,7 @@ static int process_sample_event(struct machine *machine,
return -1;
}
thread = machine__findnew_thread(machine, sample.pid, sample.pid);
thread = machine__findnew_thread(machine, sample.pid, sample.tid);
if (!thread) {
pr_debug("machine__findnew_thread failed\n");
return -1;
......
#include "util.h"
#include <stdlib.h>
#include <sys/types.h>
#include <linux/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
......
#include <linux/compiler.h>
#include <sys/types.h>
#include <linux/types.h>
#include <unistd.h>
#include "tests.h"
#include "debug.h"
......
......@@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
if (perf_evsel__test_field(evsel, "prio", 4, true))
ret = -1;
if (perf_evsel__test_field(evsel, "success", 4, true))
ret = -1;
if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
ret = -1;
......
#include "perf.h"
#include "util/debug.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
#include "tests/hists_common.h"
static struct {
u32 pid;
const char *comm;
} fake_threads[] = {
{ 100, "perf" },
{ 200, "perf" },
{ 300, "bash" },
};
static struct {
u32 pid;
u64 start;
const char *filename;
} fake_mmap_info[] = {
{ 100, 0x40000, "perf" },
{ 100, 0x50000, "libc" },
{ 100, 0xf0000, "[kernel]" },
{ 200, 0x40000, "perf" },
{ 200, 0x50000, "libc" },
{ 200, 0xf0000, "[kernel]" },
{ 300, 0x40000, "bash" },
{ 300, 0x50000, "libc" },
{ 300, 0xf0000, "[kernel]" },
};
struct fake_sym {
u64 start;
u64 length;
const char *name;
};
static struct fake_sym perf_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "run_command" },
{ 900, 100, "cmd_record" },
};
static struct fake_sym bash_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "xmalloc" },
{ 900, 100, "xfree" },
};
static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" },
{ 800, 100, "free" },
{ 900, 100, "realloc" },
};
static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" },
{ 800, 100, "page_fault" },
{ 900, 100, "sys_perf_event_open" },
};
static struct {
const char *dso_name;
struct fake_sym *syms;
size_t nr_syms;
} fake_symbols[] = {
{ "perf", perf_syms, ARRAY_SIZE(perf_syms) },
{ "bash", bash_syms, ARRAY_SIZE(bash_syms) },
{ "libc", libc_syms, ARRAY_SIZE(libc_syms) },
{ "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
};
struct machine *setup_fake_machine(struct machines *machines)
{
struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
size_t i;
if (machine == NULL) {
pr_debug("Not enough memory for machine setup\n");
return NULL;
}
for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
struct thread *thread;
thread = machine__findnew_thread(machine, fake_threads[i].pid,
fake_threads[i].pid);
if (thread == NULL)
goto out;
thread__set_comm(thread, fake_threads[i].comm, 0);
}
for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
union perf_event fake_mmap_event = {
.mmap = {
.header = { .misc = PERF_RECORD_MISC_USER, },
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
.len = 0x1000ULL,
.pgoff = 0ULL,
},
};
strcpy(fake_mmap_event.mmap.filename,
fake_mmap_info[i].filename);
machine__process_mmap_event(machine, &fake_mmap_event, NULL);
}
for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
size_t k;
struct dso *dso;
dso = __dsos__findnew(&machine->user_dsos,
fake_symbols[i].dso_name);
if (dso == NULL)
goto out;
/* emulate dso__load() */
dso__set_loaded(dso, MAP__FUNCTION);
for (k = 0; k < fake_symbols[i].nr_syms; k++) {
struct symbol *sym;
struct fake_sym *fsym = &fake_symbols[i].syms[k];
sym = symbol__new(fsym->start, fsym->length,
STB_GLOBAL, fsym->name);
if (sym == NULL)
goto out;
symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
}
}
return machine;
out:
pr_debug("Not enough memory for machine setup\n");
machine__delete_threads(machine);
machine__delete(machine);
return NULL;
}
void print_hists_in(struct hists *hists)
{
int i = 0;
struct rb_root *root;
struct rb_node *node;
if (sort__need_collapse)
root = &hists->entries_collapsed;
else
root = hists->entries_in;
pr_info("----- %s --------\n", __func__);
node = rb_first(root);
while (node) {
struct hist_entry *he;
he = rb_entry(node, struct hist_entry, rb_node_in);
if (!he->filtered) {
pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
i, thread__comm_str(he->thread),
he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
}
i++;
node = rb_next(node);
}
}
void print_hists_out(struct hists *hists)
{
int i = 0;
struct rb_root *root;
struct rb_node *node;
root = &hists->entries;
pr_info("----- %s --------\n", __func__);
node = rb_first(root);
while (node) {
struct hist_entry *he;
he = rb_entry(node, struct hist_entry, rb_node);
if (!he->filtered) {
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
i, thread__comm_str(he->thread), he->thread->tid,
he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
}
i++;
node = rb_next(node);
}
}
#ifndef __PERF_TESTS__HISTS_COMMON_H__
#define __PERF_TESTS__HISTS_COMMON_H__
struct machine;
struct machines;
/*
* setup_fake_machine() provides a test environment consisting of
* 3 processes, each of which has 3 mappings that in turn contain
* 3 symbols each. See the table below:
*
* Command: Pid Shared Object Symbol
* ............. ............. ...................
* perf: 100 perf main
* perf: 100 perf run_command
* perf: 100 perf cmd_record
* perf: 100 libc malloc
* perf: 100 libc free
* perf: 100 libc realloc
* perf: 100 [kernel] schedule
* perf: 100 [kernel] page_fault
* perf: 100 [kernel] sys_perf_event_open
* perf: 200 perf main
* perf: 200 perf run_command
* perf: 200 perf cmd_record
* perf: 200 libc malloc
* perf: 200 libc free
* perf: 200 libc realloc
* perf: 200 [kernel] schedule
* perf: 200 [kernel] page_fault
* perf: 200 [kernel] sys_perf_event_open
* bash: 300 bash main
* bash: 300 bash xmalloc
* bash: 300 bash xfree
* bash: 300 libc malloc
* bash: 300 libc free
* bash: 300 libc realloc
* bash: 300 [kernel] schedule
* bash: 300 [kernel] page_fault
* bash: 300 [kernel] sys_perf_event_open
*/
struct machine *setup_fake_machine(struct machines *machines);
void print_hists_in(struct hists *hists);
void print_hists_out(struct hists *hists);
#endif /* __PERF_TESTS__HISTS_COMMON_H__ */
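A worked sketch of how a fake ip from this table resolves: the map base (0x40000, 0x50000 or 0xf0000) selects the DSO, and the remaining offset falls inside one of the (start, length) symbol ranges (toy types, not perf's):

#include <stdio.h>
#include <stddef.h>

struct toy_sym {
	unsigned long start, len;
	const char *name;
};

/* The fake [kernel] symbols from hists_common.c */
static const struct toy_sym toy_kernel_syms[] = {
	{ 700, 100, "schedule" },
	{ 800, 100, "page_fault" },
	{ 900, 100, "sys_perf_event_open" },
};

static const char *toy_resolve(unsigned long map_start, unsigned long ip)
{
	unsigned long off = ip - map_start;
	size_t i;

	for (i = 0; i < sizeof(toy_kernel_syms) / sizeof(toy_kernel_syms[0]); i++)
		if (off >= toy_kernel_syms[i].start &&
		    off < toy_kernel_syms[i].start + toy_kernel_syms[i].len)
			return toy_kernel_syms[i].name;
	return "[unknown]";
}

int main(void)
{
	/* 0xf0000 is the fake [kernel] map: 0xf0000 + 700 => schedule */
	printf("%s\n", toy_resolve(0xf0000, 0xf0000 + 700));
	return 0;
}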
#include "perf.h"
#include "util/debug.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
#include "util/parse-events.h"
#include "tests/tests.h"
#include "tests/hists_common.h"
struct sample {
u32 pid;
u64 ip;
struct thread *thread;
struct map *map;
struct symbol *sym;
};
/* For the numbers, see hists_common.c */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
/* perf [perf] main() */
{ .pid = 100, .ip = 0x40000 + 700, },
/* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, },
/* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
/* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, },
/* perf [kernel] page_fault() */
{ .pid = 200, .ip = 0xf0000 + 800, },
/* bash [bash] main() */
{ .pid = 300, .ip = 0x40000 + 700, },
/* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, },
/* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, },
};
static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
{
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, };
size_t i;
/*
* Each evsel will have 10 samples, but the 4th sample
* (perf [perf] main) will be collapsed into an existing entry,
* so 9 entries in total will be in the tree.
*/
evlist__for_each(evlist, evsel) {
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
const union perf_event event = {
.header = {
.misc = PERF_RECORD_MISC_USER,
},
};
/* make sure it has no filter at first */
evsel->hists.thread_filter = NULL;
evsel->hists.dso_filter = NULL;
evsel->hists.symbol_filter_str = NULL;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 100, 1, 0);
if (he == NULL)
goto out;
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym;
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
}
return 0;
out:
pr_debug("Not enough memory for adding a hist entry\n");
return TEST_FAIL;
}
int test__hists_filter(void)
{
int err = TEST_FAIL;
struct machines machines;
struct machine *machine;
struct perf_evsel *evsel;
struct perf_evlist *evlist = perf_evlist__new();
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock");
if (err)
goto out;
err = parse_events(evlist, "task-clock");
if (err)
goto out;
/* default sort order (comm,dso,sym) will be used */
if (setup_sorting() < 0)
goto out;
machines__init(&machines);
/* setup threads/dso/map/symbols also */
machine = setup_fake_machine(&machines);
if (!machine)
goto out;
if (verbose > 1)
machine__fprintf(machine, stderr);
/* process sample events */
err = add_hist_entries(evlist, machine);
if (err < 0)
goto out;
evlist__for_each(evlist, evsel) {
struct hists *hists = &evsel->hists;
hists__collapse_resort(hists, NULL);
hists__output_resort(hists);
if (verbose > 2) {
pr_info("Normal histogram\n");
print_hists_out(hists);
}
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
TEST_ASSERT_VAL("Unmatched nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] ==
hists->stats.nr_non_filtered_samples);
TEST_ASSERT_VAL("Unmatched nr hist entries",
hists->nr_entries == hists->nr_non_filtered_entries);
TEST_ASSERT_VAL("Unmatched total period",
hists->stats.total_period ==
hists->stats.total_non_filtered_period);
/* now applying thread filter for 'bash' */
evsel->hists.thread_filter = fake_samples[9].thread;
hists__filter_by_thread(hists);
if (verbose > 2) {
pr_info("Histogram for thread filter\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for thread filter",
hists->stats.nr_non_filtered_samples == 4);
TEST_ASSERT_VAL("Unmatched nr hist entries for thread filter",
hists->nr_non_filtered_entries == 4);
TEST_ASSERT_VAL("Unmatched total period for thread filter",
hists->stats.total_non_filtered_period == 400);
/* remove thread filter first */
evsel->hists.thread_filter = NULL;
hists__filter_by_thread(hists);
/* now applying dso filter for 'kernel' */
evsel->hists.dso_filter = fake_samples[0].map->dso;
hists__filter_by_dso(hists);
if (verbose > 2) {
pr_info("Histogram for dso filter\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for dso filter",
hists->stats.nr_non_filtered_samples == 3);
TEST_ASSERT_VAL("Unmatched nr hist entries for dso filter",
hists->nr_non_filtered_entries == 3);
TEST_ASSERT_VAL("Unmatched total period for dso filter",
hists->stats.total_non_filtered_period == 300);
/* remove dso filter first */
evsel->hists.dso_filter = NULL;
hists__filter_by_dso(hists);
/*
* Now applying the symbol filter for 'main'. Also note that
* there are 3 samples that have the 'main' symbol, but the 4th
* entry of fake_samples was already collapsed, so it won't be
* counted as a separate entry; the sample count and total
* period will remain, though.
*/
evsel->hists.symbol_filter_str = "main";
hists__filter_by_symbol(hists);
if (verbose > 2) {
pr_info("Histogram for symbol filter\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for symbol filter",
hists->stats.nr_non_filtered_samples == 3);
TEST_ASSERT_VAL("Unmatched nr hist entries for symbol filter",
hists->nr_non_filtered_entries == 2);
TEST_ASSERT_VAL("Unmatched total period for symbol filter",
hists->stats.total_non_filtered_period == 300);
/* now applying all filters at once. */
evsel->hists.thread_filter = fake_samples[1].thread;
evsel->hists.dso_filter = fake_samples[1].map->dso;
hists__filter_by_thread(hists);
hists__filter_by_dso(hists);
if (verbose > 2) {
pr_info("Histogram for all filters\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for all filter",
hists->stats.nr_non_filtered_samples == 2);
TEST_ASSERT_VAL("Unmatched nr hist entries for all filter",
hists->nr_non_filtered_entries == 1);
TEST_ASSERT_VAL("Unmatched total period for all filter",
hists->stats.total_non_filtered_period == 200);
}
err = TEST_OK;
out:
/* tear down everything */
perf_evlist__delete(evlist);
reset_output_field();
machines__exit(&machines);
return err;
}
......@@ -8,145 +8,7 @@
#include "machine.h"
#include "thread.h"
#include "parse-events.h"
static struct {
u32 pid;
const char *comm;
} fake_threads[] = {
{ 100, "perf" },
{ 200, "perf" },
{ 300, "bash" },
};
static struct {
u32 pid;
u64 start;
const char *filename;
} fake_mmap_info[] = {
{ 100, 0x40000, "perf" },
{ 100, 0x50000, "libc" },
{ 100, 0xf0000, "[kernel]" },
{ 200, 0x40000, "perf" },
{ 200, 0x50000, "libc" },
{ 200, 0xf0000, "[kernel]" },
{ 300, 0x40000, "bash" },
{ 300, 0x50000, "libc" },
{ 300, 0xf0000, "[kernel]" },
};
struct fake_sym {
u64 start;
u64 length;
const char *name;
};
static struct fake_sym perf_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "run_command" },
{ 900, 100, "cmd_record" },
};
static struct fake_sym bash_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "xmalloc" },
{ 900, 100, "xfree" },
};
static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" },
{ 800, 100, "free" },
{ 900, 100, "realloc" },
};
static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" },
{ 800, 100, "page_fault" },
{ 900, 100, "sys_perf_event_open" },
};
static struct {
const char *dso_name;
struct fake_sym *syms;
size_t nr_syms;
} fake_symbols[] = {
{ "perf", perf_syms, ARRAY_SIZE(perf_syms) },
{ "bash", bash_syms, ARRAY_SIZE(bash_syms) },
{ "libc", libc_syms, ARRAY_SIZE(libc_syms) },
{ "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
};
static struct machine *setup_fake_machine(struct machines *machines)
{
struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
size_t i;
if (machine == NULL) {
pr_debug("Not enough memory for machine setup\n");
return NULL;
}
for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
struct thread *thread;
thread = machine__findnew_thread(machine, fake_threads[i].pid,
fake_threads[i].pid);
if (thread == NULL)
goto out;
thread__set_comm(thread, fake_threads[i].comm, 0);
}
for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
union perf_event fake_mmap_event = {
.mmap = {
.header = { .misc = PERF_RECORD_MISC_USER, },
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
.len = 0x1000ULL,
.pgoff = 0ULL,
},
};
strcpy(fake_mmap_event.mmap.filename,
fake_mmap_info[i].filename);
machine__process_mmap_event(machine, &fake_mmap_event, NULL);
}
for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
size_t k;
struct dso *dso;
dso = __dsos__findnew(&machine->user_dsos,
fake_symbols[i].dso_name);
if (dso == NULL)
goto out;
/* emulate dso__load() */
dso__set_loaded(dso, MAP__FUNCTION);
for (k = 0; k < fake_symbols[i].nr_syms; k++) {
struct symbol *sym;
struct fake_sym *fsym = &fake_symbols[i].syms[k];
sym = symbol__new(fsym->start, fsym->length,
STB_GLOBAL, fsym->name);
if (sym == NULL)
goto out;
symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
}
}
return machine;
out:
pr_debug("Not enough memory for machine setup\n");
machine__delete_threads(machine);
machine__delete(machine);
return NULL;
}
#include "hists_common.h"
struct sample {
u32 pid;
......@@ -156,6 +18,7 @@ struct sample {
struct symbol *sym;
};
/* For the numbers, see hists_common.c */
static struct sample fake_common_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
......@@ -218,6 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
};
sample.pid = fake_common_samples[k].pid;
sample.tid = fake_common_samples[k].pid;
sample.ip = fake_common_samples[k].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
......@@ -241,6 +105,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
};
sample.pid = fake_samples[i][k].pid;
sample.tid = fake_samples[i][k].pid;
sample.ip = fake_samples[i][k].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
......@@ -403,33 +268,6 @@ static int validate_link(struct hists *leader, struct hists *other)
return __validate_link(leader, 0) || __validate_link(other, 1);
}
static void print_hists(struct hists *hists)
{
int i = 0;
struct rb_root *root;
struct rb_node *node;
if (sort__need_collapse)
root = &hists->entries_collapsed;
else
root = hists->entries_in;
pr_info("----- %s --------\n", __func__);
node = rb_first(root);
while (node) {
struct hist_entry *he;
he = rb_entry(node, struct hist_entry, rb_node_in);
pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
i, thread__comm_str(he->thread), he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
i++;
node = rb_next(node);
}
}
int test__hists_link(void)
{
int err = -1;
......@@ -471,7 +309,7 @@ int test__hists_link(void)
hists__collapse_resort(&evsel->hists, NULL);
if (verbose > 2)
print_hists(&evsel->hists);
print_hists_in(&evsel->hists);
}
first = perf_evlist__first(evlist);
......@@ -494,6 +332,7 @@ int test__hists_link(void)
out:
/* tear down everything */
perf_evlist__delete(evlist);
reset_output_field();
machines__exit(&machines);
return err;
......
This diff has been collapsed.
#include <sys/types.h>
#include <linux/types.h>
#include <unistd.h>
#include <sys/prctl.h>
......
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include "debug.h"
#include "tests.h"
#include "machine.h"
#include "thread_map.h"
#include "symbol.h"
#include "thread.h"
#define THREADS 4
static int go_away;
struct thread_data {
pthread_t pt;
pid_t tid;
void *map;
int ready[2];
};
static struct thread_data threads[THREADS];
static int thread_init(struct thread_data *td)
{
void *map;
map = mmap(NULL, page_size,
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_SHARED|MAP_ANONYMOUS, -1, 0);
if (map == MAP_FAILED) {
perror("mmap failed");
return -1;
}
td->map = map;
td->tid = syscall(SYS_gettid);
pr_debug("tid = %d, map = %p\n", td->tid, map);
return 0;
}
static void *thread_fn(void *arg)
{
struct thread_data *td = arg;
ssize_t ret;
int go;
if (thread_init(td))
return NULL;
/* Signal thread_create() that this thread is initialized. */
ret = write(td->ready[1], &go, sizeof(int));
if (ret != sizeof(int)) {
pr_err("failed to notify\n");
return NULL;
}
while (!go_away) {
/* Waiting for main thread to kill us. */
usleep(100);
}
munmap(td->map, page_size);
return NULL;
}
static int thread_create(int i)
{
struct thread_data *td = &threads[i];
int err, go;
if (pipe(td->ready))
return -1;
err = pthread_create(&td->pt, NULL, thread_fn, td);
if (!err) {
/* Wait for thread initialization. */
ssize_t ret = read(td->ready[0], &go, sizeof(int));
err = ret != sizeof(int);
}
close(td->ready[0]);
close(td->ready[1]);
return err;
}
static int threads_create(void)
{
struct thread_data *td0 = &threads[0];
int i, err = 0;
go_away = 0;
/* 0 is main thread */
if (thread_init(td0))
return -1;
for (i = 1; !err && i < THREADS; i++)
err = thread_create(i);
return err;
}
static int threads_destroy(void)
{
struct thread_data *td0 = &threads[0];
int i, err = 0;
/* cleanup the main thread */
munmap(td0->map, page_size);
go_away = 1;
for (i = 1; !err && i < THREADS; i++)
err = pthread_join(threads[i].pt, NULL);
return err;
}
typedef int (*synth_cb)(struct machine *machine);
static int synth_all(struct machine *machine)
{
return perf_event__synthesize_threads(NULL,
perf_event__process,
machine, 0);
}
static int synth_process(struct machine *machine)
{
struct thread_map *map;
int err;
map = thread_map__new_by_pid(getpid());
err = perf_event__synthesize_thread_map(NULL, map,
perf_event__process,
machine, 0);
thread_map__delete(map);
return err;
}
static int mmap_events(synth_cb synth)
{
struct machines machines;
struct machine *machine;
int err, i;
/*
* threads_create() will not return before all threads are
* spawned and all of them have created their memory maps.
*
* They will loop until threads_destroy() is called, so we
* can safely run the synthesizing function.
*/
TEST_ASSERT_VAL("failed to create threads", !threads_create());
machines__init(&machines);
machine = &machines.host;
dump_trace = verbose > 1 ? 1 : 0;
err = synth(machine);
dump_trace = 0;
TEST_ASSERT_VAL("failed to destroy threads", !threads_destroy());
TEST_ASSERT_VAL("failed to synthesize maps", !err);
/*
* All the data is synthesized; try to find the map for each
* thread object.
*/
for (i = 0; i < THREADS; i++) {
struct thread_data *td = &threads[i];
struct addr_location al;
struct thread *thread;
thread = machine__findnew_thread(machine, getpid(), td->tid);
pr_debug("looking for map %p\n", td->map);
thread__find_addr_map(thread, machine,
PERF_RECORD_MISC_USER, MAP__FUNCTION,
(unsigned long) (td->map + 1), &al);
if (!al.map) {
pr_debug("failed, couldn't find map\n");
err = -1;
break;
}
pr_debug("map %p, addr %" PRIx64 "\n", al.map, al.map->start);
}
machine__delete_threads(machine);
machines__exit(&machines);
return err;
}
/*
* This test creates 'THREADS' number of threads (including the
* main thread), and each thread creates a memory map.
*
* When the threads are created, we synthesize them with both
* (separate tests):
* perf_event__synthesize_thread_map (process based)
* perf_event__synthesize_threads (global)
*
* We verify that we can find all the memory maps via:
* thread__find_addr_map
*
* using all of the thread objects.
*/
int test__mmap_thread_lookup(void)
{
/* perf_event__synthesize_threads synthesize */
TEST_ASSERT_VAL("failed with sythesizing all",
!mmap_events(synth_all));
/* perf_event__synthesize_thread_map synthesize */
TEST_ASSERT_VAL("failed with sythesizing process",
!mmap_events(synth_process));
return 0;
}
This diff has been collapsed.
#include <sys/types.h>
#include <linux/types.h>
#include <stddef.h>
#include "tests.h"
......
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <inttypes.h>
#include <linux/types.h>
#include <sys/prctl.h>
#include "parse-events.h"
......
......
@@ -2,7 +2,7 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <sys/mman.h>
-#include "types.h"
+#include <linux/types.h>
 #include "perf.h"
 #include "debug.h"
 #include "tests.h"
......
#include <stdbool.h>
#include <inttypes.h>
#include <linux/types.h>
#include "util.h"
#include "event.h"
......
......
@@ -41,8 +41,12 @@ int test__sample_parsing(void);
 int test__keep_tracking(void);
 int test__parse_no_sample_id_all(void);
 int test__dwarf_unwind(void);
+int test__hists_filter(void);
+int test__mmap_thread_lookup(void);
+int test__thread_mg_share(void);
+int test__hists_output(void);
-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
 struct perf_sample;
......
#include "tests.h"
#include "machine.h"
#include "thread.h"
#include "map.h"
int test__thread_mg_share(void)
{
struct machines machines;
struct machine *machine;
/* thread group */
struct thread *leader;
struct thread *t1, *t2, *t3;
struct map_groups *mg;
/* other process */
struct thread *other, *other_leader;
struct map_groups *other_mg;
/*
* This test create 2 processes abstractions (struct thread)
* with several threads and checks they properly share and
* maintain map groups info (struct map_groups).
*
* thread group (pid: 0, tids: 0, 1, 2, 3)
* other group (pid: 4, tids: 4, 5)
*/
machines__init(&machines);
machine = &machines.host;
/* create process with 4 threads */
leader = machine__findnew_thread(machine, 0, 0);
t1 = machine__findnew_thread(machine, 0, 1);
t2 = machine__findnew_thread(machine, 0, 2);
t3 = machine__findnew_thread(machine, 0, 3);
/* and create 1 separated process, without thread leader */
other = machine__findnew_thread(machine, 4, 5);
TEST_ASSERT_VAL("failed to create threads",
leader && t1 && t2 && t3 && other);
mg = leader->mg;
TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4);
/* test the map groups pointer is shared */
TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
TEST_ASSERT_VAL("map groups don't match", mg == t2->mg);
TEST_ASSERT_VAL("map groups don't match", mg == t3->mg);
/*
* Verify the other leader was created by previous call.
* It should have shared map groups with no change in
* refcnt.
*/
other_leader = machine__find_thread(machine, 4, 4);
TEST_ASSERT_VAL("failed to find other leader", other_leader);
other_mg = other->mg;
TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2);
TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
/* release thread group */
thread__delete(leader);
TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3);
thread__delete(t1);
TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2);
thread__delete(t2);
TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1);
thread__delete(t3);
/* release other group */
thread__delete(other_leader);
TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1);
thread__delete(other);
/*
* Cannot call machine__delete_threads(machine) now,
* because we've already released all the threads.
*/
machines__exit(&machines);
return 0;
}
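The refcnt arithmetic asserted above follows a plain shared-ownership rule: siblings share the leader's map groups and each thread__delete() drops one reference. As a standalone illustration (a sketch, not perf's util/thread.c implementation):

#include <stdlib.h>

/* Sketch of the sharing rule test__thread_mg_share() checks. */
struct mg_sketch {
	int refcnt;
};

/* Each sibling thread shares the leader's map groups ... */
static struct mg_sketch *mg_sketch_get(struct mg_sketch *mg)
{
	mg->refcnt++;
	return mg;
}

/* ... and every release drops one reference; the last
 * put is the one that actually frees the map groups. */
static void mg_sketch_put(struct mg_sketch *mg)
{
	if (--mg->refcnt == 0)
		free(mg);
}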
#ifndef _PERF_UI_BROWSER_H_
#define _PERF_UI_BROWSER_H_ 1

#include <stdbool.h>
#include <sys/types.h>
-#include "../types.h"
+#include <linux/types.h>

#define HE_COLORSET_TOP		50
#define HE_COLORSET_MEDIUM	51
......
......
@@ -43,7 +43,7 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
 					  struct perf_hpp *hpp,		\
 					  struct hist_entry *he)	\
 {									\
-	return __hpp__fmt(hpp, he, he_get_##_field, NULL, " %6.2f%%",	\
+	return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%",	\
 			  __percent_color_snprintf, true);		\
 }
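The only change here is dropping the NULL argument, which (judging by the hunk) was a callback parameter removed from __hpp__fmt()'s signature. For reference, an instantiation such as __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us), visible in the next hunk, now expands to roughly:

/* Approximate post-change expansion; for illustration only. */
static int perf_gtk__hpp_color_overhead_guest_us(struct perf_hpp_fmt *fmt __maybe_unused,
						 struct perf_hpp *hpp,
						 struct hist_entry *he)
{
	return __hpp__fmt(hpp, he, he_get_period_guest_us, " %6.2f%%",
			  __percent_color_snprintf, true);
}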
......
@@ -58,8 +58,6 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
 void perf_gtk__init_hpp(void)
 {
 	perf_hpp__init();
-	perf_hpp__format[PERF_HPP__OVERHEAD].color =
-		perf_gtk__hpp_color_overhead;
 	perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
......
@@ -153,7 +151,6 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	struct perf_hpp_fmt *fmt;
 	GType col_types[MAX_COLUMNS];
 	GtkCellRenderer *renderer;
-	struct sort_entry *se;
 	GtkTreeStore *store;
 	struct rb_node *nd;
 	GtkWidget *view;
......
@@ -172,16 +169,6 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	perf_hpp__for_each_format(fmt)
 		col_types[nr_cols++] = G_TYPE_STRING;
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		if (se->elide)
-			continue;
-
-		if (se == &sort_sym)
-			sym_col = nr_cols;
-
-		col_types[nr_cols++] = G_TYPE_STRING;
-	}
 	store = gtk_tree_store_newv(nr_cols, col_types);
 	view = gtk_tree_view_new();
......
@@ -191,6 +178,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	col_idx = 0;
 	perf_hpp__for_each_format(fmt) {
+		if (perf_hpp__should_skip(fmt))
+			continue;
+
 		fmt->header(fmt, &hpp, hists_to_evsel(hists));
 		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
......
@@ -199,16 +189,6 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 							    col_idx++, NULL);
 	}
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		if (se->elide)
-			continue;
-
-		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
-							    -1, se->se_header,
-							    renderer, "text",
-							    col_idx++, NULL);
-	}
 	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
 		GtkTreeViewColumn *column;
......
@@ -228,12 +208,15 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		GtkTreeIter iter;
-		float percent = h->stat.period * 100.0 /
-				hists->stats.total_period;
+		u64 total = hists__total_period(h->hists);
+		float percent = 0.0;

 		if (h->filtered)
 			continue;

+		if (total)
+			percent = h->stat.period * 100.0 / total;
+
 		if (percent < min_pcnt)
 			continue;
......
@@ -242,6 +225,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		col_idx = 0;
 		perf_hpp__for_each_format(fmt) {
+			if (perf_hpp__should_skip(fmt))
+				continue;
+
 			if (fmt->color)
 				fmt->color(fmt, &hpp, h);
 			else
......
@@ -250,23 +236,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
-		list_for_each_entry(se, &hist_entry__sort_list, list) {
-			if (se->elide)
-				continue;
-
-			se->se_snprintf(h, s, ARRAY_SIZE(s),
-					hists__col_len(hists, se->se_width_idx));
-
-			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
-		}
-
 		if (symbol_conf.use_callchain && sort__has_sym) {
-			u64 total;
-
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
 				total = h->stat.period;
-			else
-				total = hists->stats.total_period;

 			perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
 						sym_col, total);
......
#ifndef _PERF_UI_PROGRESS_H_
#define _PERF_UI_PROGRESS_H_ 1

-#include <../types.h>
+#include <linux/types.h>

void ui_progress__finish(void);
......
#ifndef PERF_LINUX_MODULE_H
#define PERF_LINUX_MODULE_H

#define EXPORT_SYMBOL(name)

#endif
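This stub exists so that kernel source files shared into tools/ compile unchanged in user space: the export annotation simply preprocesses away. An illustrative (hypothetical) user:

#include <linux/module.h>	/* picks up the stub above when built in tools/ */

/* A kernel-shared helper keeps its annotation ... */
int shared_helper(int x)
{
	return x * 2;
}
/* ... which expands to nothing under the stub, making it a no-op here. */
EXPORT_SYMBOL(shared_helper);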