Commit 3d521f91 authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next

Pull perf updates from Ingo Molnar:
 "The tooling changes maintained by Jiri Olsa until Arnaldo is on
  vacation:

  User visible changes:
   - Add -F option for specifying output fields (Namhyung Kim)
   - Propagate exit status of a command line workload for record command
     (Namhyung Kim)
   - Use tid for finding thread (Namhyung Kim)
   - Clarify the output of perf sched map plus small sched command
     fixes (Dongsheng Yang)
   - Wire up perf_regs and unwind support for ARM64 (Jean Pihet)
   - Factor hists statistics counts processing which in turn also fixes
     several bugs in TUI report command (Namhyung Kim)
   - Add --percentage option to control absolute/relative percentage
     output (Namhyung Kim)
   - Add --list-cmds to 'kmem', 'mem', 'lock' and 'sched', for use by
     completion scripts (Ramkumar Ramachandra)

  Development/infrastructure changes and fixes:
   - Android related fixes for pager and map dso resolving (Michael
     Lentine)
   - Add libdw DWARF post unwind support for ARM (Jean Pihet)
   - Consolidate types.h for ARM and ARM64 (Jean Pihet)
   - Fix possible null pointer dereference in session.c (Masanari Iida)
   - Cleanup, remove unused variables in map_switch_event() (Dongsheng
     Yang)
   - Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)
   - Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)
   - Cleanups for perf.h header (Jiri Olsa)
   - Consolidate types.h and export.h within tools (Borislav Petkov)
   - Move u64_swap union to its single user's header, evsel.h (Borislav
     Petkov)
   - Fix for s390 to properly parse tracepoints plus test code
     (Alexander Yarygin)
   - Handle EINTR error for readn/writen (Namhyung Kim)
   - Add a test case for hists filtering (Namhyung Kim)
   - Share map_groups among threads of the same group (Arnaldo Carvalho
     de Melo, Jiri Olsa)
   - Making some code (cpu node map and report parse callchain callback)
     global to be usable by upcoming changes (Don Zickus)
   - Fix pmu object compilation error (Jiri Olsa)

  Kernel side changes:
   - intrusive uprobes fixes from Oleg Nesterov.  Since the interface is
     admin-only, and the bug only affects user-space ("any probed
     jmp/call can kill the application"), we queued these fixes via the
     development tree, as a special exception.
   - more fuzzer motivated race fixes and related refactoring and
     robustization.
   - allow PMU drivers to be built as modules.  (No actual module yet,
     because the x86 Intel uncore module wasn't ready in time for this)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  perf tools: Add automatic remapping of Android libraries
  perf tools: Add cat as fallback pager
  perf tests: Add a testcase for histogram output sorting
  perf tests: Factor out print_hists_*()
  perf tools: Introduce reset_output_field()
  perf tools: Get rid of obsolete hist_entry__sort_list
  perf hists: Reset width of output fields with header length
  perf tools: Skip elided sort entries
  perf top: Add --fields option to specify output fields
  perf report/tui: Fix a bug when --fields/sort is given
  perf tools: Add ->sort() member to struct sort_entry
  perf report: Add -F option to specify output fields
  perf tools: Call perf_hpp__init() before setting up GUI browsers
  perf tools: Consolidate management of default sort orders
  perf tools: Allow hpp fields to be sort keys
  perf ui: Get rid of callback from __hpp__fmt()
  perf tools: Consolidate output field handling to hpp format routines
  perf tools: Use hpp formats to sort final output
  perf tools: Support event grouping in hpp ->sort()
  perf tools: Use hpp formats to sort hist entries
  ...
......@@ -33,15 +33,27 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN 0xcc
#define UPROBE_SWBP_INSN_SIZE 1
struct uprobe_xol_ops;
struct arch_uprobe {
u16 fixups;
union {
u8 insn[MAX_UINSN_BYTES];
u8 ixol[MAX_UINSN_BYTES];
};
u16 fixups;
const struct uprobe_xol_ops *ops;
union {
#ifdef CONFIG_X86_64
unsigned long rip_rela_target_address;
#endif
struct {
s32 offs;
u8 ilen;
u8 opc1;
} branch;
};
};
struct arch_uprobe_task {
......
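The branch sub-struct added above is what lets x86 uprobes emulate simple relative jumps and calls directly instead of single-stepping them out of line. A hedged, user-space sketch of the idea follows; the structs and helper are cut-down stand-ins invented for illustration, not the kernel's types:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct fake_regs   { uint64_t ip; };                    /* stand-in for pt_regs */
struct fake_branch { int32_t offs; uint8_t ilen; uint8_t opc1; };

/* emulate a taken relative jmp: retire the probed insn, apply its displacement */
static void emulate_rel_jmp(struct fake_regs *regs, const struct fake_branch *br)
{
	regs->ip += br->ilen;	/* as if the jmp itself had executed */
	regs->ip += br->offs;	/* rel32 displacement to the target */
}

int main(void)
{
	struct fake_regs regs = { .ip = 0x400000 };
	struct fake_branch br = { .offs = 0x20, .ilen = 5, .opc1 = 0xe9 }; /* jmp rel32 */

	emulate_rel_jmp(&regs, &br);
	printf("resume at 0x%" PRIx64 "\n", regs.ip);
	return 0;
}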
......@@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n,
return sched.state.unassigned;
}
EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
......
......@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
return val;
}
static u64 precise_store_data_hsw(u64 status)
static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
{
union perf_mem_data_src dse;
u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
dse.mem_op = PERF_MEM_OP_STORE;
dse.mem_lvl = PERF_MEM_LVL_NA;
/*
* L1 info only valid for following events:
*
* MEM_UOPS_RETIRED.STLB_MISS_STORES
* MEM_UOPS_RETIRED.LOCK_STORES
* MEM_UOPS_RETIRED.SPLIT_STORES
* MEM_UOPS_RETIRED.ALL_STORES
*/
if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
return dse.mem_lvl;
if (status & 1)
dse.mem_lvl = PERF_MEM_LVL_L1;
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
else
dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
/* Nothing else supported. Sorry. */
return dse.val;
}
......@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
data.data_src.val = load_latency_data(pebs->dse);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
data.data_src.val =
precise_store_data_hsw(pebs->dse);
precise_store_data_hsw(event, pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
......
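The cfg check above follows the usual x86 perfmon encoding, where the low byte of the event config is the event select (0xd0, MEM_UOPS_RETIRED on Haswell) and the next byte is the umask, so 0x12d0/0x22d0/0x42d0/0x82d0 correspond to the four store umasks listed in the comment. A stand-alone decode of those constants (the encoding split is the assumption here):

#include <stdio.h>

int main(void)
{
	const unsigned cfgs[] = { 0x12d0, 0x22d0, 0x42d0, 0x82d0 };

	for (unsigned i = 0; i < sizeof(cfgs) / sizeof(cfgs[0]); i++)
		printf("config 0x%04x -> event 0x%02x, umask 0x%02x\n",
		       cfgs[i], cfgs[i] & 0xff, (cfgs[i] >> 8) & 0xff);
	return 0;
}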
This diff has been collapsed.
......@@ -172,6 +172,7 @@ struct perf_event;
struct pmu {
struct list_head entry;
struct module *module;
struct device *dev;
const struct attribute_group **attr_groups;
const char *name;
......
......@@ -722,10 +722,10 @@ enum perf_callchain_context {
PERF_CONTEXT_MAX = (__u64)-4095,
};
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
#define PERF_FLAG_FD_CLOEXEC (1U << 3) /* O_CLOEXEC */
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
union perf_mem_data_src {
__u64 val;
......
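These PERF_FLAG_* bits are passed as the final flags argument of the perf_event_open() syscall, which is an unsigned long; hence the 1U to 1UL widening. A minimal sketch of passing one of them from user space, assuming uapi headers recent enough to define PERF_FLAG_FD_CLOEXEC (error handling trimmed):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	/* pid=0, cpu=-1: count this task on any CPU */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1,
			 PERF_FLAG_FD_CLOEXEC);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}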
......@@ -39,6 +39,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include "internal.h"
......@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
u64 tstamp = perf_event_time(event);
int ret = 0;
lockdep_assert_held(&ctx->lock);
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
......@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
if (event->ctx)
put_ctx(event->ctx);
if (event->pmu)
module_put(event->pmu->module);
call_rcu(&event->rcu_head, free_event_rcu);
}
static void free_event(struct perf_event *event)
static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending);
......@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
if (is_cgroup_event(event))
perf_detach_cgroup(event);
__free_event(event);
}
int perf_event_release_kernel(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
*
* 1) there is a lock recursion from perf_event_exit_task
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
* perf_event_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
/*
* Used to free events which have a known refcount of 1, such as in error paths
* where the event isn't exposed yet and inherited events.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
free_event(event);
static void free_event(struct perf_event *event)
{
if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
"unexpected event refcount: %ld; ptr=%p\n",
atomic_long_read(&event->refcount), event)) {
/* leak to avoid use-after-free */
return;
}
return 0;
_free_event(event);
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
/*
* Called when the last reference to the file is gone.
*/
static void put_event(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *owner;
if (!atomic_long_dec_and_test(&event->refcount))
......@@ -3341,8 +3337,32 @@ static void put_event(struct perf_event *event)
put_task_struct(owner);
}
perf_event_release_kernel(event);
WARN_ON_ONCE(ctx->parent_ctx);
/*
* There are two ways this annotation is useful:
*
* 1) there is a lock recursion from perf_event_exit_task
* see the comment there.
*
* 2) there is a lock-inversion with mmap_sem through
* perf_event_read_group(), which takes faults while
* holding ctx->mutex, however this is called after
* the last filedesc died, so there is no possibility
* to trigger the AB-BA case.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
_free_event(event);
}
int perf_event_release_kernel(struct perf_event *event)
{
put_event(event);
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
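The reworked free_event() above only proceeds when it can transition the refcount from exactly 1 to 0, and deliberately leaks the object otherwise rather than risk a use-after-free. The same pattern in self-contained C11, with atomic_compare_exchange_strong() standing in for the kernel's atomic_long_cmpxchg():

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj { atomic_long refcount; };

static void free_single_ref(struct obj *o)
{
	long expected = 1;

	/* drop 1 -> 0 only if we really hold the sole reference ... */
	if (!atomic_compare_exchange_strong(&o->refcount, &expected, 0)) {
		/* ... otherwise warn and leak instead of double-freeing */
		fprintf(stderr, "unexpected refcount: %ld; ptr=%p\n",
			expected, (void *)o);
		return;
	}
	free(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);
	free_single_ref(o);	/* succeeds: refcount was exactly 1 */
	return 0;
}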
static int perf_release(struct inode *inode, struct file *file)
{
......@@ -6578,6 +6598,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
free_percpu(pmu->pmu_disable_count);
goto unlock;
}
EXPORT_SYMBOL_GPL(perf_pmu_register);
void perf_pmu_unregister(struct pmu *pmu)
{
......@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
put_device(pmu->dev);
free_pmu_context(pmu);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
struct pmu *perf_init_event(struct perf_event *event)
{
......@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
if (!try_module_get(pmu->module)) {
pmu = ERR_PTR(-ENODEV);
goto unlock;
}
event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
......@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
if (!try_module_get(pmu->module)) {
pmu = ERR_PTR(-ENODEV);
goto unlock;
}
event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
......@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
err_pmu:
if (event->destroy)
event->destroy(event);
module_put(pmu->module);
err_ns:
if (event->ns)
put_pid_ns(event->ns);
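perf_init_event() now pins the PMU's owning module with try_module_get() before calling into it, and the error path above drops that pin with module_put(). A rough user-space analogue is reference-counted dlopen()/dlclose() around calls into a shared object; the library name below is a glibc assumption (link with -ldl):

#include <dlfcn.h>
#include <stdio.h>

int main(void)
{
	/* "pin" libm while we resolve and call into it */
	void *h = dlopen("libm.so.6", RTLD_NOW);
	if (!h) {
		fprintf(stderr, "dlopen: %s\n", dlerror());
		return 1;
	}

	double (*f)(double) = (double (*)(double))dlsym(h, "sqrt");
	if (f)
		printf("sqrt(2) = %f\n", f(2.0));

	dlclose(h);	/* the module_put() analogue */
	return 0;
}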
......@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
if (task && group_leader &&
group_leader->attr.inherit != attr.inherit) {
err = -EINVAL;
goto err_task;
}
get_online_cpus();
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_task;
goto err_cpus;
}
if (flags & PERF_FLAG_PID_CGROUP) {
err = perf_cgroup_connect(pid, event, &attr, group_leader);
if (err) {
__free_event(event);
goto err_task;
goto err_cpus;
}
}
......@@ -7242,8 +7279,9 @@ SYSCALL_DEFINE5(perf_event_open,
put_ctx(ctx);
err_alloc:
free_event(event);
err_task:
err_cpus:
put_online_cpus();
err_task:
if (task)
put_task_struct(task);
err_group_fd:
......@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
struct perf_event_context *child_ctx,
struct task_struct *child)
{
perf_remove_from_context(child_event, !!child_event->parent);
perf_remove_from_context(child_event, true);
/*
* It can happen that the parent exits first, and has events
......@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
{
struct perf_event *child_event, *tmp;
struct perf_event *child_event;
struct perf_event_context *child_ctx;
unsigned long flags;
......@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
*/
mutex_lock(&child_ctx->mutex);
again:
list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
group_entry)
__perf_event_exit_task(child_event, child_ctx, child);
list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
group_entry)
list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
__perf_event_exit_task(child_event, child_ctx, child);
/*
* If the last event was a group event, it will have appended all
* its siblings to the list, but we obtained 'tmp' before that which
* will still point to the list head terminating the iteration.
*/
if (!list_empty(&child_ctx->pinned_groups) ||
!list_empty(&child_ctx->flexible_groups))
goto again;
mutex_unlock(&child_ctx->mutex);
put_ctx(child_ctx);
......
......@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
/* Can skip singlestep */
#define UPROBE_SKIP_SSTEP 1
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
......@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->offset = offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
/* For now assume that the instruction need not be single-stepped */
__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
/* add to uprobes_tree, sorted on inode:offset */
cur_uprobe = insert_uprobe(uprobe);
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
kfree(uprobe);
......@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
return true;
}
/*
* Avoid singlestepping the original instruction if the original instruction
* is a NOP or can be emulated.
*/
static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
{
if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
return true;
clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
}
return false;
}
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
struct vm_area_struct *vma;
......@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs);
if (can_skip_sstep(uprobe, regs))
if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
goto out;
if (!pre_ssout(uprobe, regs, bp_vaddr))
return;
/* can_skip_sstep() succeeded, or restart if can't singlestep */
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
out:
put_uprobe(uprobe);
}
......@@ -1886,10 +1867,11 @@ static void handle_swbp(struct pt_regs *regs)
static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
{
struct uprobe *uprobe;
int err = 0;
uprobe = utask->active_uprobe;
if (utask->state == UTASK_SSTEP_ACK)
arch_uprobe_post_xol(&uprobe->arch, regs);
err = arch_uprobe_post_xol(&uprobe->arch, regs);
else if (utask->state == UTASK_SSTEP_TRAPPED)
arch_uprobe_abort_xol(&uprobe->arch, regs);
else
......@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* see uprobe_deny_signal() */
spin_unlock_irq(&current->sighand->siglock);
if (unlikely(err)) {
uprobe_warn(current, "execute the probed insn, sending SIGILL.");
force_sig_info(SIGILL, SEND_SIG_FORCED, current);
}
}
/*
......
......@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
return ret;
}
EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
/**
* hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
......
......@@ -35,4 +35,6 @@
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif /* _TOOLS_LINUX_COMPILER_H */
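ACCESS_ONCE() is the tools-side copy of the kernel idiom: forcing the access through a volatile-qualified lvalue makes the compiler emit exactly one load or store per mention, rather than caching the value in a register. A self-contained sketch (typeof is a GCC/Clang extension, just as in the header itself):

#include <stdio.h>

#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

static int flag;	/* imagine another thread flipping this */

int main(void)
{
	ACCESS_ONCE(flag) = 1;		/* compiler must emit this store */
	while (!ACCESS_ONCE(flag))	/* and must reload on every pass */
		;
	printf("flag observed as %d\n", flag);
	return 0;
}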
#ifndef _TOOLS_LINUX_EXPORT_H_
#define _TOOLS_LINUX_EXPORT_H_
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
#define EXPORT_SYMBOL_GPL_FUTURE(sym)
#define EXPORT_UNUSED_SYMBOL(sym)
#define EXPORT_UNUSED_SYMBOL_GPL(sym)
#endif
#ifndef _LIBLOCKDEP_LINUX_TYPES_H_
#define _LIBLOCKDEP_LINUX_TYPES_H_
#ifndef _TOOLS_LINUX_TYPES_H_
#define _TOOLS_LINUX_TYPES_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
#include <asm/types.h>
......@@ -10,10 +11,22 @@
struct page;
struct kmem_cache;
typedef unsigned gfp_t;
typedef enum {
GFP_KERNEL,
GFP_ATOMIC,
__GFP_HIGHMEM,
__GFP_HIGH
} gfp_t;
typedef __u64 u64;
typedef __s64 s64;
/*
* We define u64 as uint64_t for every architecture
* so that we can print it with "%"PRIx64 without getting warnings.
*
* typedef __u64 u64;
* typedef __s64 s64;
*/
typedef uint64_t u64;
typedef int64_t s64;
typedef __u32 u32;
typedef __s32 s32;
......@@ -35,6 +48,10 @@ typedef __s8 s8;
#define __bitwise
#endif
#define __force
#define __user
#define __must_check
#define __cold
typedef __u16 __bitwise __le16;
typedef __u16 __bitwise __be16;
......@@ -55,4 +72,4 @@ struct hlist_node {
struct hlist_node *next, **pprev;
};
#endif
#endif /* _TOOLS_LINUX_TYPES_H_ */
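The comment above states the motivation: defining u64 as uint64_t, instead of the kernel's __u64 (which may be unsigned long or unsigned long long depending on the architecture), makes the PRIx64 format macros line up everywhere. A quick stand-alone check:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;	/* as in tools/include/linux/types.h above */

int main(void)
{
	u64 v = 0x1234deadbeefULL;

	/* compiles warning-free on both 32-bit and 64-bit hosts */
	printf("v = 0x%" PRIx64 "\n", v);
	return 0;
}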
......@@ -104,7 +104,7 @@ N =
export Q VERBOSE
INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include $(CONFIG_INCLUDES)
INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
# Set compile option CFLAGS if not set elsewhere
CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
......
#ifndef _LIBLOCKDEP_LINUX_EXPORT_H_
#define _LIBLOCKDEP_LINUX_EXPORT_H_
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
#endif
......@@ -33,21 +33,25 @@ OPTIONS
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage
of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
Sort by key(s): pid, comm, dso, symbol.
Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
Please see description of --sort in the perf-report man page.
-t::
--field-separator=::
......@@ -89,6 +93,14 @@ OPTIONS
--order::
Specify compute sorting column number.
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options.
"relative" means it's relative to filtered entries only so that the
sum of shown entries will always be 100%. "absolute" means it retains
the original value before and after the filter is applied.
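As a worked example of the two modes with made-up period counts: if the unfiltered data totals 1000 events and the filter keeps two entries of 300 and 100, relative mode shows 75% and 25% (summing to 100%), while absolute mode shows 30% and 10%:

#include <stdio.h>

int main(void)
{
	const double total = 1000.0;		 /* period before filtering */
	const double shown[] = { 300.0, 100.0 }; /* entries left after filter */
	const double shown_total = shown[0] + shown[1];

	for (int i = 0; i < 2; i++)
		printf("entry %d: relative %5.1f%%  absolute %5.1f%%\n", i,
		       100.0 * shown[i] / shown_total,	/* sums to 100% */
		       100.0 * shown[i] / total);	/* keeps pre-filter share */
	return 0;
}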
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
......@@ -157,6 +169,10 @@ with:
- period_percent being the % of the hist entry period value within
single data file
- with filtering by -C, -d and/or -S, period_percent might be changed
relative to how entries are filtered. Use --percentage=absolute to
prevent such fluctuation.
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:
......@@ -187,4 +203,4 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as:
SEE ALSO
--------
linkperf:perf-record[1]
linkperf:perf-record[1], linkperf:perf-report[1]
......@@ -25,10 +25,6 @@ OPTIONS
--verbose::
Be more verbose. (show symbol address, etc)
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries.
-n::
--show-nr-samples::
Show the number of samples for each symbol
......@@ -42,11 +38,18 @@ OPTIONS
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
file://filename entries.
file://filename entries. This option will affect the percentage of
the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
......@@ -76,6 +79,15 @@ OPTIONS
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
- overhead_us: Overhead percentage of sample running in user mode
- overhead_guest_sys: Overhead percentage of sample running in system mode
on guest machine
- overhead_guest_us: Overhead percentage of sample running in user mode on
guest machine
- sample: Number of samples
- period: Raw event count of the sample
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
......@@ -95,6 +107,16 @@ OPTIONS
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
-F::
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
Also it can contain any sort key(s).
By default, every sort key not specified in -F will be appended
automatically.
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
......@@ -237,6 +259,15 @@ OPTIONS
Do not show entries which have an overhead under that percent.
(Default: 0).
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options and
Zoom operations on the TUI (thread, dso, etc).
"relative" means it's relative to filtered entries only so that the
sum of shown entries will always be 100%. "absolute" means it retains
the original value before and after the filter is applied.
--header::
Show header information in the perf.data file. This includes
various information like hostname, OS and perf version, cpu/mem
......
......@@ -113,7 +113,17 @@ Default is to monitor all CPUS.
-s::
--sort::
Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
local_weight, abort, in_tx, transaction
local_weight, abort, in_tx, transaction, overhead, sample, period.
Please see description of --sort in the perf-report man page.
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
overhead, overhead_sys, overhead_us, sample and period.
Also it can contain any sort key(s).
By default, every sort key not specified in --fields will be appended
automatically.
-n::
--show-nr-samples::
......@@ -123,13 +133,16 @@ Default is to monitor all CPUS.
Show a column with the sum of periods.
--dsos::
Only consider symbols in these dsos.
Only consider symbols in these dsos. This option will affect the
percentage of the overhead column. See --percentage for more info.
--comms::
Only consider symbols in these comms.
Only consider symbols in these comms. This option will affect the
percentage of the overhead column. See --percentage for more info.
--symbols::
Only consider these symbols.
Only consider these symbols. This option will affect the
percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
......@@ -165,6 +178,15 @@ Default is to monitor all CPUS.
Do not show entries which have an overhead under that percent.
(Default: 0).
--percentage::
Determine how to display the overhead percentage of filtered entries.
Filters can be applied by --comms, --dsos and/or --symbols options and
Zoom operations on the TUI (thread, dso, etc).
"relative" means it's relative to filtered entries only so that the
sum of shown entries will always be 100%. "absolute" means it retains
the original value before and after the filter is applied.
INTERACTIVE PROMPTING KEYS
--------------------------
......@@ -200,4 +222,4 @@ Pressing any unmapped key displays a menu, and prompts for input.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
......@@ -7,6 +7,8 @@ tools/lib/symbol/kallsyms.h
tools/include/asm/bug.h
tools/include/linux/compiler.h
tools/include/linux/hash.h
tools/include/linux/export.h
tools/include/linux/types.h
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
......
......@@ -222,12 +222,12 @@ LIB_H += util/include/linux/const.h
LIB_H += util/include/linux/ctype.h
LIB_H += util/include/linux/kernel.h
LIB_H += util/include/linux/list.h
LIB_H += util/include/linux/export.h
LIB_H += ../include/linux/export.h
LIB_H += util/include/linux/poison.h
LIB_H += util/include/linux/rbtree.h
LIB_H += util/include/linux/rbtree_augmented.h
LIB_H += util/include/linux/string.h
LIB_H += util/include/linux/types.h
LIB_H += ../include/linux/types.h
LIB_H += util/include/linux/linkage.h
LIB_H += util/include/asm/asm-offsets.h
LIB_H += ../include/asm/bug.h
......@@ -252,7 +252,6 @@ LIB_H += util/event.h
LIB_H += util/evsel.h
LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
LIB_H += util/machine.h
LIB_H += util/map.h
......@@ -397,7 +396,10 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
LIB_OBJS += $(OUTPUT)tests/pmu.o
LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
LIB_OBJS += $(OUTPUT)tests/hists_filter.o
LIB_OBJS += $(OUTPUT)tests/hists_output.o
LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
......@@ -410,10 +412,12 @@ LIB_OBJS += $(OUTPUT)tests/code-reading.o
LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
ifndef NO_DWARF_UNWIND
ifeq ($(ARCH),x86)
ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
endif
endif
LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
......
......@@ -5,3 +5,10 @@ endif
ifndef NO_LIBUNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
endif
ifndef NO_LIBDW_DWARF_UNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
endif
ifndef NO_DWARF_UNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif
......@@ -2,10 +2,15 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include "../../util/types.h"
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
#define PERF_REGS_MAX PERF_REG_ARM_MAX
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
......
#include <string.h>
#include "perf_regs.h"
#include "thread.h"
#include "map.h"
#include "event.h"
#include "tests/tests.h"
#define STACK_SIZE 8192
static int sample_ustack(struct perf_sample *sample,
struct thread *thread, u64 *regs)
{
struct stack_dump *stack = &sample->user_stack;
struct map *map;
unsigned long sp;
u64 stack_size, *buf;
buf = malloc(STACK_SIZE);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
sp = (unsigned long) regs[PERF_REG_ARM_SP];
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}
stack_size = map->end - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
memcpy(buf, (void *) sp, stack_size);
stack->data = (char *) buf;
stack->size = stack_size;
return 0;
}
int test__arch_unwind_sample(struct perf_sample *sample,
struct thread *thread)
{
struct regs_dump *regs = &sample->user_regs;
u64 *buf;
buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
if (!buf) {
pr_debug("failed to allocate sample uregs data\n");
return -1;
}
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
regs->mask = PERF_REGS_MASK;
return sample_ustack(sample, thread, buf);
}
#include <linux/linkage.h>
#define R0 0x00
#define R1 0x08
#define R2 0x10
#define R3 0x18
#define R4 0x20
#define R5 0x28
#define R6 0x30
#define R7 0x38
#define R8 0x40
#define R9 0x48
#define SL 0x50
#define FP 0x58
#define IP 0x60
#define SP 0x68
#define LR 0x70
#define PC 0x78
/*
* Implementation of void perf_regs_load(u64 *regs);
*
* This function fills in the 'regs' buffer with the actual register values,
* in the way the perf built-in unwinding test expects them:
* - the PC at the time of the call to this function. Since this function
* is called using a bl instruction, the PC value is taken from LR.
* The built-in unwinding test then unwinds the call stack from the dwarf
* information in unwind__get_entries.
*
* Notes:
* - the 8-byte stride in the register offsets comes from the fact
* that the registers are stored in a u64 array (u64 *regs),
* - the regs buffer needs to be zeroed before the call to this function,
* in this case using a calloc in dwarf-unwind.c.
*/
.text
.type perf_regs_load,%function
ENTRY(perf_regs_load)
str r0, [r0, #R0]
str r1, [r0, #R1]
str r2, [r0, #R2]
str r3, [r0, #R3]
str r4, [r0, #R4]
str r5, [r0, #R5]
str r6, [r0, #R6]
str r7, [r0, #R7]
str r8, [r0, #R8]
str r9, [r0, #R9]
str sl, [r0, #SL]
str fp, [r0, #FP]
str ip, [r0, #IP]
str sp, [r0, #SP]
str lr, [r0, #LR]
str lr, [r0, #PC] // store pc as lr in order to skip the call
// to this function
mov pc, lr
ENDPROC(perf_regs_load)
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
val; \
})
dwarf_regs[0] = REG(R0);
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(FP);
dwarf_regs[12] = REG(IP);
dwarf_regs[13] = REG(SP);
dwarf_regs[14] = REG(LR);
dwarf_regs[15] = REG(PC);
return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
dwarf_regs);
}
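The REG() macro above uses a GNU statement expression, ({ ... }), whose value is that of the last expression in the block; that is what lets it call perf_reg_value() and still yield val inline. A minimal stand-alone example of the idiom:

#include <stdio.h>

/* evaluate the argument once, yield its square */
#define SQUARE(x) ({		\
	int _v = (x);		\
	_v * _v;		\
})

int main(void)
{
	printf("%d\n", SQUARE(6));	/* prints 36 */
	return 0;
}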
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
ifndef NO_LIBUNWIND
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
endif
#ifndef ARCH_PERF_REGS_H
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include <linux/types.h>
#include <asm/perf_regs.h>
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
static inline const char *perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
return "x0";
case PERF_REG_ARM64_X1:
return "x1";
case PERF_REG_ARM64_X2:
return "x2";
case PERF_REG_ARM64_X3:
return "x3";
case PERF_REG_ARM64_X4:
return "x4";
case PERF_REG_ARM64_X5:
return "x5";
case PERF_REG_ARM64_X6:
return "x6";
case PERF_REG_ARM64_X7:
return "x7";
case PERF_REG_ARM64_X8:
return "x8";
case PERF_REG_ARM64_X9:
return "x9";
case PERF_REG_ARM64_X10:
return "x10";
case PERF_REG_ARM64_X11:
return "x11";
case PERF_REG_ARM64_X12:
return "x12";
case PERF_REG_ARM64_X13:
return "x13";
case PERF_REG_ARM64_X14:
return "x14";
case PERF_REG_ARM64_X15:
return "x15";
case PERF_REG_ARM64_X16:
return "x16";
case PERF_REG_ARM64_X17:
return "x17";
case PERF_REG_ARM64_X18:
return "x18";
case PERF_REG_ARM64_X19:
return "x19";
case PERF_REG_ARM64_X20:
return "x20";
case PERF_REG_ARM64_X21:
return "x21";
case PERF_REG_ARM64_X22:
return "x22";
case PERF_REG_ARM64_X23:
return "x23";
case PERF_REG_ARM64_X24:
return "x24";
case PERF_REG_ARM64_X25:
return "x25";
case PERF_REG_ARM64_X26:
return "x26";
case PERF_REG_ARM64_X27:
return "x27";
case PERF_REG_ARM64_X28:
return "x28";
case PERF_REG_ARM64_X29:
return "x29";
case PERF_REG_ARM64_SP:
return "sp";
case PERF_REG_ARM64_LR:
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */
/*
* Mapping of DWARF debug register numbers into register names.
*
* Copyright (C) 2010 Will Deacon, ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <stddef.h>
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
};
#define STR(s) #s
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num) \
{.name = STR(%x##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
/*
* Reference:
* http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
*/
static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
GPR_DWARFNUM_NAME(0),
GPR_DWARFNUM_NAME(1),
GPR_DWARFNUM_NAME(2),
GPR_DWARFNUM_NAME(3),
GPR_DWARFNUM_NAME(4),
GPR_DWARFNUM_NAME(5),
GPR_DWARFNUM_NAME(6),
GPR_DWARFNUM_NAME(7),
GPR_DWARFNUM_NAME(8),
GPR_DWARFNUM_NAME(9),
GPR_DWARFNUM_NAME(10),
GPR_DWARFNUM_NAME(11),
GPR_DWARFNUM_NAME(12),
GPR_DWARFNUM_NAME(13),
GPR_DWARFNUM_NAME(14),
GPR_DWARFNUM_NAME(15),
GPR_DWARFNUM_NAME(16),
GPR_DWARFNUM_NAME(17),
GPR_DWARFNUM_NAME(18),
GPR_DWARFNUM_NAME(19),
GPR_DWARFNUM_NAME(20),
GPR_DWARFNUM_NAME(21),
GPR_DWARFNUM_NAME(22),
GPR_DWARFNUM_NAME(23),
GPR_DWARFNUM_NAME(24),
GPR_DWARFNUM_NAME(25),
GPR_DWARFNUM_NAME(26),
GPR_DWARFNUM_NAME(27),
GPR_DWARFNUM_NAME(28),
GPR_DWARFNUM_NAME(29),
REG_DWARFNUM_NAME("%lr", 30),
REG_DWARFNUM_NAME("%sp", 31),
REG_DWARFNUM_END,
};
/**
* get_arch_regstr() - lookup register name from its DWARF register number
* @n: the DWARF register number
*
* get_arch_regstr() returns the name of the register in struct
* regdwarfnum_table from its DWARF register number. If the register is not
* found in the table, this returns NULL.
*/
const char *get_arch_regstr(unsigned int n)
{
const struct pt_regs_dwarfnum *roff;
for (roff = regdwarfnum_table; roff->name != NULL; roff++)
if (roff->dwarfnum == n)
return roff->name;
return NULL;
}
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
int libunwind__arch_reg_id(int regnum)
{
switch (regnum) {
case UNW_AARCH64_X0:
return PERF_REG_ARM64_X0;
case UNW_AARCH64_X1:
return PERF_REG_ARM64_X1;
case UNW_AARCH64_X2:
return PERF_REG_ARM64_X2;
case UNW_AARCH64_X3:
return PERF_REG_ARM64_X3;
case UNW_AARCH64_X4:
return PERF_REG_ARM64_X4;
case UNW_AARCH64_X5:
return PERF_REG_ARM64_X5;
case UNW_AARCH64_X6:
return PERF_REG_ARM64_X6;
case UNW_AARCH64_X7:
return PERF_REG_ARM64_X7;
case UNW_AARCH64_X8:
return PERF_REG_ARM64_X8;
case UNW_AARCH64_X9:
return PERF_REG_ARM64_X9;
case UNW_AARCH64_X10:
return PERF_REG_ARM64_X10;
case UNW_AARCH64_X11:
return PERF_REG_ARM64_X11;
case UNW_AARCH64_X12:
return PERF_REG_ARM64_X12;
case UNW_AARCH64_X13:
return PERF_REG_ARM64_X13;
case UNW_AARCH64_X14:
return PERF_REG_ARM64_X14;
case UNW_AARCH64_X15:
return PERF_REG_ARM64_X15;
case UNW_AARCH64_X16:
return PERF_REG_ARM64_X16;
case UNW_AARCH64_X17:
return PERF_REG_ARM64_X17;
case UNW_AARCH64_X18:
return PERF_REG_ARM64_X18;
case UNW_AARCH64_X19:
return PERF_REG_ARM64_X19;
case UNW_AARCH64_X20:
return PERF_REG_ARM64_X20;
case UNW_AARCH64_X21:
return PERF_REG_ARM64_X21;
case UNW_AARCH64_X22:
return PERF_REG_ARM64_X22;
case UNW_AARCH64_X23:
return PERF_REG_ARM64_X23;
case UNW_AARCH64_X24:
return PERF_REG_ARM64_X24;
case UNW_AARCH64_X25:
return PERF_REG_ARM64_X25;
case UNW_AARCH64_X26:
return PERF_REG_ARM64_X26;
case UNW_AARCH64_X27:
return PERF_REG_ARM64_X27;
case UNW_AARCH64_X28:
return PERF_REG_ARM64_X28;
case UNW_AARCH64_X29:
return PERF_REG_ARM64_X29;
case UNW_AARCH64_X30:
return PERF_REG_ARM64_LR;
case UNW_AARCH64_SP:
return PERF_REG_ARM64_SP;
case UNW_AARCH64_PC:
return PERF_REG_ARM64_PC;
default:
pr_err("unwind: invalid reg id %d\n", regnum);
return -EINVAL;
}
return -EINVAL;
}
......@@ -2,7 +2,7 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
#include "../../util/types.h"
#include <linux/types.h>
#include <asm/perf_regs.h>
void perf_regs_load(u64 *regs);
......
......@@ -23,7 +23,7 @@ static int sample_ustack(struct perf_sample *sample,
sp = (unsigned long) regs[PERF_REG_X86_SP];
map = map_groups__find(&thread->mg, MAP__VARIABLE, (u64) sp);
map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
......
......@@ -4,7 +4,7 @@
#include <linux/perf_event.h>
#include "../../perf.h"
#include "../../util/types.h"
#include <linux/types.h>
#include "../../util/debug.h"
#include "tsc.h"
......
#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
#include "../../util/types.h"
#include <linux/types.h>
struct perf_tsc_conversion {
u16 time_shift;
......
......@@ -46,7 +46,7 @@ struct perf_annotate {
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct perf_sample *sample __maybe_unused,
struct addr_location *al,
struct perf_annotate *ann)
{
......@@ -70,7 +70,6 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
return ret;
}
......
......@@ -60,7 +60,6 @@ static int data__files_cnt;
#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
static char diff__default_sort_order[] = "dso,symbol";
static bool force;
static bool show_period;
static bool show_formula;
......@@ -220,7 +219,8 @@ static int setup_compute(const struct option *opt, const char *str,
static double period_percent(struct hist_entry *he, u64 period)
{
u64 total = he->hists->stats.total_period;
u64 total = hists__total_period(he->hists);
return (period * 100.0) / total;
}
......@@ -259,11 +259,18 @@ static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
char *buf, size_t size)
{
u64 he_total = he->hists->stats.total_period;
u64 pair_total = pair->hists->stats.total_period;
if (symbol_conf.filter_relative) {
he_total = he->hists->stats.total_non_filtered_period;
pair_total = pair->hists->stats.total_non_filtered_period;
}
return scnprintf(buf, size,
"(%" PRIu64 " * 100 / %" PRIu64 ") - "
"(%" PRIu64 " * 100 / %" PRIu64 ")",
pair->stat.period, pair->hists->stats.total_period,
he->stat.period, he->hists->stats.total_period);
pair->stat.period, pair_total,
he->stat.period, he_total);
}
static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
......@@ -327,16 +334,22 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
if (al.filtered)
return 0;
if (hists__add_entry(&evsel->hists, &al, sample->period,
sample->weight, sample->transaction)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
/*
* The total_period is updated here before going to the output
* tree since normally only the baseline hists will call
* hists__output_resort() and precompute needs the total
* period in order to sort entries by percentage delta.
*/
evsel->hists.stats.total_period += sample->period;
if (!al.filtered)
evsel->hists.stats.total_non_filtered_period += sample->period;
return 0;
}
......@@ -564,8 +577,7 @@ static void hists__compute_resort(struct hists *hists)
hists->entries = RB_ROOT;
next = rb_first(root);
hists->nr_entries = 0;
hists->stats.total_period = 0;
hists__reset_stats(hists);
hists__reset_col_len(hists);
while (next != NULL) {
......@@ -575,7 +587,10 @@ static void hists__compute_resort(struct hists *hists)
next = rb_next(&he->rb_node_in);
insert_hist_entry_by_compute(&hists->entries, he, compute);
hists__inc_nr_entries(hists, he);
hists__inc_stats(hists, he);
if (!he->filtered)
hists__calc_col_len(hists, he);
}
}
......@@ -725,20 +740,24 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
" Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
static double baseline_percent(struct hist_entry *he)
{
struct hists *hists = he->hists;
return 100.0 * he->stat.period / hists->stats.total_period;
u64 total = hists__total_period(he->hists);
return 100.0 * he->stat.period / total;
}
static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
......@@ -1120,7 +1139,8 @@ static int data_init(int argc, const char **argv)
int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
{
sort_order = diff__default_sort_order;
perf_config(perf_default_config, NULL);
argc = parse_options(argc, argv, options, diff_usage, 0);
if (symbol__init() < 0)
......@@ -1131,6 +1151,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
ui_init();
sort__mode = SORT_MODE__DIFF;
if (setup_sorting() < 0)
usage_with_options(diff_usage, options);
......
......@@ -209,7 +209,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
thread = machine__findnew_thread(machine, sample->pid, sample->pid);
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
......
......@@ -14,6 +14,7 @@
#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"
#include "util/debug.h"
......@@ -31,9 +32,6 @@ static int caller_lines = -1;
static bool raw_ip;
static int *cpunode_map;
static int max_cpu_num;
struct alloc_stat {
u64 call_site;
u64 ptr;
......@@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted;
static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;
#define PATH_SYS_NODE "/sys/devices/system/node"
static int init_cpunode_map(void)
{
FILE *fp;
int i, err = -1;
fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
if (!fp) {
max_cpu_num = 4096;
return 0;
}
if (fscanf(fp, "%d", &max_cpu_num) < 1) {
pr_err("Failed to read 'kernel_max' from sysfs");
goto out_close;
}
max_cpu_num++;
cpunode_map = calloc(max_cpu_num, sizeof(int));
if (!cpunode_map) {
pr_err("%s: calloc failed\n", __func__);
goto out_close;
}
for (i = 0; i < max_cpu_num; i++)
cpunode_map[i] = -1;
err = 0;
out_close:
fclose(fp);
return err;
}
static int setup_cpunode_map(void)
{
struct dirent *dent1, *dent2;
DIR *dir1, *dir2;
unsigned int cpu, mem;
char buf[PATH_MAX];
if (init_cpunode_map())
return -1;
dir1 = opendir(PATH_SYS_NODE);
if (!dir1)
return 0;
while ((dent1 = readdir(dir1)) != NULL) {
if (dent1->d_type != DT_DIR ||
sscanf(dent1->d_name, "node%u", &mem) < 1)
continue;
snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
dir2 = opendir(buf);
if (!dir2)
continue;
while ((dent2 = readdir(dir2)) != NULL) {
if (dent2->d_type != DT_LNK ||
sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
continue;
cpunode_map[cpu] = mem;
}
closedir(dir2);
}
closedir(dir1);
return 0;
}
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
int bytes_req, int bytes_alloc, int cpu)
{
......@@ -235,7 +163,7 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
int ret = perf_evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpunode_map[sample->cpu],
int node1 = cpu__get_node(sample->cpu),
node2 = perf_evsel__intval(evsel, sample, "node");
if (node1 != node2)
......@@ -307,7 +235,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct machine *machine)
{
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->pid);
sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
......@@ -756,11 +684,13 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};
const char * const kmem_usage[] = {
"perf kmem [<options>] {record|stat}",
const char *const kmem_subcommands[] = { "record", "stat", NULL };
const char *kmem_usage[] = {
NULL,
NULL
};
argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
argc = parse_options_subcommand(argc, argv, kmem_options,
kmem_subcommands, kmem_usage, 0);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
......@@ -770,7 +700,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (!strncmp(argv[0], "rec", 3)) {
return __cmd_record(argc, argv);
} else if (!strcmp(argv[0], "stat")) {
if (setup_cpunode_map())
if (cpu__setup_cpunode_map())
return -1;
if (list_empty(&caller_sort))
......
......@@ -961,8 +961,10 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
"perf lock info [<options>]",
NULL
};
const char * const lock_usage[] = {
"perf lock [<options>] {record|report|script|info}",
const char *const lock_subcommands[] = { "record", "report", "script",
"info", NULL };
const char *lock_usage[] = {
NULL,
NULL
};
const char * const report_usage[] = {
......@@ -976,8 +978,8 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < LOCKHASH_SIZE; i++)
INIT_LIST_HEAD(lockhash_table + i);
argc = parse_options(argc, argv, lock_options, lock_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(lock_usage, lock_options);
......
......@@ -21,11 +21,6 @@ struct perf_mem {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
static const char * const mem_usage[] = {
"perf mem [<options>] {record <command> |report}",
NULL
};
static int __cmd_record(int argc, const char **argv)
{
int rec_argc, i = 0, j;
......@@ -220,9 +215,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
" between columns '.' is reserved."),
OPT_END()
};
const char *const mem_subcommands[] = { "record", "report", NULL };
const char *mem_usage[] = {
NULL,
NULL
};
argc = parse_options(argc, argv, mem_options, mem_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
usage_with_options(mem_usage, mem_options);
......
......@@ -30,37 +30,6 @@
#include <sched.h>
#include <sys/mman.h>
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))
static int on_exit(on_exit_func_t function, void *arg)
{
if (__on_exit_count == ATEXIT_MAX)
return -ENOMEM;
else if (__on_exit_count == 0)
atexit(__handle_on_exit_funcs);
__on_exit_funcs[__on_exit_count] = function;
__on_exit_args[__on_exit_count++] = arg;
return 0;
}
static void __handle_on_exit_funcs(void)
{
int i;
for (i = 0; i < __on_exit_count; i++)
__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
struct record {
struct perf_tool tool;
......@@ -147,29 +116,19 @@ static void sig_handler(int sig)
{
if (sig == SIGCHLD)
child_finished = 1;
else
signr = sig;
done = 1;
signr = sig;
}
static void record__sig_exit(int exit_status __maybe_unused, void *arg)
static void record__sig_exit(void)
{
struct record *rec = arg;
int status;
if (rec->evlist->workload.pid > 0) {
if (!child_finished)
kill(rec->evlist->workload.pid, SIGTERM);
wait(&status);
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), rec->progname);
}
if (signr == -1 || signr == SIGUSR1)
if (signr == -1)
return;
signal(signr, SIG_DFL);
raise(signr);
}
static int record__open(struct record *rec)
......@@ -243,27 +202,6 @@ static int process_buildids(struct record *rec)
size, &build_id__mark_dso_hit_ops);
}
static void record__exit(int status, void *arg)
{
struct record *rec = arg;
struct perf_data_file *file = &rec->file;
if (status != 0)
return;
if (!file->is_pipe) {
rec->session->header.data_size += rec->bytes_written;
if (!rec->no_buildid)
process_buildids(rec);
perf_session__write_header(rec->session, rec->evlist,
file->fd, true);
perf_session__delete(rec->session);
perf_evlist__delete(rec->evlist);
symbol__exit();
}
}
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
......@@ -344,18 +282,19 @@ static volatile int workload_exec_errno;
* if the fork fails, since we asked by setting its
* want_signal to true.
*/
static void workload_exec_failed_signal(int signo, siginfo_t *info,
static void workload_exec_failed_signal(int signo __maybe_unused,
siginfo_t *info,
void *ucontext __maybe_unused)
{
workload_exec_errno = info->si_value.sival_int;
done = 1;
signr = signo;
child_finished = 1;
}
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
int status = 0;
unsigned long waking = 0;
const bool forks = argc > 0;
struct machine *machine;
......@@ -367,7 +306,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
rec->progname = argv[0];
on_exit(record__sig_exit, rec);
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
......@@ -388,32 +327,28 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
workload_exec_failed_signal);
if (err < 0) {
pr_err("Couldn't run the workload!\n");
status = err;
goto out_delete_session;
}
}
if (record__open(rec) != 0) {
err = -1;
goto out_delete_session;
goto out_child;
}
if (!rec->evlist->nr_groups)
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
/*
* perf_session__delete(session) will be called at record__exit()
*/
on_exit(record__exit, rec);
if (file->is_pipe) {
err = perf_header__write_pipe(file->fd);
if (err < 0)
goto out_delete_session;
goto out_child;
} else {
err = perf_session__write_header(session, rec->evlist,
file->fd, false);
if (err < 0)
goto out_delete_session;
goto out_child;
}
if (!rec->no_buildid
......@@ -421,7 +356,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
err = -1;
goto out_delete_session;
goto out_child;
}
machine = &session->machines.host;
......@@ -431,7 +366,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
process_synthesized_event);
if (err < 0) {
pr_err("Couldn't synthesize attrs.\n");
goto out_delete_session;
goto out_child;
}
if (have_tracepoints(&rec->evlist->entries)) {
......@@ -447,7 +382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
process_synthesized_event);
if (err <= 0) {
pr_err("Couldn't record tracing data.\n");
goto out_delete_session;
goto out_child;
}
rec->bytes_written += err;
}
......@@ -475,7 +410,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address);
if (err != 0)
goto out_delete_session;
goto out_child;
if (rec->realtime_prio) {
struct sched_param param;
......@@ -484,7 +419,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
pr_err("Could not set realtime priority.\n");
err = -1;
goto out_delete_session;
goto out_child;
}
}
......@@ -512,13 +447,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (record__mmap_read_all(rec) < 0) {
err = -1;
goto out_delete_session;
goto out_child;
}
if (hits == rec->samples) {
if (done)
break;
err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
if (err < 0 && errno == EINTR)
err = 0;
waking++;
}
......@@ -538,12 +475,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
pr_err("Workload failed: %s\n", emsg);
err = -1;
goto out_delete_session;
goto out_child;
}
if (quiet || signr == SIGUSR1)
return 0;
if (!quiet) {
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
/*
......@@ -554,12 +489,38 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
(double)rec->bytes_written / 1024.0 / 1024.0,
file->path,
rec->bytes_written / 24);
}
return 0;
out_child:
if (forks) {
int exit_status;
if (!child_finished)
kill(rec->evlist->workload.pid, SIGTERM);
wait(&exit_status);
if (err < 0)
status = err;
else if (WIFEXITED(exit_status))
status = WEXITSTATUS(exit_status);
else if (WIFSIGNALED(exit_status))
signr = WTERMSIG(exit_status);
} else
status = err;
if (!err && !file->is_pipe) {
rec->session->header.data_size += rec->bytes_written;
if (!rec->no_buildid)
process_buildids(rec);
perf_session__write_header(rec->session, rec->evlist,
file->fd, true);
}
out_delete_session:
perf_session__delete(session);
return err;
return status;
}
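The new out_child block is what makes perf record exit with its workload's status, or re-raise the signal that killed it. The underlying wait-status plumbing, reduced to a self-contained sketch:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int ws, status = 0;
	pid_t pid = fork();

	if (pid < 0)
		return 1;
	if (pid == 0)
		_exit(42);		/* the "workload" */

	wait(&ws);
	if (WIFEXITED(ws)) {
		status = WEXITSTATUS(ws);	/* propagate child's exit code */
	} else if (WIFSIGNALED(ws)) {
		signal(WTERMSIG(ws), SIG_DFL);	/* die the same way it did */
		raise(WTERMSIG(ws));
	}
	printf("propagating exit status %d\n", status);
	return status;
}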
#define BRANCH_OPT(n, m) \
......@@ -988,6 +949,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
err = __cmd_record(&record, argc, argv);
out_symbol_exit:
perf_evlist__delete(rec->evlist);
symbol__exit();
return err;
}
......@@ -57,6 +57,7 @@ struct report {
const char *cpu_list;
const char *symbol_filter_str;
float min_percent;
u64 nr_entries;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
......@@ -75,6 +76,27 @@ static int report__config(const char *var, const char *value, void *cb)
return perf_default_config(var, value, cb);
}
static void report__inc_stats(struct report *rep, struct hist_entry *he)
{
/*
* The @he is either a newly created entry or an existing one
* merging the current sample. We only want to count a new one,
* so check that ->nr_events is 1.
*/
if (he->stat.nr_events == 1)
rep->nr_entries++;
/*
* Only counts number of samples at this stage as it's more
* natural to do it here and non-sample events are also
* counted in perf_session_deliver_event(). The dump_trace
* requires this info is ready before going to the output tree.
*/
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al,
struct perf_sample *sample, struct perf_evsel *evsel)
{
......@@ -121,8 +143,8 @@ static int report__add_mem_hist_entry(struct report *rep, struct addr_location *
goto out;
}
evsel->hists.stats.total_period += cost;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
report__inc_stats(rep, he);
err = hist_entry__append_callchain(he, sample);
out:
return err;
......@@ -173,9 +195,7 @@ static int report__add_branch_hist_entry(struct report *rep, struct addr_locatio
if (err)
goto out;
}
evsel->hists.stats.total_period += 1;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
report__inc_stats(rep, he);
} else
goto out;
}
......@@ -208,8 +228,8 @@ static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
if (ui__has_annotation())
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
report__inc_stats(rep, he);
out:
return err;
}
......@@ -337,6 +357,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
char buf[512];
size_t size = sizeof(buf);
if (symbol_conf.filter_relative) {
nr_samples = hists->stats.nr_non_filtered_samples;
nr_events = hists->stats.total_non_filtered_period;
}
if (perf_evsel__is_group_event(evsel)) {
struct perf_evsel *pos;
......@@ -344,10 +369,15 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
evname = buf;
for_each_group_member(pos, evsel) {
if (symbol_conf.filter_relative) {
nr_samples += pos->hists.stats.nr_non_filtered_samples;
nr_events += pos->hists.stats.total_non_filtered_period;
} else {
nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
nr_events += pos->hists.stats.total_period;
}
}
}
nr_samples = convert_unit(nr_samples, &unit);
ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
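The hunk above switches the reported totals to the non-filtered counters when relative percentage mode is in effect. A sketch of the two modes (the struct and helper are illustrative, not the patch's code):

struct sample_stats {
	unsigned long nr_samples, nr_non_filtered_samples;
};

/* relative: percentages against what survived the filters;
 * absolute: percentages against everything that was recorded. */
static unsigned long percent_base(const struct sample_stats *st,
				  int filter_relative)
{
	return filter_relative ? st->nr_non_filtered_samples
			       : st->nr_samples;
}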
......@@ -470,24 +500,12 @@ static int report__browse_hists(struct report *rep)
return ret;
}
static u64 report__collapse_hists(struct report *rep)
static void report__collapse_hists(struct report *rep)
{
struct ui_progress prog;
struct perf_evsel *pos;
u64 nr_samples = 0;
/*
* Count number of histogram entries to use when showing progress,
* reusing nr_samples variable.
*/
evlist__for_each(rep->session->evlist, pos)
nr_samples += pos->hists.nr_entries;
ui_progress__init(&prog, nr_samples, "Merging related events...");
/*
* Count total number of samples, will be used to check if this
* session had any.
*/
nr_samples = 0;
ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
evlist__for_each(rep->session->evlist, pos) {
struct hists *hists = &pos->hists;
......@@ -496,7 +514,6 @@ static u64 report__collapse_hists(struct report *rep)
hists->symbol_filter_str = rep->symbol_filter_str;
hists__collapse_resort(hists, &prog);
nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
/* Non-group events are considered as leader */
if (symbol_conf.event_group &&
......@@ -509,14 +526,11 @@ static u64 report__collapse_hists(struct report *rep)
}
ui_progress__finish();
return nr_samples;
}
static int __cmd_report(struct report *rep)
{
int ret;
u64 nr_samples;
struct perf_session *session = rep->session;
struct perf_evsel *pos;
struct perf_data_file *file = session->file;
......@@ -556,12 +570,12 @@ static int __cmd_report(struct report *rep)
}
}
nr_samples = report__collapse_hists(rep);
report__collapse_hists(rep);
if (session_done())
return 0;
if (nr_samples == 0) {
if (rep->nr_entries == 0) {
ui__error("The %s file has no samples!\n", file->path);
return 0;
}
......@@ -573,11 +587,9 @@ static int __cmd_report(struct report *rep)
}
static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
struct report *rep = (struct report *)opt->value;
char *tok, *tok2;
char *endptr;
/*
* --no-call-graph
......@@ -587,80 +599,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return 0;
}
symbol_conf.use_callchain = true;
if (!arg)
return 0;
tok = strtok((char *)arg, ",");
if (!tok)
return -1;
/* get the output mode */
if (!strncmp(tok, "graph", strlen(arg)))
callchain_param.mode = CHAIN_GRAPH_ABS;
else if (!strncmp(tok, "flat", strlen(arg)))
callchain_param.mode = CHAIN_FLAT;
else if (!strncmp(tok, "fractal", strlen(arg)))
callchain_param.mode = CHAIN_GRAPH_REL;
else if (!strncmp(tok, "none", strlen(arg))) {
callchain_param.mode = CHAIN_NONE;
symbol_conf.use_callchain = false;
return 0;
}
else
return -1;
/* get the min percentage */
tok = strtok(NULL, ",");
if (!tok)
goto setup;
callchain_param.min_percent = strtod(tok, &endptr);
if (tok == endptr)
return -1;
/* get the print limit */
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
if (tok2[0] != 'c') {
callchain_param.print_limit = strtoul(tok2, &endptr, 0);
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
}
/* get the call chain order */
if (!strncmp(tok2, "caller", strlen("caller")))
callchain_param.order = ORDER_CALLER;
else if (!strncmp(tok2, "callee", strlen("callee")))
callchain_param.order = ORDER_CALLEE;
else
return -1;
/* Get the sort key */
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
if (!strncmp(tok2, "function", strlen("function")))
callchain_param.key = CCKEY_FUNCTION;
else if (!strncmp(tok2, "address", strlen("address")))
callchain_param.key = CCKEY_ADDRESS;
else
return -1;
setup:
if (callchain_register_param(&callchain_param) < 0) {
pr_err("Can't register callchain params\n");
return -1;
}
return 0;
return parse_callchain_report_opt(arg);
}
int
......@@ -760,10 +699,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "header-only", &report.header_only,
"Show only data header."),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
" dso_to, dso_from, symbol_to, symbol_from, mispredict,"
" weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
"snoop, locked, abort, in_tx, transaction"),
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
" Please refer to the man page for the complete list."),
OPT_STRING('F', "fields", &field_order, "key[,keys...]",
"output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
......@@ -772,7 +711,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
"Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
......@@ -823,6 +762,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"how to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
struct perf_data_file file = {
......@@ -866,39 +807,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
if (branch_mode == -1 && has_br_stack)
sort__mode = SORT_MODE__BRANCH;
/* sort__mode could be NORMAL if --no-branch-stack */
if (sort__mode == SORT_MODE__BRANCH) {
/*
* if no sort_order is provided, then specify
* branch-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "comm,dso_from,symbol_from,"
"dso_to,symbol_to";
}
if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
pr_err("branch and mem mode incompatible\n");
goto error;
}
sort__mode = SORT_MODE__MEMORY;
/*
* if no sort_order is provided, then specify
* mem-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
}
if (setup_sorting() < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
goto error;
}
if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0)
if (field_order)
parse_options_usage(sort_order ? NULL : report_usage,
options, "F", 1);
goto error;
}
......@@ -908,10 +830,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
if (strcmp(input_name, "-") != 0)
setup_browser(true);
else {
else
use_browser = 0;
perf_hpp__init();
}
if (report.header || report.header_only) {
perf_session__fprintf_info(session, stdout,
......
......@@ -66,7 +66,7 @@ struct sched_atom {
struct task_desc *wakee;
};
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
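The widened TASK_STATE_TO_CHAR_STR above must track the kernel's task state bits, one character per bit position. A sketch of the lookup (the semantics and helper are assumptions, using a GCC builtin):

static const char state_str[] = "RSDTtZXxKWP";

/* Map a sched_switch prev_state bitmask to its state character:
 * 0 means running ('R'); otherwise index by the lowest set bit. */
static char task_state_char(unsigned long state)
{
	int bit = state ? __builtin_ffsl(state) : 0;

	return bit < (int)sizeof(state_str) - 1 ? state_str[bit] : '?';
}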
enum thread_state {
THREAD_SLEEPING = 0,
......@@ -149,7 +149,6 @@ struct perf_sched {
unsigned long nr_runs;
unsigned long nr_timestamps;
unsigned long nr_unordered_timestamps;
unsigned long nr_state_machine_bugs;
unsigned long nr_context_switch_bugs;
unsigned long nr_events;
unsigned long nr_lost_chunks;
......@@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
const u32 pid = perf_evsel__intval(evsel, sample, "pid"),
success = perf_evsel__intval(evsel, sample, "success");
const u32 pid = perf_evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
u64 timestamp = sample->time;
/* Note for later, it may be interesting to observe the failing cases */
if (!success)
return 0;
wakee = machine__findnew_thread(machine, 0, pid);
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
if (!atoms) {
......@@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
atom = list_entry(atoms->work_list.prev, struct work_atom, list);
/*
* As we do not guarantee that the wakeup event happens when the
* task is off the run queue (it may also happen while the task is
* on the run queue and the wakeup merely changes ->state to
* TASK_RUNNING), we should not set ->wake_up_time when waking up
* a task which is already on the run queue.
*
* You WILL be missing events if you've recorded only
* one CPU, or are only looking at only one, so don't
* make useless noise.
* skip in this case.
*/
if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
sched->nr_state_machine_bugs++;
return 0;
sched->nr_timestamps++;
if (atom->sched_out_time > timestamp) {
......@@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_out __maybe_unused, *sched_in;
const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
......@@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
return -1;
}
sched_out = machine__findnew_thread(machine, 0, prev_pid);
sched_in = machine__findnew_thread(machine, 0, next_pid);
sched->curr_thread[this_cpu] = sched_in;
......@@ -1300,17 +1298,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
new_shortname = 0;
if (!sched_in->shortname[0]) {
if (!strcmp(thread__comm_str(sched_in), "swapper")) {
/*
* Don't allocate a letter-number for swapper:0
* as a shortname. Instead, we use '.' for it.
*/
sched_in->shortname[0] = '.';
sched_in->shortname[1] = ' ';
} else {
sched_in->shortname[0] = sched->next_shortname1;
sched_in->shortname[1] = sched->next_shortname2;
if (sched->next_shortname1 < 'Z') {
sched->next_shortname1++;
} else {
sched->next_shortname1='A';
if (sched->next_shortname2 < '9') {
sched->next_shortname1 = 'A';
if (sched->next_shortname2 < '9')
sched->next_shortname2++;
} else {
sched->next_shortname2='0';
else
sched->next_shortname2 = '0';
}
}
new_shortname = 1;
......@@ -1322,12 +1328,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
else
printf("*");
if (sched->curr_thread[cpu]) {
if (sched->curr_thread[cpu]->tid)
if (sched->curr_thread[cpu])
printf("%2s ", sched->curr_thread[cpu]->shortname);
else
printf(". ");
} else
printf(" ");
}
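The reworked branch above allocates two-character shortnames for 'perf sched map'. A standalone sketch of the same cycling scheme (A0, B0, ... Z0, A1, ... Z9, then wrapping back to A0), with illustrative names:

static char next1 = 'A', next2 = '0';

static void alloc_shortname(char name[2])
{
	name[0] = next1;
	name[1] = next2;

	if (next1 < 'Z') {
		next1++;		/* advance the letter first */
	} else {
		next1 = 'A';		/* wrap letters, bump the digit */
		next2 = (next2 < '9') ? next2 + 1 : '0';
	}
}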
......@@ -1496,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
}
if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
(double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
sched->nr_state_machine_bugs, sched->nr_timestamps);
if (sched->nr_lost_events)
printf(" (due to lost events?)");
printf("\n");
}
if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
......@@ -1635,6 +1630,7 @@ static int __cmd_record(int argc, const char **argv)
"-e", "sched:sched_stat_runtime",
"-e", "sched:sched_process_fork",
"-e", "sched:sched_wakeup",
"-e", "sched:sched_wakeup_new",
"-e", "sched:sched_migrate_task",
};
......@@ -1713,8 +1709,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"perf sched replay [<options>]",
NULL
};
const char * const sched_usage[] = {
"perf sched [<options>] {record|latency|map|replay|script}",
const char *const sched_subcommands[] = { "record", "latency", "map",
"replay", "script", NULL };
const char *sched_usage[] = {
NULL,
NULL
};
struct trace_sched_handler lat_ops = {
......@@ -1736,8 +1734,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
sched.curr_pid[i] = -1;
argc = parse_options(argc, argv, sched_options, sched_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(sched_usage, sched_options);
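Switching to parse_options_subcommand() with a NULL-terminated table is what lets 'perf sched' report its subcommands to completion scripts. A hedged sketch of a --list-cmds style dump over such a table (illustrative code, not the tool's actual implementation):

#include <stdio.h>

static const char *const subcommands[] = {
	"record", "latency", "map", "replay", "script", NULL,
};

static void list_cmds(FILE *out)
{
	int i;

	for (i = 0; subcommands[i] != NULL; i++)
		fprintf(out, "%s\n", subcommands[i]);
}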
......
......@@ -253,6 +253,9 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
return NULL;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
evsel->hists.stats.nr_non_filtered_samples++;
return he;
}
......@@ -694,8 +697,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 ||
al.filtered)
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
return;
if (!top->kptr_restrict_warned &&
......@@ -1081,8 +1083,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight,"
" abort, in_tx, transaction"),
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
" Please refer to the man page for the complete list."),
OPT_STRING(0, "fields", &field_order, "key[,keys...]",
"output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
......@@ -1116,6 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
OPT_CALLBACK(0, "percent-limit", &top, "percent",
"Don't show entries under that percent", parse_percent_limit),
OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
"How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
const char * const top_usage[] = {
......@@ -1133,17 +1139,19 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(top_usage, options);
if (sort_order == default_sort_order)
sort_order = "dso,symbol";
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
sort__need_collapse = 1;
if (setup_sorting() < 0) {
if (sort_order)
parse_options_usage(top_usage, options, "s", 1);
if (field_order)
parse_options_usage(sort_order ? NULL : top_usage,
options, "fields", 0);
goto out_delete_evlist;
}
/* display thread wants entries to be collapsed in a different tree */
sort__need_collapse = 1;
if (top.use_stdio)
use_browser = 0;
else if (top.use_tui)
......
......@@ -29,16 +29,22 @@ ifeq ($(ARCH),x86)
endif
NO_PERF_REGS := 0
endif
ifeq ($(ARCH),arm)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
# So far there's only x86 libdw unwind support merged in perf.
ifeq ($(ARCH),arm64)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
# So far there's only x86 and arm libdw unwind support merged in perf.
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
ifneq ($(ARCH),x86)
ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
NO_LIBDW_DWARF_UNWIND := 1
endif
......@@ -168,7 +174,6 @@ CORE_FEATURE_TESTS = \
libpython-version \
libslang \
libunwind \
on-exit \
stackprotector-all \
timerfd \
libdw-dwarf-unwind
......@@ -194,7 +199,6 @@ VF_FEATURE_TESTS = \
libelf-getphdrnum \
libelf-mmap \
libpython-version \
on-exit \
stackprotector-all \
timerfd \
libunwind-debug-frame \
......@@ -370,7 +374,7 @@ else
endif
ifndef NO_LIBUNWIND
ifeq ($(ARCH),arm)
ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
msg := $(warning No debug_frame support found in libunwind);
......@@ -565,12 +569,6 @@ ifneq ($(filter -lbfd,$(EXTLIBS)),)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
endif
ifndef NO_ON_EXIT
ifeq ($(feature-on-exit), 1)
CFLAGS += -DHAVE_ON_EXIT_SUPPORT
endif
endif
ifndef NO_BACKTRACE
ifeq ($(feature-backtrace), 1)
CFLAGS += -DHAVE_BACKTRACE_SUPPORT
......
......@@ -24,7 +24,6 @@ FILES= \
test-libslang.bin \
test-libunwind.bin \
test-libunwind-debug-frame.bin \
test-on-exit.bin \
test-stackprotector-all.bin \
test-timerfd.bin \
test-libdw-dwarf-unwind.bin
......@@ -133,9 +132,6 @@ test-liberty-z.bin:
test-cplus-demangle.bin:
$(BUILD) -liberty
test-on-exit.bin:
$(BUILD)
test-backtrace.bin:
$(BUILD)
......
......@@ -69,10 +69,6 @@
# include "test-libbfd.c"
#undef main
#define main main_test_on_exit
# include "test-on-exit.c"
#undef main
#define main main_test_backtrace
# include "test-backtrace.c"
#undef main
......@@ -110,7 +106,6 @@ int main(int argc, char *argv[])
main_test_gtk2(argc, argv);
main_test_gtk2_infobar(argc, argv);
main_test_libbfd();
main_test_on_exit();
main_test_backtrace();
main_test_libnuma();
main_test_timerfd();
......
#include <stdio.h>
#include <stdlib.h>
static void exit_fn(int status, void *__data)
{
printf("exit status: %d, data: %d\n", status, *(int *)__data);
}
static int data = 123;
int main(void)
{
on_exit(exit_fn, &data);
return 321;
}
......@@ -121,8 +121,8 @@ __perf_main ()
elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then
evts=$($cmd list --raw-dump)
__perfcomp_colon "$evts" "$cur"
# List subcommands for 'perf kvm'
elif [[ $prev == "kvm" ]]; then
# List subcommands for perf commands
elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then
subcmds=$($cmd $prev --list-cmds)
__perfcomp_colon "$subcmds" "$cur"
# List long option names
......
#ifndef _PERF_SYS_H
#define _PERF_SYS_H
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/types.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#if defined(__i386__)
#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 336
#endif
#ifndef __NR_futex
# define __NR_futex 240
#endif
#ifndef __NR_gettid
# define __NR_gettid 224
#endif
#endif
#if defined(__x86_64__)
#define mb() asm volatile("mfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 298
#endif
#ifndef __NR_futex
# define __NR_futex 202
#endif
#ifndef __NR_gettid
# define __NR_gettid 186
#endif
#endif
#ifdef __powerpc__
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
#define mb() asm volatile ("sync" ::: "memory")
#define wmb() asm volatile ("sync" ::: "memory")
#define rmb() asm volatile ("sync" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __s390__
#define mb() asm volatile("bcr 15,0" ::: "memory")
#define wmb() asm volatile("bcr 15,0" ::: "memory")
#define rmb() asm volatile("bcr 15,0" ::: "memory")
#endif
#ifdef __sh__
#if defined(__SH4A__) || defined(__SH5__)
# define mb() asm volatile("synco" ::: "memory")
# define wmb() asm volatile("synco" ::: "memory")
# define rmb() asm volatile("synco" ::: "memory")
#else
# define mb() asm volatile("" ::: "memory")
# define wmb() asm volatile("" ::: "memory")
# define rmb() asm volatile("" ::: "memory")
#endif
#define CPUINFO_PROC "cpu type"
#endif
#ifdef __hppa__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __sparc__
#ifdef __LP64__
#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
"membar #StoreLoad\n" \
"1:\n":::"memory")
#else
#define mb() asm volatile("":::"memory")
#endif
#define wmb() asm volatile("":::"memory")
#define rmb() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __alpha__
#define mb() asm volatile("mb" ::: "memory")
#define wmb() asm volatile("wmb" ::: "memory")
#define rmb() asm volatile("mb" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __ia64__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#ifdef __arm__
/*
* Use the __kuser_memory_barrier helper in the CPU helper page. See
* arch/arm/kernel/entry-armv.S in the kernel source for details.
*/
#define mb() ((void(*)(void))0xffff0fa0)()
#define wmb() ((void(*)(void))0xffff0fa0)()
#define rmb() ((void(*)(void))0xffff0fa0)()
#define CPUINFO_PROC "Processor"
#endif
#ifdef __aarch64__
#define mb() asm volatile("dmb ish" ::: "memory")
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif
#ifdef __mips__
#define mb() asm volatile( \
".set mips2\n\t" \
"sync\n\t" \
".set mips0" \
: /* no output */ \
: /* no input */ \
: "memory")
#define wmb() mb()
#define rmb() mb()
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __arc__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "Processor"
#endif
#ifdef __metag__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "CPU"
#endif
#ifdef __xtensa__
#define mb() asm volatile("memw" ::: "memory")
#define wmb() asm volatile("memw" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "core ID"
#endif
#ifdef __tile__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#define barrier() asm volatile ("" ::: "memory")
#ifndef cpu_relax
#define cpu_relax() barrier()
#endif
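A typical consumer of the cpu_relax() fallback above is a busy-wait loop; a minimal illustration (the 'ready' flag is assumed to be set by another thread):

static volatile int ready;

static void wait_until_ready(void)
{
	while (!ready)
		cpu_relax();	/* hint to the CPU that we are spinning */
}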
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
int fd;
fd = syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
#ifdef HAVE_ATTR_TEST
if (unlikely(test_attr__enabled))
test_attr__open(attr, pid, cpu, fd, group_fd, flags);
#endif
return fd;
}
#endif /* _PERF_SYS_H */
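For reference, a minimal self-contained program using the same raw syscall as the sys_perf_event_open() wrapper above to count retired instructions in the current thread (assumes Linux; error handling is abbreviated):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %lld\n", count);
	close(fd);
	return 0;
}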
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
#include <asm/unistd.h>
#if defined(__i386__)
#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 336
#endif
#ifndef __NR_futex
# define __NR_futex 240
#endif
#endif
#if defined(__x86_64__)
#define mb() asm volatile("mfence" ::: "memory")
#define wmb() asm volatile("sfence" ::: "memory")
#define rmb() asm volatile("lfence" ::: "memory")
#define cpu_relax() asm volatile("rep; nop" ::: "memory");
#define CPUINFO_PROC "model name"
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 298
#endif
#ifndef __NR_futex
# define __NR_futex 202
#endif
#endif
#ifdef __powerpc__
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
#define mb() asm volatile ("sync" ::: "memory")
#define wmb() asm volatile ("sync" ::: "memory")
#define rmb() asm volatile ("sync" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __s390__
#define mb() asm volatile("bcr 15,0" ::: "memory")
#define wmb() asm volatile("bcr 15,0" ::: "memory")
#define rmb() asm volatile("bcr 15,0" ::: "memory")
#endif
#ifdef __sh__
#if defined(__SH4A__) || defined(__SH5__)
# define mb() asm volatile("synco" ::: "memory")
# define wmb() asm volatile("synco" ::: "memory")
# define rmb() asm volatile("synco" ::: "memory")
#else
# define mb() asm volatile("" ::: "memory")
# define wmb() asm volatile("" ::: "memory")
# define rmb() asm volatile("" ::: "memory")
#endif
#define CPUINFO_PROC "cpu type"
#endif
#ifdef __hppa__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __sparc__
#ifdef __LP64__
#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
"membar #StoreLoad\n" \
"1:\n":::"memory")
#else
#define mb() asm volatile("":::"memory")
#endif
#define wmb() asm volatile("":::"memory")
#define rmb() asm volatile("":::"memory")
#define CPUINFO_PROC "cpu"
#endif
#ifdef __alpha__
#define mb() asm volatile("mb" ::: "memory")
#define wmb() asm volatile("wmb" ::: "memory")
#define rmb() asm volatile("mb" ::: "memory")
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __ia64__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#ifdef __arm__
/*
* Use the __kuser_memory_barrier helper in the CPU helper page. See
* arch/arm/kernel/entry-armv.S in the kernel source for details.
*/
#define mb() ((void(*)(void))0xffff0fa0)()
#define wmb() ((void(*)(void))0xffff0fa0)()
#define rmb() ((void(*)(void))0xffff0fa0)()
#define CPUINFO_PROC "Processor"
#endif
#ifdef __aarch64__
#define mb() asm volatile("dmb ish" ::: "memory")
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif
#ifdef __mips__
#define mb() asm volatile( \
".set mips2\n\t" \
"sync\n\t" \
".set mips0" \
: /* no output */ \
: /* no input */ \
: "memory")
#define wmb() mb()
#define rmb() mb()
#define CPUINFO_PROC "cpu model"
#endif
#ifdef __arc__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "Processor"
#endif
#ifdef __metag__
#define mb() asm volatile("" ::: "memory")
#define wmb() asm volatile("" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "CPU"
#endif
#ifdef __xtensa__
#define mb() asm volatile("memw" ::: "memory")
#define wmb() asm volatile("memw" ::: "memory")
#define rmb() asm volatile("" ::: "memory")
#define CPUINFO_PROC "core ID"
#endif
#ifdef __tile__
#define mb() asm volatile ("mf" ::: "memory")
#define wmb() asm volatile ("mf" ::: "memory")
#define rmb() asm volatile ("mf" ::: "memory")
#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
#define CPUINFO_PROC "model name"
#endif
#define barrier() asm volatile ("" ::: "memory")
#ifndef cpu_relax
#define cpu_relax() barrier()
#endif
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include "util/types.h"
#include <stdbool.h>
#include <linux/types.h>
#include <linux/perf_event.h>
/*
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
* counters in the current task.
*/
#define PR_TASK_PERF_EVENTS_DISABLE 31
#define PR_TASK_PERF_EVENTS_ENABLE 32
extern bool test_attr__enabled;
void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
#define HAVE_ATTR_TEST
#include "perf-sys.h"
#ifndef NSEC_PER_SEC
# define NSEC_PER_SEC 1000000000ULL
......@@ -193,67 +29,8 @@ static inline unsigned long long rdclock(void)
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
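The rdclock() tail visible above boils down to a monotonic nanosecond timestamp; a sketch of the idea (CLOCK_MONOTONIC is an assumption here, and the helper name is invented):

#include <time.h>

static unsigned long long nsecs_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}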
/*
* Pick up some kernel type conventions:
*/
#define __user
#define asmlinkage
#define unlikely(x) __builtin_expect(!!(x), 0)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
extern bool test_attr__enabled;
void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
int fd;
fd = syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
if (unlikely(test_attr__enabled))
test_attr__open(attr, pid, cpu, fd, group_fd, flags);
return fd;
}
#define MAX_COUNTERS 256
#define MAX_NR_CPUS 256
struct ip_callchain {
u64 nr;
u64 ips[0];
};
struct branch_flags {
u64 mispred:1;
u64 predicted:1;
u64 in_tx:1;
u64 abort:1;
u64 reserved:60;
};
struct branch_entry {
u64 from;
u64 to;
struct branch_flags flags;
};
struct branch_stack {
u64 nr;
struct branch_entry entries[0];
};
extern const char *input_name;
extern bool perf_host, perf_guest;
extern const char perf_version_string[];
......@@ -262,13 +39,6 @@ void pthread__unblock_sigwinch(void);
#include "util/target.h"
enum perf_call_graph_mode {
CALLCHAIN_NONE,
CALLCHAIN_FP,
CALLCHAIN_DWARF,
CALLCHAIN_MAX
};
struct record_opts {
struct target target;
int call_graph;
......
/*
* The struct perf_event_attr test support.
*
......@@ -19,14 +18,8 @@
* permissions. All the event text files are stored there.
*/
/*
* Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
* 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
*/
#define __SANE_USERSPACE_TYPES__
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include "../perf.h"
......
......@@ -115,7 +115,7 @@ static struct test {
.desc = "Test parsing with no sample_id_all bit set",
.func = test__parse_no_sample_id_all,
},
#if defined(__x86_64__) || defined(__i386__)
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "Test dwarf unwind",
......@@ -123,6 +123,22 @@ static struct test {
},
#endif
#endif
{
.desc = "Test filtering hist entries",
.func = test__hists_filter,
},
{
.desc = "Test mmap thread lookup",
.func = test__mmap_thread_lookup,
},
{
.desc = "Test thread mg sharing",
.func = test__thread_mg_share,
},
{
.desc = "Test output sorting of hist entries",
.func = test__hists_output,
},
{
.func = NULL,
},
......
#include <sys/types.h>
#include <linux/types.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <inttypes.h>
#include <ctype.h>
#include <string.h>
......@@ -257,7 +256,7 @@ static int process_sample_event(struct machine *machine,
return -1;
}
thread = machine__findnew_thread(machine, sample.pid, sample.pid);
thread = machine__findnew_thread(machine, sample.pid, sample.tid);
if (!thread) {
pr_debug("machine__findnew_thread failed\n");
return -1;
......
#include "util.h"
#include <stdlib.h>
#include <sys/types.h>
#include <linux/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
......
#include <linux/compiler.h>
#include <sys/types.h>
#include <linux/types.h>
#include <unistd.h>
#include "tests.h"
#include "debug.h"
......
......@@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
if (perf_evsel__test_field(evsel, "prio", 4, true))
ret = -1;
if (perf_evsel__test_field(evsel, "success", 4, true))
ret = -1;
if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
ret = -1;
......
#include "perf.h"
#include "util/debug.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
#include "tests/hists_common.h"
static struct {
u32 pid;
const char *comm;
} fake_threads[] = {
{ 100, "perf" },
{ 200, "perf" },
{ 300, "bash" },
};
static struct {
u32 pid;
u64 start;
const char *filename;
} fake_mmap_info[] = {
{ 100, 0x40000, "perf" },
{ 100, 0x50000, "libc" },
{ 100, 0xf0000, "[kernel]" },
{ 200, 0x40000, "perf" },
{ 200, 0x50000, "libc" },
{ 200, 0xf0000, "[kernel]" },
{ 300, 0x40000, "bash" },
{ 300, 0x50000, "libc" },
{ 300, 0xf0000, "[kernel]" },
};
struct fake_sym {
u64 start;
u64 length;
const char *name;
};
static struct fake_sym perf_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "run_command" },
{ 900, 100, "cmd_record" },
};
static struct fake_sym bash_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "xmalloc" },
{ 900, 100, "xfree" },
};
static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" },
{ 800, 100, "free" },
{ 900, 100, "realloc" },
};
static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" },
{ 800, 100, "page_fault" },
{ 900, 100, "sys_perf_event_open" },
};
static struct {
const char *dso_name;
struct fake_sym *syms;
size_t nr_syms;
} fake_symbols[] = {
{ "perf", perf_syms, ARRAY_SIZE(perf_syms) },
{ "bash", bash_syms, ARRAY_SIZE(bash_syms) },
{ "libc", libc_syms, ARRAY_SIZE(libc_syms) },
{ "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
};
struct machine *setup_fake_machine(struct machines *machines)
{
struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
size_t i;
if (machine == NULL) {
pr_debug("Not enough memory for machine setup\n");
return NULL;
}
for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
struct thread *thread;
thread = machine__findnew_thread(machine, fake_threads[i].pid,
fake_threads[i].pid);
if (thread == NULL)
goto out;
thread__set_comm(thread, fake_threads[i].comm, 0);
}
for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
union perf_event fake_mmap_event = {
.mmap = {
.header = { .misc = PERF_RECORD_MISC_USER, },
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
.len = 0x1000ULL,
.pgoff = 0ULL,
},
};
strcpy(fake_mmap_event.mmap.filename,
fake_mmap_info[i].filename);
machine__process_mmap_event(machine, &fake_mmap_event, NULL);
}
for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
size_t k;
struct dso *dso;
dso = __dsos__findnew(&machine->user_dsos,
fake_symbols[i].dso_name);
if (dso == NULL)
goto out;
/* emulate dso__load() */
dso__set_loaded(dso, MAP__FUNCTION);
for (k = 0; k < fake_symbols[i].nr_syms; k++) {
struct symbol *sym;
struct fake_sym *fsym = &fake_symbols[i].syms[k];
sym = symbol__new(fsym->start, fsym->length,
STB_GLOBAL, fsym->name);
if (sym == NULL)
goto out;
symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
}
}
return machine;
out:
pr_debug("Not enough memory for machine setup\n");
machine__delete_threads(machine);
machine__delete(machine);
return NULL;
}
void print_hists_in(struct hists *hists)
{
int i = 0;
struct rb_root *root;
struct rb_node *node;
if (sort__need_collapse)
root = &hists->entries_collapsed;
else
root = hists->entries_in;
pr_info("----- %s --------\n", __func__);
node = rb_first(root);
while (node) {
struct hist_entry *he;
he = rb_entry(node, struct hist_entry, rb_node_in);
if (!he->filtered) {
pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
i, thread__comm_str(he->thread),
he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
}
i++;
node = rb_next(node);
}
}
void print_hists_out(struct hists *hists)
{
int i = 0;
struct rb_root *root;
struct rb_node *node;
root = &hists->entries;
pr_info("----- %s --------\n", __func__);
node = rb_first(root);
while (node) {
struct hist_entry *he;
he = rb_entry(node, struct hist_entry, rb_node);
if (!he->filtered) {
pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n",
i, thread__comm_str(he->thread), he->thread->tid,
he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
}
i++;
node = rb_next(node);
}
}
#ifndef __PERF_TESTS__HISTS_COMMON_H__
#define __PERF_TESTS__HISTS_COMMON_H__
struct machine;
struct machines;
/*
* The setup_fake_machine() function provides a test environment which
* consists of 3 processes, each of which has 3 mappings, and each
* mapping in turn carries 3 symbols. See the table below:
*
* Command: Pid Shared Object Symbol
* ............. ............. ...................
* perf: 100 perf main
* perf: 100 perf run_command
* perf: 100 perf cmd_record
* perf: 100 libc malloc
* perf: 100 libc free
* perf: 100 libc realloc
* perf: 100 [kernel] schedule
* perf: 100 [kernel] page_fault
* perf: 100 [kernel] sys_perf_event_open
* perf: 200 perf main
* perf: 200 perf run_command
* perf: 200 perf cmd_record
* perf: 200 libc malloc
* perf: 200 libc free
* perf: 200 libc realloc
* perf: 200 [kernel] schedule
* perf: 200 [kernel] page_fault
* perf: 200 [kernel] sys_perf_event_open
* bash: 300 bash main
* bash: 300 bash xmalloc
* bash: 300 bash xfree
* bash: 300 libc malloc
* bash: 300 libc free
* bash: 300 libc realloc
* bash: 300 [kernel] schedule
* bash: 300 [kernel] page_fault
* bash: 300 [kernel] sys_perf_event_open
*/
struct machine *setup_fake_machine(struct machines *machines);
void print_hists_in(struct hists *hists);
void print_hists_out(struct hists *hists);
#endif /* __PERF_TESTS__HISTS_COMMON_H__ */
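A hedged sketch of how a test consumes this fixture, restricted to calls visible in this patch (compare test__hists_filter further below; 'fixture_smoke_test' is an invented name):

#include "machine.h"		/* assumed: same headers the tests use */
#include "tests/hists_common.h"

static int fixture_smoke_test(void)
{
	struct machines machines;
	struct machine *machine;

	machines__init(&machines);
	machine = setup_fake_machine(&machines);	/* threads, maps, symbols */
	if (machine == NULL)
		return -1;

	/* ... feed fake samples and run assertions here ... */

	machines__exit(&machines);			/* tear the fixture down */
	return 0;
}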
#include "perf.h"
#include "util/debug.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
#include "util/parse-events.h"
#include "tests/tests.h"
#include "tests/hists_common.h"
struct sample {
u32 pid;
u64 ip;
struct thread *thread;
struct map *map;
struct symbol *sym;
};
/* For the numbers, see hists_common.c */
static struct sample fake_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
/* perf [perf] main() */
{ .pid = 100, .ip = 0x40000 + 700, },
/* perf [libc] malloc() */
{ .pid = 100, .ip = 0x50000 + 700, },
/* perf [perf] main() */
{ .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */
/* perf [perf] cmd_record() */
{ .pid = 200, .ip = 0x40000 + 900, },
/* perf [kernel] page_fault() */
{ .pid = 200, .ip = 0xf0000 + 800, },
/* bash [bash] main() */
{ .pid = 300, .ip = 0x40000 + 700, },
/* bash [bash] xmalloc() */
{ .pid = 300, .ip = 0x40000 + 800, },
/* bash [libc] malloc() */
{ .pid = 300, .ip = 0x50000 + 700, },
/* bash [kernel] page_fault() */
{ .pid = 300, .ip = 0xf0000 + 800, },
};
static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
{
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
struct perf_sample sample = { .cpu = 0, };
size_t i;
/*
* Each evsel will have 10 samples, but the 4th sample
* (perf [perf] main) will be collapsed into an existing entry,
* so 9 entries in total will be in the tree.
*/
evlist__for_each(evlist, evsel) {
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
const union perf_event event = {
.header = {
.misc = PERF_RECORD_MISC_USER,
},
};
/* make sure it has no filter at first */
evsel->hists.thread_filter = NULL;
evsel->hists.dso_filter = NULL;
evsel->hists.symbol_filter_str = NULL;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
NULL, NULL, 100, 1, 0);
if (he == NULL)
goto out;
fake_samples[i].thread = al.thread;
fake_samples[i].map = al.map;
fake_samples[i].sym = al.sym;
hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
if (!he->filtered)
he->hists->stats.nr_non_filtered_samples++;
}
}
return 0;
out:
pr_debug("Not enough memory for adding a hist entry\n");
return TEST_FAIL;
}
int test__hists_filter(void)
{
int err = TEST_FAIL;
struct machines machines;
struct machine *machine;
struct perf_evsel *evsel;
struct perf_evlist *evlist = perf_evlist__new();
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock");
if (err)
goto out;
err = parse_events(evlist, "task-clock");
if (err)
goto out;
/* default sort order (comm,dso,sym) will be used */
if (setup_sorting() < 0)
goto out;
machines__init(&machines);
/* setup threads/dso/map/symbols also */
machine = setup_fake_machine(&machines);
if (!machine)
goto out;
if (verbose > 1)
machine__fprintf(machine, stderr);
/* process sample events */
err = add_hist_entries(evlist, machine);
if (err < 0)
goto out;
evlist__for_each(evlist, evsel) {
struct hists *hists = &evsel->hists;
hists__collapse_resort(hists, NULL);
hists__output_resort(hists);
if (verbose > 2) {
pr_info("Normal histogram\n");
print_hists_out(hists);
}
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
TEST_ASSERT_VAL("Unmatched nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] ==
hists->stats.nr_non_filtered_samples);
TEST_ASSERT_VAL("Unmatched nr hist entries",
hists->nr_entries == hists->nr_non_filtered_entries);
TEST_ASSERT_VAL("Unmatched total period",
hists->stats.total_period ==
hists->stats.total_non_filtered_period);
/* now applying thread filter for 'bash' */
evsel->hists.thread_filter = fake_samples[9].thread;
hists__filter_by_thread(hists);
if (verbose > 2) {
pr_info("Histogram for thread filter\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for thread filter",
hists->stats.nr_non_filtered_samples == 4);
TEST_ASSERT_VAL("Unmatched nr hist entries for thread filter",
hists->nr_non_filtered_entries == 4);
TEST_ASSERT_VAL("Unmatched total period for thread filter",
hists->stats.total_non_filtered_period == 400);
/* remove thread filter first */
evsel->hists.thread_filter = NULL;
hists__filter_by_thread(hists);
/* now applying dso filter for 'kernel' */
evsel->hists.dso_filter = fake_samples[0].map->dso;
hists__filter_by_dso(hists);
if (verbose > 2) {
pr_info("Histogram for dso filter\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for dso filter",
hists->stats.nr_non_filtered_samples == 3);
TEST_ASSERT_VAL("Unmatched nr hist entries for dso filter",
hists->nr_non_filtered_entries == 3);
TEST_ASSERT_VAL("Unmatched total period for dso filter",
hists->stats.total_non_filtered_period == 300);
/* remove dso filter first */
evsel->hists.dso_filter = NULL;
hists__filter_by_dso(hists);
/*
* Now apply the symbol filter for 'main'. Also note that
* there are 3 samples that have the 'main' symbol, but the 4th
* entry of fake_samples was already collapsed, so it won't
* be counted as a separate entry; the sample count and
* total period will remain, though.
*/
evsel->hists.symbol_filter_str = "main";
hists__filter_by_symbol(hists);
if (verbose > 2) {
pr_info("Histogram for symbol filter\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for symbol filter",
hists->stats.nr_non_filtered_samples == 3);
TEST_ASSERT_VAL("Unmatched nr hist entries for symbol filter",
hists->nr_non_filtered_entries == 2);
TEST_ASSERT_VAL("Unmatched total period for symbol filter",
hists->stats.total_non_filtered_period == 300);
/* now applying all filters at once. */
evsel->hists.thread_filter = fake_samples[1].thread;
evsel->hists.dso_filter = fake_samples[1].map->dso;
hists__filter_by_thread(hists);
hists__filter_by_dso(hists);
if (verbose > 2) {
pr_info("Histogram for all filters\n");
print_hists_out(hists);
}
/* normal stats should be invariant */
TEST_ASSERT_VAL("Invalid nr samples",
hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
TEST_ASSERT_VAL("Invalid nr hist entries",
hists->nr_entries == 9);
TEST_ASSERT_VAL("Invalid total period",
hists->stats.total_period == 1000);
/* but filter stats are changed */
TEST_ASSERT_VAL("Unmatched nr samples for all filter",
hists->stats.nr_non_filtered_samples == 2);
TEST_ASSERT_VAL("Unmatched nr hist entries for all filter",
hists->nr_non_filtered_entries == 1);
TEST_ASSERT_VAL("Unmatched total period for all filter",
hists->stats.total_non_filtered_period == 200);
}
err = TEST_OK;
out:
/* tear down everything */
perf_evlist__delete(evlist);
reset_output_field();
machines__exit(&machines);
return err;
}
......@@ -8,145 +8,7 @@
#include "machine.h"
#include "thread.h"
#include "parse-events.h"
static struct {
u32 pid;
const char *comm;
} fake_threads[] = {
{ 100, "perf" },
{ 200, "perf" },
{ 300, "bash" },
};
static struct {
u32 pid;
u64 start;
const char *filename;
} fake_mmap_info[] = {
{ 100, 0x40000, "perf" },
{ 100, 0x50000, "libc" },
{ 100, 0xf0000, "[kernel]" },
{ 200, 0x40000, "perf" },
{ 200, 0x50000, "libc" },
{ 200, 0xf0000, "[kernel]" },
{ 300, 0x40000, "bash" },
{ 300, 0x50000, "libc" },
{ 300, 0xf0000, "[kernel]" },
};
struct fake_sym {
u64 start;
u64 length;
const char *name;
};
static struct fake_sym perf_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "run_command" },
{ 900, 100, "cmd_record" },
};
static struct fake_sym bash_syms[] = {
{ 700, 100, "main" },
{ 800, 100, "xmalloc" },
{ 900, 100, "xfree" },
};
static struct fake_sym libc_syms[] = {
{ 700, 100, "malloc" },
{ 800, 100, "free" },
{ 900, 100, "realloc" },
};
static struct fake_sym kernel_syms[] = {
{ 700, 100, "schedule" },
{ 800, 100, "page_fault" },
{ 900, 100, "sys_perf_event_open" },
};
static struct {
const char *dso_name;
struct fake_sym *syms;
size_t nr_syms;
} fake_symbols[] = {
{ "perf", perf_syms, ARRAY_SIZE(perf_syms) },
{ "bash", bash_syms, ARRAY_SIZE(bash_syms) },
{ "libc", libc_syms, ARRAY_SIZE(libc_syms) },
{ "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
};
static struct machine *setup_fake_machine(struct machines *machines)
{
struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
size_t i;
if (machine == NULL) {
pr_debug("Not enough memory for machine setup\n");
return NULL;
}
for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
struct thread *thread;
thread = machine__findnew_thread(machine, fake_threads[i].pid,
fake_threads[i].pid);
if (thread == NULL)
goto out;
thread__set_comm(thread, fake_threads[i].comm, 0);
}
for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
union perf_event fake_mmap_event = {
.mmap = {
.header = { .misc = PERF_RECORD_MISC_USER, },
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
.len = 0x1000ULL,
.pgoff = 0ULL,
},
};
strcpy(fake_mmap_event.mmap.filename,
fake_mmap_info[i].filename);
machine__process_mmap_event(machine, &fake_mmap_event, NULL);
}
for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
size_t k;
struct dso *dso;
dso = __dsos__findnew(&machine->user_dsos,
fake_symbols[i].dso_name);
if (dso == NULL)
goto out;
/* emulate dso__load() */
dso__set_loaded(dso, MAP__FUNCTION);
for (k = 0; k < fake_symbols[i].nr_syms; k++) {
struct symbol *sym;
struct fake_sym *fsym = &fake_symbols[i].syms[k];
sym = symbol__new(fsym->start, fsym->length,
STB_GLOBAL, fsym->name);
if (sym == NULL)
goto out;
symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
}
}
return machine;
out:
pr_debug("Not enough memory for machine setup\n");
machine__delete_threads(machine);
machine__delete(machine);
return NULL;
}
#include "hists_common.h"
struct sample {
u32 pid;
......@@ -156,6 +18,7 @@ struct sample {
struct symbol *sym;
};
/* For the numbers, see hists_common.c */
static struct sample fake_common_samples[] = {
/* perf [kernel] schedule() */
{ .pid = 100, .ip = 0xf0000 + 700, },
......@@ -218,6 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
};
sample.pid = fake_common_samples[k].pid;
sample.tid = fake_common_samples[k].pid;
sample.ip = fake_common_samples[k].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
......@@ -241,6 +105,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
};
sample.pid = fake_samples[i][k].pid;
sample.tid = fake_samples[i][k].pid;
sample.ip = fake_samples[i][k].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
......@@ -403,33 +268,6 @@ static int validate_link(struct hists *leader, struct hists *other)
return __validate_link(leader, 0) || __validate_link(other, 1);
}
static void print_hists(struct hists *hists)
{
int i = 0;
struct rb_root *root;
struct rb_node *node;
if (sort__need_collapse)
root = &hists->entries_collapsed;
else
root = hists->entries_in;
pr_info("----- %s --------\n", __func__);
node = rb_first(root);
while (node) {
struct hist_entry *he;
he = rb_entry(node, struct hist_entry, rb_node_in);
pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
i, thread__comm_str(he->thread), he->ms.map->dso->short_name,
he->ms.sym->name, he->stat.period);
i++;
node = rb_next(node);
}
}
int test__hists_link(void)
{
int err = -1;
......@@ -471,7 +309,7 @@ int test__hists_link(void)
hists__collapse_resort(&evsel->hists, NULL);
if (verbose > 2)
print_hists(&evsel->hists);
print_hists_in(&evsel->hists);
}
first = perf_evlist__first(evlist);
......@@ -494,6 +332,7 @@ int test__hists_link(void)
out:
/* tear down everything */
perf_evlist__delete(evlist);
reset_output_field();
machines__exit(&machines);
return err;
......
This diff has been collapsed.
#include <sys/types.h>
#include <linux/types.h>
#include <unistd.h>
#include <sys/prctl.h>
......
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include "debug.h"
#include "tests.h"
#include "machine.h"
#include "thread_map.h"
#include "symbol.h"
#include "thread.h"
#define THREADS 4
static int go_away;
struct thread_data {
pthread_t pt;
pid_t tid;
void *map;
int ready[2];
};
static struct thread_data threads[THREADS];
static int thread_init(struct thread_data *td)
{
void *map;
map = mmap(NULL, page_size,
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_SHARED|MAP_ANONYMOUS, -1, 0);
if (map == MAP_FAILED) {
perror("mmap failed");
return -1;
}
td->map = map;
td->tid = syscall(SYS_gettid);
pr_debug("tid = %d, map = %p\n", td->tid, map);
return 0;
}
static void *thread_fn(void *arg)
{
struct thread_data *td = arg;
ssize_t ret;
int go;
if (thread_init(td))
return NULL;
/* Signal thread_create that this thread is initialized. */
ret = write(td->ready[1], &go, sizeof(int));
if (ret != sizeof(int)) {
pr_err("failed to notify\n");
return NULL;
}
while (!go_away) {
/* Waiting for main thread to kill us. */
usleep(100);
}
munmap(td->map, page_size);
return NULL;
}
static int thread_create(int i)
{
struct thread_data *td = &threads[i];
int err, go;
if (pipe(td->ready))
return -1;
err = pthread_create(&td->pt, NULL, thread_fn, td);
if (!err) {
/* Wait for thread initialization. */
ssize_t ret = read(td->ready[0], &go, sizeof(int));
err = ret != sizeof(int);
}
close(td->ready[0]);
close(td->ready[1]);
return err;
}
static int threads_create(void)
{
struct thread_data *td0 = &threads[0];
int i, err = 0;
go_away = 0;
/* 0 is main thread */
if (thread_init(td0))
return -1;
for (i = 1; !err && i < THREADS; i++)
err = thread_create(i);
return err;
}
static int threads_destroy(void)
{
struct thread_data *td0 = &threads[0];
int i, err = 0;
/* cleanup the main thread */
munmap(td0->map, page_size);
go_away = 1;
for (i = 1; !err && i < THREADS; i++)
err = pthread_join(threads[i].pt, NULL);
return err;
}
typedef int (*synth_cb)(struct machine *machine);
static int synth_all(struct machine *machine)
{
return perf_event__synthesize_threads(NULL,
perf_event__process,
machine, 0);
}
static int synth_process(struct machine *machine)
{
struct thread_map *map;
int err;
map = thread_map__new_by_pid(getpid());
err = perf_event__synthesize_thread_map(NULL, map,
perf_event__process,
machine, 0);
thread_map__delete(map);
return err;
}
static int mmap_events(synth_cb synth)
{
struct machines machines;
struct machine *machine;
int err, i;
/*
* The threads_create will not return before all threads
* are spawned and all have created their memory maps.
*
* They will loop until threads_destroy is called, so we
* can safely run the synthesizing function.
*/
TEST_ASSERT_VAL("failed to create threads", !threads_create());
machines__init(&machines);
machine = &machines.host;
dump_trace = verbose > 1 ? 1 : 0;
err = synth(machine);
dump_trace = 0;
TEST_ASSERT_VAL("failed to destroy threads", !threads_destroy());
TEST_ASSERT_VAL("failed to synthesize maps", !err);
/*
* All data is synthesized; try to find the map for each
* thread object.
*/
for (i = 0; i < THREADS; i++) {
struct thread_data *td = &threads[i];
struct addr_location al;
struct thread *thread;
thread = machine__findnew_thread(machine, getpid(), td->tid);
pr_debug("looking for map %p\n", td->map);
thread__find_addr_map(thread, machine,
PERF_RECORD_MISC_USER, MAP__FUNCTION,
(unsigned long) (td->map + 1), &al);
if (!al.map) {
pr_debug("failed, couldn't find map\n");
err = -1;
break;
}
pr_debug("map %p, addr %" PRIx64 "\n", al.map, al.map->start);
}
machine__delete_threads(machine);
machines__exit(&machines);
return err;
}
/*
* This test creates 'THREADS' number of threads (including
* the main thread) and each thread creates a memory map.
*
* When threads are created, we synthesize them with both
* (separate tests):
* perf_event__synthesize_thread_map (process based)
* perf_event__synthesize_threads (global)
*
* We test that we can find all memory maps via:
* thread__find_addr_map
*
* by using all thread objects.
*/
int test__mmap_thread_lookup(void)
{
/* perf_event__synthesize_threads synthesize */
TEST_ASSERT_VAL("failed with synthesizing all",
!mmap_events(synth_all));
/* perf_event__synthesize_thread_map synthesize */
TEST_ASSERT_VAL("failed with synthesizing process",
!mmap_events(synth_process));
return 0;
}
......@@ -1174,188 +1174,240 @@ static int test__all_tracepoints(struct perf_evlist *evlist)
struct evlist_test {
const char *name;
__u32 type;
const int id;
int (*check)(struct perf_evlist *evlist);
};
static struct evlist_test test__events[] = {
	{
		.name = "syscalls:sys_enter_open",
		.check = test__checkevent_tracepoint,
		.id = 0,
	},
	{
		.name = "syscalls:*",
		.check = test__checkevent_tracepoint_multi,
		.id = 1,
	},
	{
		.name = "r1a",
		.check = test__checkevent_raw,
		.id = 2,
	},
	{
		.name = "1:1",
		.check = test__checkevent_numeric,
		.id = 3,
	},
	{
		.name = "instructions",
		.check = test__checkevent_symbolic_name,
		.id = 4,
	},
	{
		.name = "cycles/period=100000,config2/",
		.check = test__checkevent_symbolic_name_config,
		.id = 5,
	},
	{
		.name = "faults",
		.check = test__checkevent_symbolic_alias,
		.id = 6,
	},
	{
		.name = "L1-dcache-load-miss",
		.check = test__checkevent_genhw,
		.id = 7,
	},
	{
		.name = "mem:0",
		.check = test__checkevent_breakpoint,
		.id = 8,
	},
	{
		.name = "mem:0:x",
		.check = test__checkevent_breakpoint_x,
		.id = 9,
	},
	{
		.name = "mem:0:r",
		.check = test__checkevent_breakpoint_r,
		.id = 10,
	},
	{
		.name = "mem:0:w",
		.check = test__checkevent_breakpoint_w,
		.id = 11,
	},
	{
		.name = "syscalls:sys_enter_open:k",
		.check = test__checkevent_tracepoint_modifier,
		.id = 12,
	},
	{
		.name = "syscalls:*:u",
		.check = test__checkevent_tracepoint_multi_modifier,
		.id = 13,
	},
	{
		.name = "r1a:kp",
		.check = test__checkevent_raw_modifier,
		.id = 14,
	},
	{
		.name = "1:1:hp",
		.check = test__checkevent_numeric_modifier,
		.id = 15,
	},
	{
		.name = "instructions:h",
		.check = test__checkevent_symbolic_name_modifier,
		.id = 16,
	},
	{
		.name = "faults:u",
		.check = test__checkevent_symbolic_alias_modifier,
		.id = 17,
	},
	{
		.name = "L1-dcache-load-miss:kp",
		.check = test__checkevent_genhw_modifier,
		.id = 18,
	},
	{
		.name = "mem:0:u",
		.check = test__checkevent_breakpoint_modifier,
		.id = 19,
	},
	{
		.name = "mem:0:x:k",
		.check = test__checkevent_breakpoint_x_modifier,
		.id = 20,
	},
	{
		.name = "mem:0:r:hp",
		.check = test__checkevent_breakpoint_r_modifier,
		.id = 21,
	},
	{
		.name = "mem:0:w:up",
		.check = test__checkevent_breakpoint_w_modifier,
		.id = 22,
	},
	{
		.name = "r1,syscalls:sys_enter_open:k,1:1:hp",
		.check = test__checkevent_list,
		.id = 23,
	},
	{
		.name = "instructions:G",
		.check = test__checkevent_exclude_host_modifier,
		.id = 24,
	},
	{
		.name = "instructions:H",
		.check = test__checkevent_exclude_guest_modifier,
		.id = 25,
	},
	{
		.name = "mem:0:rw",
		.check = test__checkevent_breakpoint_rw,
		.id = 26,
	},
	{
		.name = "mem:0:rw:kp",
		.check = test__checkevent_breakpoint_rw_modifier,
		.id = 27,
	},
	{
		.name = "{instructions:k,cycles:upp}",
		.check = test__group1,
		.id = 28,
	},
	{
		.name = "{faults:k,cache-references}:u,cycles:k",
		.check = test__group2,
		.id = 29,
	},
	{
		.name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
		.check = test__group3,
		.id = 30,
	},
	{
		.name = "{cycles:u,instructions:kp}:p",
		.check = test__group4,
		.id = 31,
	},
	{
		.name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
		.check = test__group5,
		.id = 32,
	},
	{
		.name = "*:*",
		.check = test__all_tracepoints,
		.id = 33,
	},
	{
		.name = "{cycles,cache-misses:G}:H",
		.check = test__group_gh1,
		.id = 34,
	},
	{
		.name = "{cycles,cache-misses:H}:G",
		.check = test__group_gh2,
		.id = 35,
	},
	{
		.name = "{cycles:G,cache-misses:H}:u",
		.check = test__group_gh3,
		.id = 36,
	},
	{
		.name = "{cycles:G,cache-misses:H}:uG",
		.check = test__group_gh4,
		.id = 37,
	},
	{
		.name = "{cycles,cache-misses,branch-misses}:S",
		.check = test__leader_sample1,
		.id = 38,
	},
	{
		.name = "{instructions,branch-misses}:Su",
		.check = test__leader_sample2,
		.id = 39,
	},
	{
		.name = "instructions:uDp",
		.check = test__checkevent_pinned_modifier,
		.id = 40,
	},
	{
		.name = "{cycles,cache-misses,branch-misses}:D",
		.check = test__pinned_group,
		.id = 41,
	},
#if defined(__s390x__)
	{
		.name = "kvm-s390:kvm_s390_create_vm",
		.check = test__checkevent_tracepoint,
		.id = 100,
	},
#endif
};
static struct evlist_test test__events_pmu[] = {
	{
		.name = "cpu/config=10,config1,config2=3,period=1000/u",
		.check = test__checkevent_pmu,
		.id = 0,
	},
	{
		.name = "cpu/config=1,name=krava/u,cpu/config=2/u",
		.check = test__checkevent_pmu_name,
		.id = 1,
	},
};
@@ -1402,7 +1454,7 @@ static int test_events(struct evlist_test *events, unsigned cnt)
	for (i = 0; i < cnt; i++) {
		struct evlist_test *e = &events[i];

		pr_debug("running test %d '%s'\n", e->id, e->name);
		ret1 = test_event(e);
		if (ret1)
			ret2 = ret1;
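The point of switching test_events from the loop index i to the new e->id field is visible in the #if defined(__s390x__) entry above: a conditionally compiled entry shifts every later array index, while an id embedded in the entry stays stable across configurations. Here is a self-contained toy illustration of that effect, not code from the perf sources; entry, table and the EXTRA_ARCH_EVENT switch are made up:

/*
 * Toy illustration, NOT perf code: why a stable embedded id beats the
 * array index once entries are conditionally compiled.
 */
#include <stdio.h>

struct entry {
	const char *name;
	int id;
};

static struct entry table[] = {
	{ .name = "always-there", .id = 0 },
#ifdef EXTRA_ARCH_EVENT		/* hypothetical config switch */
	{ .name = "arch-only",    .id = 100 },
#endif
	{ .name = "also-always",  .id = 1 },
};

int main(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		/* i shifts with the #ifdef above; table[i].id does not */
		printf("slot %u -> id %d (%s)\n", i, table[i].id, table[i].name);
	return 0;
}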