提交 a042e261 编写于 作者: L Linus Torvalds

Merge branch 'perf-fixes-for-linus' of...

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...
......@@ -121,6 +121,7 @@
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
......
......@@ -111,17 +111,18 @@ union cpuid10_edx {
#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
/* IbsFetchCtl bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
/* IbsOpCtl bits */
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
......
......@@ -237,6 +237,7 @@ struct x86_pmu {
* Intel DebugStore bits
*/
int bts, pebs;
int bts_active, pebs_active;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
......@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}
#endif
static int reserve_ds_buffers(void);
static void reserve_ds_buffers(void);
static void release_ds_buffers(void);
static void hw_perf_event_destroy(struct perf_event *event)
......@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
(hwc->sample_period == 1)) {
/* BTS is not supported by this architecture. */
if (!x86_pmu.bts)
if (!x86_pmu.bts_active)
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
......@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
int precise = 0;
/* Support for constant skid */
if (x86_pmu.pebs)
if (x86_pmu.pebs_active) {
precise++;
/* Support for IP fixup */
if (x86_pmu.lbr_nr)
precise++;
/* Support for IP fixup */
if (x86_pmu.lbr_nr)
precise++;
}
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
......@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else {
err = reserve_ds_buffers();
if (err)
release_pmc_hardware();
}
else
reserve_ds_buffers();
}
if (!err)
atomic_inc(&active_events);
......
......@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh = 1; /* always use a single PEBS record */
void *buffer;
if (!x86_pmu.pebs)
return 0;
buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!buffer))
return -ENOMEM;
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
thresh * x86_pmu.pebs_record_size;
return 0;
}
static void release_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds || !x86_pmu.pebs)
return;
kfree((void *)(unsigned long)ds->pebs_buffer_base);
ds->pebs_buffer_base = 0;
}
static int alloc_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh;
void *buffer;
if (!x86_pmu.bts)
return 0;
buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!buffer))
return -ENOMEM;
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16;
ds->bts_buffer_base = (u64)(unsigned long)buffer;
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum = ds->bts_buffer_base +
max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE;
return 0;
}
static void release_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds || !x86_pmu.bts)
return;
kfree((void *)(unsigned long)ds->bts_buffer_base);
ds->bts_buffer_base = 0;
}
static int alloc_ds_buffer(int cpu)
{
int node = cpu_to_node(cpu);
struct debug_store *ds;
ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
if (unlikely(!ds))
return -ENOMEM;
per_cpu(cpu_hw_events, cpu).ds = ds;
return 0;
}
static void release_ds_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
return;
per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree(ds);
}
static void release_ds_buffers(void)
{
int cpu;
......@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
return;
get_online_cpus();
for_each_online_cpu(cpu)
fini_debug_store_on_cpu(cpu);
for_each_possible_cpu(cpu) {
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
continue;
per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree((void *)(unsigned long)ds->pebs_buffer_base);
kfree((void *)(unsigned long)ds->bts_buffer_base);
kfree(ds);
release_pebs_buffer(cpu);
release_bts_buffer(cpu);
release_ds_buffer(cpu);
}
put_online_cpus();
}
static int reserve_ds_buffers(void)
static void reserve_ds_buffers(void)
{
int cpu, err = 0;
int bts_err = 0, pebs_err = 0;
int cpu;
x86_pmu.bts_active = 0;
x86_pmu.pebs_active = 0;
if (!x86_pmu.bts && !x86_pmu.pebs)
return 0;
return;
if (!x86_pmu.bts)
bts_err = 1;
if (!x86_pmu.pebs)
pebs_err = 1;
get_online_cpus();
for_each_possible_cpu(cpu) {
struct debug_store *ds;
void *buffer;
int max, thresh;
if (alloc_ds_buffer(cpu)) {
bts_err = 1;
pebs_err = 1;
}
if (!bts_err && alloc_bts_buffer(cpu))
bts_err = 1;
err = -ENOMEM;
ds = kzalloc(sizeof(*ds), GFP_KERNEL);
if (unlikely(!ds))
if (!pebs_err && alloc_pebs_buffer(cpu))
pebs_err = 1;
if (bts_err && pebs_err)
break;
per_cpu(cpu_hw_events, cpu).ds = ds;
if (x86_pmu.bts) {
buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
if (unlikely(!buffer))
break;
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16;
ds->bts_buffer_base = (u64)(unsigned long)buffer;
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum = ds->bts_buffer_base +
max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE;
}
}
if (x86_pmu.pebs) {
buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
if (unlikely(!buffer))
break;
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
/*
* Always use single record PEBS
*/
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
x86_pmu.pebs_record_size;
}
if (bts_err) {
for_each_possible_cpu(cpu)
release_bts_buffer(cpu);
}
err = 0;
if (pebs_err) {
for_each_possible_cpu(cpu)
release_pebs_buffer(cpu);
}
if (err)
release_ds_buffers();
else {
if (bts_err && pebs_err) {
for_each_possible_cpu(cpu)
release_ds_buffer(cpu);
} else {
if (x86_pmu.bts && !bts_err)
x86_pmu.bts_active = 1;
if (x86_pmu.pebs && !pebs_err)
x86_pmu.pebs_active = 1;
for_each_online_cpu(cpu)
init_debug_store_on_cpu(cpu);
}
put_online_cpus();
return err;
}
/*
......@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
if (!event)
return 0;
if (!ds)
if (!x86_pmu.bts_active)
return 0;
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
......@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
struct pebs_record_core *at, *top;
int n;
if (!ds || !x86_pmu.pebs)
if (!x86_pmu.pebs_active)
return;
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
......@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
u64 status = 0;
int bit, n;
if (!ds || !x86_pmu.pebs)
if (!x86_pmu.pebs_active)
return;
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
......@@ -630,9 +715,8 @@ static void intel_ds_init(void)
#else /* CONFIG_CPU_SUP_INTEL */
static int reserve_ds_buffers(void)
static void reserve_ds_buffers(void)
{
return 0;
}
static void release_ds_buffers(void)
......
......@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
case 0x11:
cpu_type = "x86-64/family11h";
break;
case 0x12:
cpu_type = "x86-64/family12h";
break;
case 0x14:
cpu_type = "x86-64/family14h";
break;
default:
return -ENODEV;
}
......
......@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
static u32 ibs_caps;
struct op_ibs_config {
struct ibs_config {
unsigned long op_enabled;
unsigned long fetch_enabled;
unsigned long max_cnt_fetch;
unsigned long max_cnt_op;
unsigned long rand_en;
unsigned long dispatched_ops;
unsigned long branch_target;
};
static struct op_ibs_config ibs_config;
static u64 ibs_op_ctl;
struct ibs_state {
u64 ibs_op_ctl;
int branch_target;
unsigned long sample_size;
};
static struct ibs_config ibs_config;
static struct ibs_state ibs_state;
/*
* IBS cpuid feature detection
......@@ -71,8 +78,16 @@ static u64 ibs_op_ctl;
* bit 0 is used to indicate the existence of IBS.
*/
#define IBS_CAPS_AVAIL (1U<<0)
#define IBS_CAPS_FETCHSAM (1U<<1)
#define IBS_CAPS_OPSAM (1U<<2)
#define IBS_CAPS_RDWROPCNT (1U<<3)
#define IBS_CAPS_OPCNT (1U<<4)
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
| IBS_CAPS_OPSAM)
/*
* IBS APIC setup
......@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)
/* check IBS cpuid feature flags */
max_level = cpuid_eax(0x80000000);
if (max_level < IBS_CPUID_FEATURES)
return IBS_CAPS_AVAIL;
return IBS_CAPS_DEFAULT;
ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
if (!(ibs_caps & IBS_CAPS_AVAIL))
/* cpuid flags not valid */
return IBS_CAPS_AVAIL;
return IBS_CAPS_DEFAULT;
return ibs_caps;
}
......@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
if (ctl & IBS_OP_VAL) {
rdmsrl(MSR_AMD64_IBSOPRIP, val);
oprofile_write_reserve(&entry, regs, val,
IBS_OP_CODE, IBS_OP_SIZE);
oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
ibs_state.sample_size);
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSOPDATA, val);
oprofile_add_data64(&entry, val);
......@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
oprofile_add_data64(&entry, val);
rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
oprofile_add_data64(&entry, val);
if (ibs_state.branch_target) {
rdmsrl(MSR_AMD64_IBSBRTARGET, val);
oprofile_add_data(&entry, (unsigned long)val);
}
oprofile_write_commit(&entry);
/* reenable the IRQ */
ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
}
}
......@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)
if (!ibs_caps)
return;
memset(&ibs_state, 0, sizeof(ibs_state));
/*
* Note: Since the max count settings may out of range we
* write back the actual used values so that userland can read
* it.
*/
if (ibs_config.fetch_enabled) {
val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
val = ibs_config.max_cnt_fetch >> 4;
val = min(val, IBS_FETCH_MAX_CNT);
ibs_config.max_cnt_fetch = val << 4;
val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
val |= IBS_FETCH_ENABLE;
wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
}
if (ibs_config.op_enabled) {
ibs_op_ctl = ibs_config.max_cnt_op >> 4;
val = ibs_config.max_cnt_op >> 4;
if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
/*
* IbsOpCurCnt not supported. See
* op_amd_randomize_ibs_op() for details.
*/
ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
val = clamp(val, 0x0081ULL, 0xFF80ULL);
ibs_config.max_cnt_op = val << 4;
} else {
/*
* The start value is randomized with a
......@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)
* with the half of the randomized range. Also
* avoid underflows.
*/
ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
IBS_OP_MAX_CNT);
val += IBS_RANDOM_MAXCNT_OFFSET;
if (ibs_caps & IBS_CAPS_OPCNTEXT)
val = min(val, IBS_OP_MAX_CNT_EXT);
else
val = min(val, IBS_OP_MAX_CNT);
ibs_config.max_cnt_op =
(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
}
val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
val |= IBS_OP_ENABLE;
ibs_state.ibs_op_ctl = val;
ibs_state.sample_size = IBS_OP_SIZE;
if (ibs_config.branch_target) {
ibs_state.branch_target = 1;
ibs_state.sample_size++;
}
if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
ibs_op_ctl |= IBS_OP_CNT_CTL;
ibs_op_ctl |= IBS_OP_ENABLE;
val = op_amd_randomize_ibs_op(ibs_op_ctl);
val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
wrmsrl(MSR_AMD64_IBSOPCTL, val);
}
}
......@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)
static inline int ibs_eilvt_valid(void)
{
u64 val;
int offset;
u64 val;
rdmsrl(MSR_AMD64_IBSCTL, val);
offset = val & IBSCTL_LVT_OFFSET_MASK;
if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
pr_err(FW_BUG "cpu %d, invalid IBS "
"interrupt offset %d (MSR%08X=0x%016llx)",
smp_processor_id(), offset,
MSR_AMD64_IBSCTL, val);
pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
return 0;
}
offset = val & IBSCTL_LVT_OFFSET_MASK;
if (eilvt_is_available(offset))
return !0;
pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
"not available (MSR%08X=0x%016llx)",
smp_processor_id(), offset,
MSR_AMD64_IBSCTL, val);
if (!eilvt_is_available(offset)) {
pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
return 0;
}
return 0;
return 1;
}
static inline int get_ibs_offset(void)
......@@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
/* model specific files */
/* setup some reasonable defaults */
memset(&ibs_config, 0, sizeof(ibs_config));
ibs_config.max_cnt_fetch = 250000;
ibs_config.fetch_enabled = 0;
ibs_config.max_cnt_op = 250000;
ibs_config.op_enabled = 0;
ibs_config.dispatched_ops = 0;
dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
oprofilefs_create_ulong(sb, dir, "enable",
&ibs_config.fetch_enabled);
oprofilefs_create_ulong(sb, dir, "max_count",
&ibs_config.max_cnt_fetch);
oprofilefs_create_ulong(sb, dir, "rand_enable",
&ibs_config.rand_en);
dir = oprofilefs_mkdir(sb, root, "ibs_op");
oprofilefs_create_ulong(sb, dir, "enable",
&ibs_config.op_enabled);
oprofilefs_create_ulong(sb, dir, "max_count",
&ibs_config.max_cnt_op);
if (ibs_caps & IBS_CAPS_OPCNT)
oprofilefs_create_ulong(sb, dir, "dispatched_ops",
&ibs_config.dispatched_ops);
if (ibs_caps & IBS_CAPS_FETCHSAM) {
dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
oprofilefs_create_ulong(sb, dir, "enable",
&ibs_config.fetch_enabled);
oprofilefs_create_ulong(sb, dir, "max_count",
&ibs_config.max_cnt_fetch);
oprofilefs_create_ulong(sb, dir, "rand_enable",
&ibs_config.rand_en);
}
if (ibs_caps & IBS_CAPS_OPSAM) {
dir = oprofilefs_mkdir(sb, root, "ibs_op");
oprofilefs_create_ulong(sb, dir, "enable",
&ibs_config.op_enabled);
oprofilefs_create_ulong(sb, dir, "max_count",
&ibs_config.max_cnt_op);
if (ibs_caps & IBS_CAPS_OPCNT)
oprofilefs_create_ulong(sb, dir, "dispatched_ops",
&ibs_config.dispatched_ops);
if (ibs_caps & IBS_CAPS_BRNTRGT)
oprofilefs_create_ulong(sb, dir, "branch_target",
&ibs_config.branch_target);
}
return 0;
}
......
......@@ -410,7 +410,7 @@ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
extern void softirq_init(void);
static inline void __raise_softirq_irqoff(unsigned int nr)
{
trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
trace_softirq_raise(nr);
or_softirq_pending(1UL << nr);
}
......
......@@ -62,18 +62,6 @@ enum ring_buffer_type {
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);
/**
* ring_buffer_event_time_delta - return the delta timestamp of the event
* @event: the event to get the delta timestamp of
*
* The delta timestamp is the 27 bit timestamp since the last event.
*/
static inline unsigned
ring_buffer_event_time_delta(struct ring_buffer_event *event)
{
return event->time_delta;
}
/*
* ring_buffer_discard_commit will remove an event that has not
* ben committed yet. If this is used, then ring_buffer_unlock_commit
......
......@@ -86,76 +86,62 @@ TRACE_EVENT(irq_handler_exit,
DECLARE_EVENT_CLASS(softirq,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec),
TP_ARGS(vec_nr),
TP_STRUCT__entry(
__field( int, vec )
__field( unsigned int, vec )
),
TP_fast_assign(
if (vec)
__entry->vec = (int)(h - vec);
else
__entry->vec = (int)(long)h;
__entry->vec = vec_nr;
),
TP_printk("vec=%d [action=%s]", __entry->vec,
TP_printk("vec=%u [action=%s]", __entry->vec,
show_softirq_name(__entry->vec))
);
/**
* softirq_entry - called immediately before the softirq handler
* @h: pointer to struct softirq_action
* @vec: pointer to first struct softirq_action in softirq_vec array
* @vec_nr: softirq vector number
*
* The @h parameter, contains a pointer to the struct softirq_action
* which has a pointer to the action handler that is called. By subtracting
* the @vec pointer from the @h pointer, we can determine the softirq
* number. Also, when used in combination with the softirq_exit tracepoint
* we can determine the softirq latency.
* When used in combination with the softirq_exit tracepoint
* we can determine the softirq handler runtine.
*/
DEFINE_EVENT(softirq, softirq_entry,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec)
TP_ARGS(vec_nr)
);
/**
* softirq_exit - called immediately after the softirq handler returns
* @h: pointer to struct softirq_action
* @vec: pointer to first struct softirq_action in softirq_vec array
* @vec_nr: softirq vector number
*
* The @h parameter contains a pointer to the struct softirq_action
* that has handled the softirq. By subtracting the @vec pointer from
* the @h pointer, we can determine the softirq number. Also, when used in
* combination with the softirq_entry tracepoint we can determine the softirq
* latency.
* When used in combination with the softirq_entry tracepoint
* we can determine the softirq handler runtine.
*/
DEFINE_EVENT(softirq, softirq_exit,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec)
TP_ARGS(vec_nr)
);
/**
* softirq_raise - called immediately when a softirq is raised
* @h: pointer to struct softirq_action
* @vec: pointer to first struct softirq_action in softirq_vec array
* @vec_nr: softirq vector number
*
* The @h parameter contains a pointer to the softirq vector number which is
* raised. @vec is NULL and it means @h includes vector number not
* softirq_action. When used in combination with the softirq_entry tracepoint
* we can determine the softirq raise latency.
* When used in combination with the softirq_entry tracepoint
* we can determine the softirq raise to run latency.
*/
DEFINE_EVENT(softirq, softirq_raise,
TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
TP_PROTO(unsigned int vec_nr),
TP_ARGS(h, vec)
TP_ARGS(vec_nr)
);
#endif /* _TRACE_IRQ_H */
......
......@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed;
static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
spinlock_t lock ____cacheline_aligned_in_smp;
......@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
}
#ifdef CONFIG_SYSCTL
/* This should be called with kprobe_mutex locked */
static void __kprobes optimize_all_kprobes(void)
{
struct hlist_head *head;
......@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)
return;
kprobes_allow_optimization = true;
mutex_lock(&text_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
mutex_unlock(&text_mutex);
printk(KERN_INFO "Kprobes globally optimized\n");
}
/* This should be called with kprobe_mutex locked */
static void __kprobes unoptimize_all_kprobes(void)
{
struct hlist_head *head;
......
......@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
return event->cpu == -1 || event->cpu == smp_processor_id();
}
static int
__event_sched_out(struct perf_event *event,
static void
event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
......@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
}
if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0;
return;
event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) {
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
event->tstamp_stopped = ctx->time;
event->pmu->del(event, 0);
event->oncpu = -1;
......@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
ctx->nr_active--;
if (event->attr.exclusive || !cpuctx->active_oncpu)
cpuctx->exclusive = 0;
return 1;
}
static void
event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
int ret;
ret = __event_sched_out(event, cpuctx, ctx);
if (ret)
event->tstamp_stopped = ctx->time;
}
static void
......@@ -664,7 +652,7 @@ void perf_event_disable(struct perf_event *event)
}
static int
__event_sched_in(struct perf_event *event,
event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
......@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,
return -EAGAIN;
}
event->tstamp_running += ctx->time - event->tstamp_stopped;
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
......@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,
return 0;
}
static inline int
event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
int ret = __event_sched_in(event, cpuctx, ctx);
if (ret)
return ret;
event->tstamp_running += ctx->time - event->tstamp_stopped;
return 0;
}
static void
group_commit_event_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
struct perf_event *event;
u64 now = ctx->time;
group_event->tstamp_running += now - group_event->tstamp_stopped;
/*
* Schedule in siblings as one group (if any):
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
event->tstamp_running += now - event->tstamp_stopped;
}
}
static int
group_sched_in(struct perf_event *group_event,
struct perf_cpu_context *cpuctx,
......@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,
{
struct perf_event *event, *partial_group = NULL;
struct pmu *pmu = group_event->pmu;
u64 now = ctx->time;
bool simulate = false;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
pmu->start_txn(pmu);
/*
* use __event_sched_in() to delay updating tstamp_running
* until the transaction is committed. In case of failure
* we will keep an unmodified tstamp_running which is a
* requirement to get correct timing information
*/
if (__event_sched_in(group_event, cpuctx, ctx)) {
if (event_sched_in(group_event, cpuctx, ctx)) {
pmu->cancel_txn(pmu);
return -EAGAIN;
}
......@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,
* Schedule in siblings as one group (if any):
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (__event_sched_in(event, cpuctx, ctx)) {
if (event_sched_in(event, cpuctx, ctx)) {
partial_group = event;
goto group_error;
}
}
if (!pmu->commit_txn(pmu)) {
/* commit tstamp_running */
group_commit_event_sched_in(group_event, cpuctx, ctx);
if (!pmu->commit_txn(pmu))
return 0;
}
group_error:
/*
* Groups can be scheduled in as one unit only, so undo any
* partial group before returning:
* The events up to the failed event are scheduled out normally,
* tstamp_stopped will be updated.
*
* use __event_sched_out() to avoid updating tstamp_stopped
* because the event never actually ran
* The failed events and the remaining siblings need to have
* their timings updated as if they had gone thru event_sched_in()
* and event_sched_out(). This is required to get consistent timings
* across the group. This also takes care of the case where the group
* could never be scheduled by ensuring tstamp_stopped is set to mark
* the time the event was actually stopped, such that time delta
* calculation in update_event_times() is correct.
*/
list_for_each_entry(event, &group_event->sibling_list, group_entry) {
if (event == partial_group)
break;
__event_sched_out(event, cpuctx, ctx);
simulate = true;
if (simulate) {
event->tstamp_running += now - event->tstamp_stopped;
event->tstamp_stopped = now;
} else {
event_sched_out(event, cpuctx, ctx);
}
}
__event_sched_out(group_event, cpuctx, ctx);
event_sched_out(group_event, cpuctx, ctx);
pmu->cancel_txn(pmu);
......
......@@ -229,18 +229,20 @@ asmlinkage void __do_softirq(void)
do {
if (pending & 1) {
unsigned int vec_nr = h - softirq_vec;
int prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(h - softirq_vec);
trace_softirq_entry(h, softirq_vec);
kstat_incr_softirqs_this_cpu(vec_nr);
trace_softirq_entry(vec_nr);
h->action(h);
trace_softirq_exit(h, softirq_vec);
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
printk(KERN_ERR "huh, entered softirq %td %s %p"
printk(KERN_ERR "huh, entered softirq %u %s %p"
"with preempt_count %08x,"
" exited with %08x?\n", h - softirq_vec,
softirq_to_name[h - softirq_vec],
h->action, prev_count, preempt_count());
" exited with %08x?\n", vec_nr,
softirq_to_name[vec_nr], h->action,
prev_count, preempt_count());
preempt_count() = prev_count;
}
......
......@@ -224,6 +224,9 @@ enum {
RB_LEN_TIME_STAMP = 16,
};
#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
......@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
return length + RB_EVNT_HDR_SIZE;
}
/* inline for ring buffer fast paths */
static unsigned
/*
* Return the length of the given event. Will return
* the length of the time extend if the event is a
* time extend.
*/
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
switch (event->type_len) {
......@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
return 0;
}
/*
* Return total length of time extend and data,
* or just the event length for all other events.
*/
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
unsigned len = 0;
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
}
return len + rb_event_length(event);
}
/**
* ring_buffer_event_length - return the length of the event
* @event: the event to get the length of
*
* Returns the size of the data load of a data event.
* If the event is something other than a data event, it
* returns the size of the event itself. With the exception
* of a TIME EXTEND, where it still returns the size of the
* data load of the data event after it.
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
unsigned length = rb_event_length(event);
unsigned length;
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length;
length -= RB_EVNT_HDR_SIZE;
......@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
static void *
rb_event_data(struct ring_buffer_event *event)
{
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)
......@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
/* Max number of timestamps that can fit on a page */
#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
int ring_buffer_print_page_header(struct trace_seq *s)
{
struct buffer_data_page field;
......@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0;
}
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
event->type_len = RINGBUF_TYPE_TIME_EXTEND;
/* Not the first event on the page? */
if (rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
return skip_time_extend(event);
}
/**
* ring_buffer_update_event - update event type and data
* @event: the even to update
......@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
* data field.
*/
static void
rb_update_event(struct ring_buffer_event *event,
unsigned type, unsigned length)
rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event, unsigned length,
int add_timestamp, u64 delta)
{
event->type_len = type;
switch (type) {
case RINGBUF_TYPE_PADDING:
case RINGBUF_TYPE_TIME_EXTEND:
case RINGBUF_TYPE_TIME_STAMP:
break;
/* Only a commit updates the timestamp */
if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
delta = 0;
case 0:
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
event->array[0] = length;
else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
break;
default:
BUG();
/*
* If we need to add a timestamp, then we
* add it to the start of the resevered space.
*/
if (unlikely(add_timestamp)) {
event = rb_add_time_stamp(event, delta);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
event->array[0] = length;
} else
event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
}
/*
......@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(length, &tail_page->write);
}
static struct ring_buffer_event *
/*
* This is the slow path, force gcc not to inline it.
*/
static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length, unsigned long tail,
struct buffer_page *tail_page, u64 *ts)
struct buffer_page *tail_page, u64 ts)
{
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
......@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
* Nested commits always have zero deltas, so
* just reread the time stamp
*/
*ts = rb_time_stamp(buffer);
next_page->page->time_stamp = *ts;
ts = rb_time_stamp(buffer);
next_page->page->time_stamp = ts;
}
out_again:
......@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_event *
__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
unsigned type, unsigned long length, u64 *ts)
unsigned long length, u64 ts,
u64 delta, int add_timestamp)
{
struct buffer_page *tail_page;
struct ring_buffer_event *event;
unsigned long tail, write;
/*
* If the time delta since the last event is too big to
* hold in the time field of the event, then we append a
* TIME EXTEND event ahead of the data event.
*/
if (unlikely(add_timestamp))
length += RB_LEN_TIME_EXTEND;
tail_page = cpu_buffer->tail_page;
write = local_add_return(length, &tail_page->write);
......@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
tail = write - length;
/* See if we shot pass the end of this buffer page */
if (write > BUF_PAGE_SIZE)
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, length, tail,
tail_page, ts);
......@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
rb_update_event(event, type, length);
rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
/* The passed in type is zero for DATA */
if (likely(!type))
local_inc(&tail_page->entries);
local_inc(&tail_page->entries);
/*
* If this is the first commit on the page, then update
* its timestamp.
*/
if (!tail)
tail_page->page->time_stamp = *ts;
tail_page->page->time_stamp = ts;
return event;
}
......@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long addr;
new_index = rb_event_index(event);
old_index = new_index + rb_event_length(event);
old_index = new_index + rb_event_ts_length(event);
addr = (unsigned long)event;
addr &= PAGE_MASK;
......@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
}
static int
rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
u64 *ts, u64 *delta)
{
struct ring_buffer_event *event;
int ret;
WARN_ONCE(*delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
(unsigned long long)*delta,
(unsigned long long)*ts,
(unsigned long long)cpu_buffer->write_stamp);
/*
* The delta is too big, we to add a
* new timestamp.
*/
event = __rb_reserve_next(cpu_buffer,
RINGBUF_TYPE_TIME_EXTEND,
RB_LEN_TIME_EXTEND,
ts);
if (!event)
return -EBUSY;
if (PTR_ERR(event) == -EAGAIN)
return -EAGAIN;
/* Only a commited time event can update the write stamp */
if (rb_event_is_commit(cpu_buffer, event)) {
/*
* If this is the first on the page, then it was
* updated with the page itself. Try to discard it
* and if we can't just make it zero.
*/
if (rb_event_index(event)) {
event->time_delta = *delta & TS_MASK;
event->array[0] = *delta >> TS_SHIFT;
} else {
/* try to discard, since we do not need this */
if (!rb_try_to_discard(cpu_buffer, event)) {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
}
cpu_buffer->write_stamp = *ts;
/* let the caller know this was the commit */
ret = 1;
} else {
/* Try to discard the event */
if (!rb_try_to_discard(cpu_buffer, event)) {
/* Darn, this is just wasted space */
event->time_delta = 0;
event->array[0] = 0;
}
ret = 0;
}
*delta = 0;
return ret;
}
static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
local_inc(&cpu_buffer->committing);
local_inc(&cpu_buffer->commits);
}
static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
......@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
unsigned long length)
{
struct ring_buffer_event *event;
u64 ts, delta = 0;
int commit = 0;
u64 ts, delta;
int nr_loops = 0;
int add_timestamp;
u64 diff;
rb_start_commit(cpu_buffer);
......@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
length = rb_calculate_event_length(length);
again:
add_timestamp = 0;
delta = 0;
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
......@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
goto out_fail;
ts = rb_time_stamp(cpu_buffer->buffer);
diff = ts - cpu_buffer->write_stamp;
/*
* Only the first commit can update the timestamp.
* Yes there is a race here. If an interrupt comes in
* just after the conditional and it traces too, then it
* will also check the deltas. More than one timestamp may
* also be made. But only the entry that did the actual
* commit will be something other than zero.
*/
if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
rb_page_write(cpu_buffer->tail_page) ==
rb_commit_index(cpu_buffer))) {
u64 diff;
diff = ts - cpu_buffer->write_stamp;
/* make sure this diff is calculated here */
barrier();
/* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp))
goto get_event;
/* make sure this diff is calculated here */
barrier();
/* Did the write stamp get updated already? */
if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {
commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
if (commit == -EBUSY)
goto out_fail;
if (commit == -EAGAIN)
goto again;
RB_WARN_ON(cpu_buffer, commit < 0);
WARN_ONCE(delta > (1ULL << 59),
KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
(unsigned long long)delta,
(unsigned long long)ts,
(unsigned long long)cpu_buffer->write_stamp);
add_timestamp = 1;
}
}
get_event:
event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
event = __rb_reserve_next(cpu_buffer, length, ts,
delta, add_timestamp);
if (unlikely(PTR_ERR(event) == -EAGAIN))
goto again;
if (!event)
goto out_fail;
if (!rb_event_is_commit(cpu_buffer, event))
delta = 0;
event->time_delta = delta;
return event;
out_fail:
......@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
#define TRACE_RECURSIVE_DEPTH 16
static int trace_recursive_lock(void)
/* Keep this code out of the fast path cache */
static noinline void trace_recursive_fail(void)
{
current->trace_recursion++;
if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
return 0;
/* Disable all tracing before we do anything else */
tracing_off_permanent();
......@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
in_nmi());
WARN_ON_ONCE(1);
}
static inline int trace_recursive_lock(void)
{
current->trace_recursion++;
if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
return 0;
trace_recursive_fail();
return -1;
}
static void trace_recursive_unlock(void)
static inline void trace_recursive_unlock(void)
{
WARN_ON_ONCE(!current->trace_recursion);
......@@ -2308,12 +2298,28 @@ static void
rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
u64 delta;
/*
* The event first in the commit queue updates the
* time stamp.
*/
if (rb_event_is_commit(cpu_buffer, event))
cpu_buffer->write_stamp += event->time_delta;
if (rb_event_is_commit(cpu_buffer, event)) {
/*
* A commit event that is first on a page
* updates the write timestamp with the page stamp
*/
if (!rb_event_index(event))
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
delta = event->array[0];
delta <<= TS_SHIFT;
delta += event->time_delta;
cpu_buffer->write_stamp += delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
}
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
......@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static inline void rb_event_discard(struct ring_buffer_event *event)
{
if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
......@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
again:
/*
* We repeat when a timestamp is encountered. It is possible
* to get multiple timestamps from an interrupt entering just
* as one timestamp is about to be written, or from discarded
* commits. The most that we can have is the number on a single page.
* We repeat when a time extend is encountered.
* Since the time extend is always attached to a data event,
* we should never loop more than once.
* (We never hit the following condition more than twice).
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
reader = rb_get_reader_page(cpu_buffer);
......@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL;
/*
* We repeat when a timestamp is encountered.
* We can get multiple timestamps by nested interrupts or also
* if filtering is on (discarding commits). Since discarding
* commits can be frequent we can get a lot of timestamps.
* But we limit them by not adding timestamps if they begin
* at the start of a page.
* We repeat when a time extend is encountered.
* Since the time extend is always attached to a data event,
* we should never loop more than once.
* (We never hit the following condition more than twice).
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
return NULL;
if (rb_per_cpu_empty(cpu_buffer))
......@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (len > (commit - read))
len = (commit - read);
size = rb_event_length(event);
/* Always keep the time extend and data together */
size = rb_event_ts_length(event);
if (len < size)
goto out_unlock;
......@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
break;
event = rb_reader_event(cpu_buffer);
size = rb_event_length(event);
/* Always keep the time extend and data together */
size = rb_event_ts_length(event);
} while (len > size);
/* update bpage */
......
......@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
{
struct dentry *d_percpu = tracing_dentry_percpu();
struct dentry *d_cpu;
/* strlen(cpu) + MAX(log10(cpu)) + '\0' */
char cpu_dir[7];
char cpu_dir[30]; /* 30 characters should be more than enough */
if (cpu > 999 || cpu < 0)
return;
sprintf(cpu_dir, "cpu%ld", cpu);
snprintf(cpu_dir, 30, "cpu%ld", cpu);
d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
if (!d_cpu) {
pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
......
......@@ -15,6 +15,23 @@ DESCRIPTION
This command displays the symbolic event types which can be selected in the
various perf commands with the -e option.
EVENT MODIFIERS
---------------
Events can optionally have a modifer by appending a colon and one or
more modifiers. Modifiers allow the user to restrict when events are
counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier is currently only implemented for
Intel PEBS and can be specified multiple times:
0 - SAMPLE_IP can have arbitrary skid
1 - SAMPLE_IP must have constant skid
2 - SAMPLE_IP requested to have 0 skid
3 - SAMPLE_IP must have 0 skid
The PEBS implementation now supports up to 2.
RAW HARDWARE EVENT DESCRIPTOR
-----------------------------
Even when an event is not available in a symbolic form within perf right now,
......
......@@ -16,7 +16,9 @@ or
or
'perf probe' --list
or
'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
or
'perf probe' [options] --vars='PROBEPOINT'
DESCRIPTION
-----------
......@@ -31,6 +33,11 @@ OPTIONS
--vmlinux=PATH::
Specify vmlinux path which has debuginfo (Dwarf binary).
-m::
--module=MODNAME::
Specify module name in which perf-probe searches probe points
or lines.
-s::
--source=PATH::
Specify path to kernel source.
......@@ -57,6 +64,15 @@ OPTIONS
Show source code lines which can be probed. This needs an argument
which specifies a range of the source code. (see LINE SYNTAX for detail)
-V::
--vars=::
Show available local variables at given probe point. The argument
syntax is same as PROBE SYNTAX, but NO ARGs.
--externs::
(Only for --vars) Show external defined variables in addition to local
variables.
-f::
--force::
Forcibly add events with existing name.
......
......@@ -83,6 +83,10 @@ OPTIONS
--call-graph::
Do call-graph (stack chain/backtrace) recording.
-q::
--quiet::
Don't print any message, useful for scripting.
-v::
--verbose::
Be more verbose (show counter open errors, etc).
......
......@@ -50,14 +50,17 @@ static struct {
bool list_events;
bool force_add;
bool show_lines;
bool show_vars;
bool show_ext_vars;
bool mod_events;
int nevents;
struct perf_probe_event events[MAX_PROBES];
struct strlist *dellist;
struct line_range line_range;
const char *target_module;
int max_probe_points;
} params;
/* Parse an event definition. Note that any error must die. */
static int parse_probe_event(const char *str)
{
......@@ -92,6 +95,7 @@ static int parse_probe_event_argv(int argc, const char **argv)
len = 0;
for (i = 0; i < argc; i++)
len += sprintf(&buf[len], "%s ", argv[i]);
params.mod_events = true;
ret = parse_probe_event(buf);
free(buf);
return ret;
......@@ -100,9 +104,10 @@ static int parse_probe_event_argv(int argc, const char **argv)
static int opt_add_probe_event(const struct option *opt __used,
const char *str, int unset __used)
{
if (str)
if (str) {
params.mod_events = true;
return parse_probe_event(str);
else
} else
return 0;
}
......@@ -110,6 +115,7 @@ static int opt_del_probe_event(const struct option *opt __used,
const char *str, int unset __used)
{
if (str) {
params.mod_events = true;
if (!params.dellist)
params.dellist = strlist__new(true, NULL);
strlist__add(params.dellist, str);
......@@ -130,6 +136,25 @@ static int opt_show_lines(const struct option *opt __used,
return ret;
}
static int opt_show_vars(const struct option *opt __used,
const char *str, int unset __used)
{
struct perf_probe_event *pev = &params.events[params.nevents];
int ret;
if (!str)
return 0;
ret = parse_probe_event(str);
if (!ret && pev->nargs != 0) {
pr_err(" Error: '--vars' doesn't accept arguments.\n");
return -EINVAL;
}
params.show_vars = true;
return ret;
}
#endif
static const char * const probe_usage[] = {
......@@ -138,7 +163,8 @@ static const char * const probe_usage[] = {
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
#ifdef DWARF_SUPPORT
"perf probe --line 'LINEDESC'",
"perf probe [<options>] --line 'LINEDESC'",
"perf probe [<options>] --vars 'PROBEPOINT'",
#endif
NULL
};
......@@ -180,10 +206,17 @@ static const struct option options[] = {
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
OPT_CALLBACK('V', "vars", NULL,
"FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
"Show accessible variables on PROBEDEF", opt_show_vars),
OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
"Show external variables too (with --vars only)"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_STRING('s', "source", &symbol_conf.source_prefix,
"directory", "path to kernel source"),
OPT_STRING('m', "module", &params.target_module,
"modname", "target module name"),
#endif
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
......@@ -217,7 +250,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
usage_with_options(probe_usage, options);
if (params.list_events) {
if (params.nevents != 0 || params.dellist) {
if (params.mod_events) {
pr_err(" Error: Don't use --list with --add/--del.\n");
usage_with_options(probe_usage, options);
}
......@@ -225,6 +258,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
pr_err(" Error: Don't use --list with --line.\n");
usage_with_options(probe_usage, options);
}
if (params.show_vars) {
pr_err(" Error: Don't use --list with --vars.\n");
usage_with_options(probe_usage, options);
}
ret = show_perf_probe_events();
if (ret < 0)
pr_err(" Error: Failed to show event list. (%d)\n",
......@@ -234,17 +271,35 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
#ifdef DWARF_SUPPORT
if (params.show_lines) {
if (params.nevents != 0 || params.dellist) {
pr_warning(" Error: Don't use --line with"
" --add/--del.\n");
if (params.mod_events) {
pr_err(" Error: Don't use --line with"
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
if (params.show_vars) {
pr_err(" Error: Don't use --line with --vars.\n");
usage_with_options(probe_usage, options);
}
ret = show_line_range(&params.line_range);
ret = show_line_range(&params.line_range, params.target_module);
if (ret < 0)
pr_err(" Error: Failed to show lines. (%d)\n", ret);
return ret;
}
if (params.show_vars) {
if (params.mod_events) {
pr_err(" Error: Don't use --vars with"
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
ret = show_available_vars(params.events, params.nevents,
params.max_probe_points,
params.target_module,
params.show_ext_vars);
if (ret < 0)
pr_err(" Error: Failed to show vars. (%d)\n", ret);
return ret;
}
#endif
if (params.dellist) {
......@@ -258,8 +313,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
if (params.nevents) {
ret = add_perf_probe_events(params.events, params.nevents,
params.force_add,
params.max_probe_points);
params.max_probe_points,
params.target_module,
params.force_add);
if (ret < 0) {
pr_err(" Error: Failed to add events. (%d)\n", ret);
return ret;
......
......@@ -353,7 +353,7 @@ static void create_counter(int counter, int cpu)
}
if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
perror("Unable to read perf file descriptor\n");
perror("Unable to read perf file descriptor");
exit(-1);
}
......@@ -626,7 +626,7 @@ static int __cmd_record(int argc, const char **argv)
nr_cpus = read_cpu_map(cpu_list);
if (nr_cpus < 1) {
perror("failed to collect number of CPUs\n");
perror("failed to collect number of CPUs");
return -1;
}
......@@ -761,6 +761,9 @@ static int __cmd_record(int argc, const char **argv)
}
}
if (quiet)
return 0;
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
/*
......@@ -820,6 +823,7 @@ static const struct option options[] = {
"do call-graph (stack chain/backtrace) recording"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
OPT_BOOLEAN('s', "stat", &inherit_stat,
"per thread counts"),
OPT_BOOLEAN('d', "data", &sample_address,
......
......@@ -46,9 +46,6 @@ static struct scripting_ops *scripting_ops;
static void setup_scripting(void)
{
/* make sure PERF_EXEC_PATH is set for scripts */
perf_set_argv_exec_path(perf_exec_path());
setup_perl_scripting();
setup_python_scripting();
......@@ -285,7 +282,7 @@ static int parse_scriptname(const struct option *opt __used,
script++;
} else {
script = str;
ext = strchr(script, '.');
ext = strrchr(script, '.');
if (!ext) {
fprintf(stderr, "invalid script extension");
return -1;
......@@ -593,6 +590,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
suffix = REPORT_SUFFIX;
}
/* make sure PERF_EXEC_PATH is set for scripts */
perf_set_argv_exec_path(perf_exec_path());
if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {
char *record_script_path, *report_script_path;
int live_pipe[2];
......@@ -625,12 +625,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
dup2(live_pipe[1], 1);
close(live_pipe[0]);
__argv = malloc(5 * sizeof(const char *));
__argv = malloc(6 * sizeof(const char *));
__argv[0] = "/bin/sh";
__argv[1] = record_script_path;
__argv[2] = "-o";
__argv[3] = "-";
__argv[4] = NULL;
__argv[2] = "-q";
__argv[3] = "-o";
__argv[4] = "-";
__argv[5] = NULL;
execvp("/bin/sh", (char **)__argv);
exit(-1);
......
......@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift
fi
fi
perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
......@@ -7,7 +7,7 @@ if [ $# -lt 1 ] ; then
fi
comm=$1
shift
perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
#!/bin/bash
# description: system-wide r/w activity
perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
......@@ -17,7 +17,7 @@ if [ "$n_args" -gt 0 ] ; then
interval=$1
shift
fi
perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
#!/bin/bash
# description: system-wide min/max/avg wakeup latency
perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
#!/bin/bash
# description: workqueue stats (ins/exe/create/destroy)
perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
......
......@@ -6,6 +6,14 @@
# Public License ("GPL") version 2 as published by the Free Software
# Foundation.
import errno, os
FUTEX_WAIT = 0
FUTEX_WAKE = 1
FUTEX_PRIVATE_FLAG = 128
FUTEX_CLOCK_REALTIME = 256
FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
NSECS_PER_SEC = 1000000000
def avg(total, n):
......@@ -24,5 +32,55 @@ def nsecs_str(nsecs):
str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
return str
def add_stats(dict, key, value):
if not dict.has_key(key):
dict[key] = (value, value, value, 1)
else:
min, max, avg, count = dict[key]
if value < min:
min = value
if value > max:
max = value
avg = (avg + value) / 2
dict[key] = (min, max, avg, count + 1)
def clear_term():
print("\x1b[H\x1b[2J")
audit_package_warned = False
try:
import audit
machine_to_id = {
'x86_64': audit.MACH_86_64,
'alpha' : audit.MACH_ALPHA,
'ia64' : audit.MACH_IA64,
'ppc' : audit.MACH_PPC,
'ppc64' : audit.MACH_PPC64,
's390' : audit.MACH_S390,
's390x' : audit.MACH_S390X,
'i386' : audit.MACH_X86,
'i586' : audit.MACH_X86,
'i686' : audit.MACH_X86,
}
try:
machine_to_id['armeb'] = audit.MACH_ARMEB
except:
pass
machine_id = machine_to_id[os.uname()[4]]
except:
if not audit_package_warned:
audit_package_warned = True
print "Install the audit-libs-python package to get syscall names"
def syscall_name(id):
try:
return audit.audit_syscall_to_name(id, machine_id)
except:
return str(id)
def strerror(nr):
try:
return errno.errorcode[abs(nr)]
except:
return "Unknown %d errno" % nr
......@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift
fi
fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
#!/bin/bash
perf record -a -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@
#!/bin/bash
# description: futext contention measurement
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
......@@ -2,4 +2,4 @@
# description: display a process of packet and processing time
# args: [tx] [rx] [dev=] [debug]
perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@
perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
#!/bin/bash
# description: sched migration overview
perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
......@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
interval=$1
shift
fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
......@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift
fi
fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
......@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
shift
fi
fi
perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm
perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
......@@ -13,21 +13,26 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
from perf_trace_context import *
from Core import *
from Util import *
usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
for_comm = None
for_pid = None
if len(sys.argv) > 2:
sys.exit(usage)
if len(sys.argv) > 1:
for_comm = sys.argv[1]
try:
for_pid = int(sys.argv[1])
except:
for_comm = sys.argv[1]
syscalls = autodict()
def trace_begin():
pass
print "Press control+C to stop and show the summary"
def trace_end():
print_error_totals()
......@@ -35,9 +40,9 @@ def trace_end():
def raw_syscalls__sys_exit(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, ret):
if for_comm is not None:
if common_comm != for_comm:
return
if (for_comm and common_comm != for_comm) or \
(for_pid and common_pid != for_pid ):
return
if ret < 0:
try:
......@@ -62,7 +67,7 @@ def print_error_totals():
print "\n%s [%d]\n" % (comm, pid),
id_keys = syscalls[comm][pid].keys()
for id in id_keys:
print " syscall: %-16d\n" % (id),
print " syscall: %-16s\n" % syscall_name(id),
ret_keys = syscalls[comm][pid][id].keys()
for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True):
print " err = %-20d %10d\n" % (ret, val),
print " err = %-20s %10d\n" % (strerror(ret), val),
# futex contention
# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
# Licensed under the terms of the GNU GPL License version 2
#
# Translation of:
#
# http://sourceware.org/systemtap/wiki/WSFutexContention
#
# to perf python scripting.
#
# Measures futex contention
import os, sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from Util import *
process_names = {}
thread_thislock = {}
thread_blocktime = {}
lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
process_names = {} # long-lived pid-to-execname mapping
def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
nr, uaddr, op, val, utime, uaddr2, val3):
cmd = op & FUTEX_CMD_MASK
if cmd != FUTEX_WAIT:
return # we don't care about originators of WAKE events
process_names[tid] = comm
thread_thislock[tid] = uaddr
thread_blocktime[tid] = nsecs(s, ns)
def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm,
nr, ret):
if thread_blocktime.has_key(tid):
elapsed = nsecs(s, ns) - thread_blocktime[tid]
add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
del thread_blocktime[tid]
del thread_thislock[tid]
def trace_begin():
print "Press control+C to stop and show the summary"
def trace_end():
for (tid, lock) in lock_waits:
min, max, avg, count = lock_waits[tid, lock]
print "%s[%d] lock %x contended %d times, %d avg ns" % \
(process_names[tid], tid, lock, count, avg)
......@@ -8,10 +8,7 @@
# will be refreshed every [interval] seconds. The default interval is
# 3 seconds.
import thread
import time
import os
import sys
import os, sys, thread, time
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
......@@ -20,7 +17,7 @@ from perf_trace_context import *
from Core import *
from Util import *
usage = "perf trace -s syscall-counts.py [comm] [interval]\n";
usage = "perf trace -s sctop.py [comm] [interval]\n";
for_comm = None
default_interval = 3
......@@ -71,7 +68,7 @@ def print_syscall_totals(interval):
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True):
try:
print "%-40d %10d\n" % (id, val),
print "%-40s %10d\n" % (syscall_name(id), val),
except TypeError:
pass
syscalls.clear()
......
......@@ -5,29 +5,33 @@
# Displays system-wide system call totals, broken down by syscall.
# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
import os
import sys
import os, sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
from Util import syscall_name
usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
for_comm = None
for_pid = None
if len(sys.argv) > 2:
sys.exit(usage)
if len(sys.argv) > 1:
for_comm = sys.argv[1]
try:
for_pid = int(sys.argv[1])
except:
for_comm = sys.argv[1]
syscalls = autodict()
def trace_begin():
pass
print "Press control+C to stop and show the summary"
def trace_end():
print_syscall_totals()
......@@ -35,9 +39,10 @@ def trace_end():
def raw_syscalls__sys_enter(event_name, context, common_cpu,
common_secs, common_nsecs, common_pid, common_comm,
id, args):
if for_comm is not None:
if common_comm != for_comm:
return
if (for_comm and common_comm != for_comm) or \
(for_pid and common_pid != for_pid ):
return
try:
syscalls[common_comm][common_pid][id] += 1
except TypeError:
......@@ -61,4 +66,4 @@ def print_syscall_totals():
id_keys = syscalls[comm][pid].keys()
for id, val in sorted(syscalls[comm][pid].iteritems(), \
key = lambda(k, v): (v, k), reverse = True):
print " %-38d %10d\n" % (id, val),
print " %-38s %10d\n" % (syscall_name(id), val),
......@@ -13,6 +13,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
from perf_trace_context import *
from Core import *
from Util import syscall_name
usage = "perf trace -s syscall-counts.py [comm]\n";
......@@ -27,7 +28,7 @@ if len(sys.argv) > 1:
syscalls = autodict()
def trace_begin():
pass
print "Press control+C to stop and show the summary"
def trace_end():
print_syscall_totals()
......@@ -55,4 +56,4 @@ def print_syscall_totals():
for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
reverse = True):
print "%-40d %10d\n" % (id, val),
print "%-40s %10d\n" % (syscall_name(id), val),
......@@ -12,8 +12,8 @@
#include "debug.h"
#include "util.h"
int verbose = 0;
bool dump_trace = false;
int verbose;
bool dump_trace = false, quiet = false;
int eprintf(int level, const char *fmt, ...)
{
......
......@@ -6,7 +6,7 @@
#include "event.h"
extern int verbose;
extern bool dump_trace;
extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
void trace_event(event_t *event);
......
......@@ -215,6 +215,16 @@ struct symbol *map_groups__find_function_by_name(struct map_groups *self,
return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);
}
static inline
struct symbol *machine__find_kernel_function_by_name(struct machine *self,
const char *name,
struct map **mapp,
symbol_filter_t filter)
{
return map_groups__find_function_by_name(&self->kmaps, name, mapp,
filter);
}
int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
int verbose, FILE *fp);
......
......@@ -74,10 +74,9 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
static struct machine machine;
/* Initialize symbol maps and path of vmlinux */
/* Initialize symbol maps and path of vmlinux/modules */
static int init_vmlinux(void)
{
struct dso *kernel;
int ret;
symbol_conf.sort_by_name = true;
......@@ -91,33 +90,61 @@ static int init_vmlinux(void)
goto out;
}
ret = machine__init(&machine, "/", 0);
ret = machine__init(&machine, "", HOST_KERNEL_ID);
if (ret < 0)
goto out;
kernel = dso__new_kernel(symbol_conf.vmlinux_name);
if (kernel == NULL)
die("Failed to create kernel dso.");
ret = __machine__create_kernel_maps(&machine, kernel);
if (ret < 0)
pr_debug("Failed to create kernel maps.\n");
if (machine__create_kernel_maps(&machine) < 0) {
pr_debug("machine__create_kernel_maps ");
goto out;
}
out:
if (ret < 0)
pr_warning("Failed to init vmlinux path.\n");
return ret;
}
static struct symbol *__find_kernel_function_by_name(const char *name,
struct map **mapp)
{
return machine__find_kernel_function_by_name(&machine, name, mapp,
NULL);
}
const char *kernel_get_module_path(const char *module)
{
struct dso *dso;
if (module) {
list_for_each_entry(dso, &machine.kernel_dsos, node) {
if (strncmp(dso->short_name + 1, module,
dso->short_name_len - 2) == 0)
goto found;
}
pr_debug("Failed to find module %s.\n", module);
return NULL;
} else {
dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
if (dso__load_vmlinux_path(dso,
machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
pr_debug("Failed to load kernel map.\n");
return NULL;
}
}
found:
return dso->long_name;
}
#ifdef DWARF_SUPPORT
static int open_vmlinux(void)
static int open_vmlinux(const char *module)
{
if (map__load(machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
pr_debug("Failed to load kernel map.\n");
return -EINVAL;
const char *path = kernel_get_module_path(module);
if (!path) {
pr_err("Failed to find path of %s module", module ?: "kernel");
return -ENOENT;
}
pr_debug("Try to open %s\n", machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name);
return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
pr_debug("Try to open %s\n", path);
return open(path, O_RDONLY);
}
/*
......@@ -125,20 +152,19 @@ static int open_vmlinux(void)
* Currently only handles kprobes.
*/
static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
struct perf_probe_point *pp)
struct perf_probe_point *pp)
{
struct symbol *sym;
int fd, ret = -ENOENT;
struct map *map;
u64 addr;
int ret = -ENOENT;
sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION],
tp->symbol, NULL);
sym = __find_kernel_function_by_name(tp->symbol, &map);
if (sym) {
fd = open_vmlinux();
if (fd >= 0) {
ret = find_perf_probe_point(fd,
sym->start + tp->offset, pp);
close(fd);
}
addr = map->unmap_ip(map, sym->start + tp->offset);
pr_debug("try to find %s+%ld@%llx\n", tp->symbol,
tp->offset, addr);
ret = find_perf_probe_point((unsigned long)addr, pp);
}
if (ret <= 0) {
pr_debug("Failed to find corresponding probes from "
......@@ -156,12 +182,12 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
/* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs,
int max_tevs)
int max_tevs, const char *module)
{
bool need_dwarf = perf_probe_event_need_dwarf(pev);
int fd, ntevs;
fd = open_vmlinux();
fd = open_vmlinux(module);
if (fd < 0) {
if (need_dwarf) {
pr_warning("Failed to open debuginfo file.\n");
......@@ -300,7 +326,7 @@ static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
* Show line-range always requires debuginfo to find source file and
* line number.
*/
int show_line_range(struct line_range *lr)
int show_line_range(struct line_range *lr, const char *module)
{
int l = 1;
struct line_node *ln;
......@@ -313,7 +339,7 @@ int show_line_range(struct line_range *lr)
if (ret < 0)
return ret;
fd = open_vmlinux();
fd = open_vmlinux(module);
if (fd < 0) {
pr_warning("Failed to open debuginfo file.\n");
return fd;
......@@ -378,11 +404,84 @@ int show_line_range(struct line_range *lr)
return ret;
}
static int show_available_vars_at(int fd, struct perf_probe_event *pev,
int max_vls, bool externs)
{
char *buf;
int ret, i;
struct str_node *node;
struct variable_list *vls = NULL, *vl;
buf = synthesize_perf_probe_point(&pev->point);
if (!buf)
return -EINVAL;
pr_debug("Searching variables at %s\n", buf);
ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
if (ret > 0) {
/* Some variables were found */
fprintf(stdout, "Available variables at %s\n", buf);
for (i = 0; i < ret; i++) {
vl = &vls[i];
/*
* A probe point might be converted to
* several trace points.
*/
fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
vl->point.offset);
free(vl->point.symbol);
if (vl->vars) {
strlist__for_each(node, vl->vars)
fprintf(stdout, "\t\t%s\n", node->s);
strlist__delete(vl->vars);
} else
fprintf(stdout, "(No variables)\n");
}
free(vls);
} else
pr_err("Failed to find variables at %s (%d)\n", buf, ret);
free(buf);
return ret;
}
/* Show available variables on given probe point */
int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_vls, const char *module, bool externs)
{
int i, fd, ret = 0;
ret = init_vmlinux();
if (ret < 0)
return ret;
fd = open_vmlinux(module);
if (fd < 0) {
pr_warning("Failed to open debuginfo file.\n");
return fd;
}
setup_pager();
for (i = 0; i < npevs && ret >= 0; i++)
ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
close(fd);
return ret;
}
#else /* !DWARF_SUPPORT */
static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
struct perf_probe_point *pp)
struct perf_probe_point *pp)
{
struct symbol *sym;
sym = __find_kernel_function_by_name(tp->symbol, NULL);
if (!sym) {
pr_err("Failed to find symbol %s in kernel.\n", tp->symbol);
return -ENOENT;
}
pp->function = strdup(tp->symbol);
if (pp->function == NULL)
return -ENOMEM;
......@@ -394,7 +493,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs __unused,
int max_tevs __unused)
int max_tevs __unused, const char *mod __unused)
{
if (perf_probe_event_need_dwarf(pev)) {
pr_warning("Debuginfo-analysis is not supported.\n");
......@@ -403,12 +502,19 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
return 0;
}
int show_line_range(struct line_range *lr __unused)
int show_line_range(struct line_range *lr __unused, const char *module __unused)
{
pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS;
}
int show_available_vars(struct perf_probe_event *pevs __unused,
int npevs __unused, int max_vls __unused,
const char *module __unused, bool externs __unused)
{
pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS;
}
#endif
int parse_line_range_desc(const char *arg, struct line_range *lr)
......@@ -1087,7 +1193,7 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
}
static int convert_to_perf_probe_event(struct probe_trace_event *tev,
struct perf_probe_event *pev)
struct perf_probe_event *pev)
{
char buf[64] = "";
int i, ret;
......@@ -1516,14 +1622,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
static int convert_to_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs,
int max_tevs)
int max_tevs, const char *module)
{
struct symbol *sym;
int ret = 0, i;
struct probe_trace_event *tev;
/* Convert perf_probe_event with debuginfo */
ret = try_to_find_probe_trace_events(pev, tevs, max_tevs);
ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
if (ret != 0)
return ret;
......@@ -1572,8 +1678,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
}
/* Currently just checking function name from symbol map */
sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION],
tev->point.symbol, NULL);
sym = __find_kernel_function_by_name(tev->point.symbol, NULL);
if (!sym) {
pr_warning("Kernel symbol \'%s\' not found.\n",
tev->point.symbol);
......@@ -1596,7 +1701,7 @@ struct __event_package {
};
int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
bool force_add, int max_tevs)
int max_tevs, const char *module, bool force_add)
{
int i, j, ret;
struct __event_package *pkgs;
......@@ -1617,7 +1722,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
pkgs[i].pev = &pevs[i];
/* Convert with or without debuginfo */
ret = convert_to_probe_trace_events(pkgs[i].pev,
&pkgs[i].tevs, max_tevs);
&pkgs[i].tevs,
max_tevs,
module);
if (ret < 0)
goto end;
pkgs[i].ntevs = ret;
......
......@@ -90,6 +90,12 @@ struct line_range {
struct list_head line_list; /* Visible lines */
};
/* List of variables */
struct variable_list {
struct probe_trace_point point; /* Actual probepoint */
struct strlist *vars; /* Available variables */
};
/* Command string to events */
extern int parse_perf_probe_command(const char *cmd,
struct perf_probe_event *pev);
......@@ -109,12 +115,18 @@ extern void clear_perf_probe_event(struct perf_probe_event *pev);
/* Command string to line-range */
extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
/* Internal use: Return kernel/module path */
extern const char *kernel_get_module_path(const char *module);
extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
bool force_add, int max_probe_points);
int max_probe_points, const char *module,
bool force_add);
extern int del_perf_probe_events(struct strlist *dellist);
extern int show_perf_probe_events(void);
extern int show_line_range(struct line_range *lr);
extern int show_line_range(struct line_range *lr, const char *module);
extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_probe_points, const char *module,
bool externs);
/* Maximum index number of event-name postfix */
......
此差异已折叠。
......@@ -22,20 +22,27 @@ extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,
int max_tevs);
/* Find a perf_probe_point from debuginfo */
extern int find_perf_probe_point(int fd, unsigned long addr,
extern int find_perf_probe_point(unsigned long addr,
struct perf_probe_point *ppt);
/* Find a line range */
extern int find_line_range(int fd, struct line_range *lr);
/* Find available variables */
extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
struct variable_list **vls, int max_points,
bool externs);
#include <dwarf.h>
#include <libdw.h>
#include <libdwfl.h>
#include <version.h>
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
struct probe_trace_event *tevs; /* Result trace events */
int ntevs; /* Number of trace events */
int max_tevs; /* Max number of trace events */
/* Callback when a probe point is found */
int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);
/* For function searching */
int lno; /* Line number */
......@@ -53,6 +60,22 @@ struct probe_finder {
struct probe_trace_arg *tvar; /* Current result variable */
};
struct trace_event_finder {
struct probe_finder pf;
struct probe_trace_event *tevs; /* Found trace events */
int ntevs; /* Number of trace events */
int max_tevs; /* Max number of trace events */
};
struct available_var_finder {
struct probe_finder pf;
struct variable_list *vls; /* Found variable lists */
int nvls; /* Number of variable lists */
int max_vls; /* Max no. of variable lists */
bool externs; /* Find external vars too */
bool child; /* Search child scopes */
};
struct line_finder {
struct line_range *lr; /* Target line range */
......
#include <slang.h>
#include "libslang.h"
#include <linux/compiler.h>
#include <linux/list.h>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册