提交 4c3b73c6 编写于 作者: L Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Misc kernel side fixes:

   - fix event leak
   - fix AMD PMU driver bug
   - fix core event handling bug
   - fix build bug on certain randconfigs

  Plus misc tooling fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/amd/ibs: Fix pmu::stop() nesting
  perf/core: Don't leak event in the syscall error path
  perf/core: Fix time tracking bug with multiplexing
  perf jit: genelf makes assumptions about endian
  perf hists: Fix determination of a callchain node's childlessness
  perf tools: Add missing initialization of perf_sample.cpumode in synthesized samples
  perf tools: Fix build break on powerpc
  perf/x86: Move events_sysfs_show() outside CPU_SUP_INTEL
  perf bench: Fix detached tarball building due to missing 'perf bench memcpy' headers
  perf tests: Fix tarpkg build test error output redirection
...@@ -28,10 +28,46 @@ static u32 ibs_caps; ...@@ -28,10 +28,46 @@ static u32 ibs_caps;
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
/*
* IBS states:
*
* ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
* and any further add()s must fail.
*
* STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
* complicated by the fact that the IBS hardware can send late NMIs (i.e. after
* we've cleared the EN bit).
*
* In order to consume these late NMIs we have the STOPPED state, any NMI that
* happens after we've cleared the EN state will clear this bit and report the
* NMI handled (this is fundamentally racy in the face of multiple NMI sources,
* someone else can consume our BIT and our NMI will go unhandled).
*
* And since we cannot set/clear this separate bit together with the EN bit,
* there are races; if we cleared STARTED early, an NMI could land in
* between clearing STARTED and clearing the EN bit (in fact multiple NMIs
* could happen if the period is small enough), and consume our STOPPED bit
* and trigger streams of unhandled NMIs.
*
* If, however, we clear STARTED late, an NMI can hit between clearing the
* EN bit and clearing STARTED, still see STARTED set and process the event.
* If this event has the VALID bit clear, we bail properly, but this
* is not a given. With VALID set we can end up calling pmu::stop() again
* (the throttle logic) and trigger the WARNs in there.
*
* So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
* nesting, and clear STARTED late, so that we have a well defined state over
* the clearing of the EN bit.
*
* XXX: we could probably be using !atomic bitops for all this.
*/
enum ibs_states { enum ibs_states {
IBS_ENABLED = 0, IBS_ENABLED = 0,
IBS_STARTED = 1, IBS_STARTED = 1,
IBS_STOPPING = 2, IBS_STOPPING = 2,
IBS_STOPPED = 3,
IBS_MAX_STATES, IBS_MAX_STATES,
}; };
...@@ -377,9 +413,8 @@ static void perf_ibs_start(struct perf_event *event, int flags) ...@@ -377,9 +413,8 @@ static void perf_ibs_start(struct perf_event *event, int flags)
perf_ibs_set_period(perf_ibs, hwc, &period); perf_ibs_set_period(perf_ibs, hwc, &period);
/* /*
* Set STARTED before enabling the hardware, such that * Set STARTED before enabling the hardware, such that a subsequent NMI
* a subsequent NMI must observe it. Then clear STOPPING * must observe it.
* such that we don't consume NMIs by accident.
*/ */
set_bit(IBS_STARTED, pcpu->state); set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state); clear_bit(IBS_STOPPING, pcpu->state);
...@@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags) ...@@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
u64 config; u64 config;
int stopping; int stopping;
if (test_and_set_bit(IBS_STOPPING, pcpu->state))
return;
stopping = test_bit(IBS_STARTED, pcpu->state); stopping = test_bit(IBS_STARTED, pcpu->state);
if (!stopping && (hwc->state & PERF_HES_UPTODATE)) if (!stopping && (hwc->state & PERF_HES_UPTODATE))
...@@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags) ...@@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
if (stopping) { if (stopping) {
/* /*
* Set STOPPING before disabling the hardware, such that it * Set STOPPED before disabling the hardware, such that it
* must be visible to NMIs the moment we clear the EN bit, * must be visible to NMIs the moment we clear the EN bit,
* at which point we can generate an !VALID sample which * at which point we can generate an !VALID sample which
* we need to consume. * we need to consume.
*/ */
set_bit(IBS_STOPPING, pcpu->state); set_bit(IBS_STOPPED, pcpu->state);
perf_ibs_disable_event(perf_ibs, hwc, config); perf_ibs_disable_event(perf_ibs, hwc, config);
/* /*
* Clear STARTED after disabling the hardware; if it were * Clear STARTED after disabling the hardware; if it were
...@@ -556,7 +594,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) ...@@ -556,7 +594,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
* with samples that even have the valid bit cleared. * with samples that even have the valid bit cleared.
* Mark all these NMIs as handled. * Mark all these NMIs as handled.
*/ */
if (test_and_clear_bit(IBS_STOPPING, pcpu->state)) if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
return 1; return 1;
return 0; return 0;
......
...@@ -800,6 +800,9 @@ ssize_t intel_event_sysfs_show(char *page, u64 config); ...@@ -800,6 +800,9 @@ ssize_t intel_event_sysfs_show(char *page, u64 config);
struct attribute **merge_attr(struct attribute **a, struct attribute **b); struct attribute **merge_attr(struct attribute **a, struct attribute **b);
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
#ifdef CONFIG_CPU_SUP_AMD #ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void); int amd_pmu_init(void);
...@@ -930,9 +933,6 @@ int p6_pmu_init(void); ...@@ -930,9 +933,6 @@ int p6_pmu_init(void);
int knc_pmu_init(void); int knc_pmu_init(void);
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
static inline int is_ht_workaround_enabled(void) static inline int is_ht_workaround_enabled(void)
{ {
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
......
...@@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx, ...@@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx,
cpuctx->task_ctx = NULL; cpuctx->task_ctx = NULL;
} }
is_active ^= ctx->is_active; /* changed bits */ /*
* Always update time if it was set; not only when it changes.
* Otherwise we can 'forget' to update time for any but the last
* context we sched out. For example:
*
* ctx_sched_out(.event_type = EVENT_FLEXIBLE)
* ctx_sched_out(.event_type = EVENT_PINNED)
*
* would only update time for the pinned events.
*/
if (is_active & EVENT_TIME) { if (is_active & EVENT_TIME) {
/* update (and stop) ctx time */ /* update (and stop) ctx time */
update_context_time(ctx); update_context_time(ctx);
update_cgrp_time_from_cpuctx(cpuctx); update_cgrp_time_from_cpuctx(cpuctx);
} }
is_active ^= ctx->is_active; /* changed bits */
if (!ctx->nr_active || !(is_active & EVENT_ALL)) if (!ctx->nr_active || !(is_active & EVENT_ALL))
return; return;
...@@ -8532,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -8532,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open,
f_flags); f_flags);
if (IS_ERR(event_file)) { if (IS_ERR(event_file)) {
err = PTR_ERR(event_file); err = PTR_ERR(event_file);
event_file = NULL;
goto err_context; goto err_context;
} }
......
...@@ -74,6 +74,7 @@ arch/*/include/uapi/asm/unistd*.h ...@@ -74,6 +74,7 @@ arch/*/include/uapi/asm/unistd*.h
arch/*/include/uapi/asm/perf_regs.h arch/*/include/uapi/asm/perf_regs.h
arch/*/lib/memcpy*.S arch/*/lib/memcpy*.S
arch/*/lib/memset*.S arch/*/lib/memset*.S
arch/*/include/asm/*features.h
include/linux/poison.h include/linux/poison.h
include/linux/hw_breakpoint.h include/linux/hw_breakpoint.h
include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <linux/stringify.h> #include <linux/stringify.h>
#include "header.h"
#include "util.h"
#define mfspr(rn) ({unsigned long rval; \ #define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \ asm volatile("mfspr %0," __stringify(rn) \
......
...@@ -15,7 +15,7 @@ TMP_DEST=$(mktemp -d) ...@@ -15,7 +15,7 @@ TMP_DEST=$(mktemp -d)
tar xf ${TARBALL} -C $TMP_DEST tar xf ${TARBALL} -C $TMP_DEST
rm -f ${TARBALL} rm -f ${TARBALL}
cd - > /dev/null cd - > /dev/null
make -C $TMP_DEST/perf*/tools/perf > /dev/null 2>&1 make -C $TMP_DEST/perf*/tools/perf > /dev/null
RC=$? RC=$?
rm -rf ${TMP_DEST} rm -rf ${TMP_DEST}
exit $RC exit $RC
...@@ -337,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node, ...@@ -337,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
chain = list_entry(node->val.next, struct callchain_list, list); chain = list_entry(node->val.next, struct callchain_list, list);
chain->has_children = has_sibling; chain->has_children = has_sibling;
if (node->val.next != node->val.prev) { if (!list_empty(&node->val)) {
chain = list_entry(node->val.prev, struct callchain_list, list); chain = list_entry(node->val.prev, struct callchain_list, list);
chain->has_children = !RB_EMPTY_ROOT(&node->rb_root); chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
} }
......
...@@ -56,13 +56,22 @@ const char *perf_event__name(unsigned int id) ...@@ -56,13 +56,22 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id]; return perf_event__names[id];
} }
static struct perf_sample synth_sample = { static int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
perf_event__handler_t process)
{
struct perf_sample synth_sample = {
.pid = -1, .pid = -1,
.tid = -1, .tid = -1,
.time = -1, .time = -1,
.stream_id = -1, .stream_id = -1,
.cpu = -1, .cpu = -1,
.period = 1, .period = 1,
.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
};
return process(tool, event, &synth_sample, machine);
}; };
/* /*
...@@ -186,7 +195,7 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, ...@@ -186,7 +195,7 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
return -1; return -1;
if (process(tool, event, &synth_sample, machine) != 0) if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
return -1; return -1;
return tgid; return tgid;
...@@ -218,7 +227,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, ...@@ -218,7 +227,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
if (process(tool, event, &synth_sample, machine) != 0) if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
return -1; return -1;
return 0; return 0;
...@@ -344,7 +353,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, ...@@ -344,7 +353,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
event->mmap2.pid = tgid; event->mmap2.pid = tgid;
event->mmap2.tid = pid; event->mmap2.tid = pid;
if (process(tool, event, &synth_sample, machine) != 0) { if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1; rc = -1;
break; break;
} }
...@@ -402,7 +411,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, ...@@ -402,7 +411,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
memcpy(event->mmap.filename, pos->dso->long_name, memcpy(event->mmap.filename, pos->dso->long_name,
pos->dso->long_name_len + 1); pos->dso->long_name_len + 1);
if (process(tool, event, &synth_sample, machine) != 0) { if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1; rc = -1;
break; break;
} }
...@@ -472,7 +481,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, ...@@ -472,7 +481,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
/* /*
* Send the prepared comm event * Send the prepared comm event
*/ */
if (process(tool, comm_event, &synth_sample, machine) != 0) if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
break; break;
rc = 0; rc = 0;
...@@ -701,7 +710,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, ...@@ -701,7 +710,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
event->mmap.len = map->end - event->mmap.start; event->mmap.len = map->end - event->mmap.start;
event->mmap.pid = machine->pid; event->mmap.pid = machine->pid;
err = process(tool, event, &synth_sample, machine); err = perf_tool__process_synth_event(tool, event, machine, process);
free(event); free(event);
return err; return err;
......
...@@ -9,36 +9,32 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent ...@@ -9,36 +9,32 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#if defined(__arm__) #if defined(__arm__)
#define GEN_ELF_ARCH EM_ARM #define GEN_ELF_ARCH EM_ARM
#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS32 #define GEN_ELF_CLASS ELFCLASS32
#elif defined(__aarch64__) #elif defined(__aarch64__)
#define GEN_ELF_ARCH EM_AARCH64 #define GEN_ELF_ARCH EM_AARCH64
#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64 #define GEN_ELF_CLASS ELFCLASS64
#elif defined(__x86_64__) #elif defined(__x86_64__)
#define GEN_ELF_ARCH EM_X86_64 #define GEN_ELF_ARCH EM_X86_64
#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64 #define GEN_ELF_CLASS ELFCLASS64
#elif defined(__i386__) #elif defined(__i386__)
#define GEN_ELF_ARCH EM_386 #define GEN_ELF_ARCH EM_386
#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS32 #define GEN_ELF_CLASS ELFCLASS32
#elif defined(__ppcle__) #elif defined(__powerpc64__)
#define GEN_ELF_ARCH EM_PPC
#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__powerpc__)
#define GEN_ELF_ARCH EM_PPC64
#define GEN_ELF_ENDIAN ELFDATA2MSB
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__powerpcle__)
#define GEN_ELF_ARCH EM_PPC64 #define GEN_ELF_ARCH EM_PPC64
#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64 #define GEN_ELF_CLASS ELFCLASS64
#elif defined(__powerpc__)
#define GEN_ELF_ARCH EM_PPC
#define GEN_ELF_CLASS ELFCLASS32
#else #else
#error "unsupported architecture" #error "unsupported architecture"
#endif #endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define GEN_ELF_ENDIAN ELFDATA2MSB
#else
#define GEN_ELF_ENDIAN ELFDATA2LSB
#endif
#if GEN_ELF_CLASS == ELFCLASS64 #if GEN_ELF_CLASS == ELFCLASS64
#define elf_newehdr elf64_newehdr #define elf_newehdr elf64_newehdr
#define elf_getshdr elf64_getshdr #define elf_getshdr elf64_getshdr
......
...@@ -279,6 +279,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, ...@@ -279,6 +279,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
event.sample.header.misc = PERF_RECORD_MISC_USER; event.sample.header.misc = PERF_RECORD_MISC_USER;
event.sample.header.size = sizeof(struct perf_event_header); event.sample.header.size = sizeof(struct perf_event_header);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = le64_to_cpu(branch->from); sample.ip = le64_to_cpu(branch->from);
sample.pid = btsq->pid; sample.pid = btsq->pid;
sample.tid = btsq->tid; sample.tid = btsq->tid;
......
...@@ -979,6 +979,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ...@@ -979,6 +979,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding) if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip; sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid; sample.pid = ptq->pid;
sample.tid = ptq->tid; sample.tid = ptq->tid;
...@@ -1035,6 +1036,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ...@@ -1035,6 +1036,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding) if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip; sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid; sample.pid = ptq->pid;
sample.tid = ptq->tid; sample.tid = ptq->tid;
...@@ -1092,6 +1094,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) ...@@ -1092,6 +1094,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding) if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip; sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid; sample.pid = ptq->pid;
sample.tid = ptq->tid; sample.tid = ptq->tid;
......
...@@ -417,6 +417,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) ...@@ -417,6 +417,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
* use first address as sample address * use first address as sample address
*/ */
memset(&sample, 0, sizeof(sample)); memset(&sample, 0, sizeof(sample));
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid; sample.pid = pid;
sample.tid = tid; sample.tid = tid;
sample.time = id->time; sample.time = id->time;
...@@ -505,6 +506,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) ...@@ -505,6 +506,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
* use first address as sample address * use first address as sample address
*/ */
memset(&sample, 0, sizeof(sample)); memset(&sample, 0, sizeof(sample));
sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid; sample.pid = pid;
sample.tid = tid; sample.tid = tid;
sample.time = id->time; sample.time = id->time;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册