diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9560d0fc6fa6cf4f0c0653a7b58e26dffe33d3a7..5801a14f7524315a7318fe5a0f60509704fdb756 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) } static atomic_t active_events; +static atomic_t pmc_refcount; static DEFINE_MUTEX(pmc_reserve_mutex); #ifdef CONFIG_X86_LOCAL_APIC @@ -270,11 +271,8 @@ static bool check_hw_exists(void) static void hw_perf_event_destroy(struct perf_event *event) { - if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_ds_buffers(); - mutex_unlock(&pmc_reserve_mutex); - } + x86_release_hardware(); + atomic_dec(&active_events); } void hw_perf_lbr_event_destroy(struct perf_event *event) @@ -324,6 +322,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } +int x86_reserve_hardware(void) +{ + int err = 0; + + if (!atomic_inc_not_zero(&pmc_refcount)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&pmc_refcount) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_ds_buffers(); + } + if (!err) + atomic_inc(&pmc_refcount); + mutex_unlock(&pmc_reserve_mutex); + } + + return err; +} + +void x86_release_hardware(void) +{ + if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_ds_buffers(); + mutex_unlock(&pmc_reserve_mutex); + } +} + /* * Check if we can create event of a certain type (that no conflicting events * are present). 
@@ -336,21 +363,36 @@ int x86_add_exclusive(unsigned int what) return 0; mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) goto out; + } atomic_inc(&x86_pmu.lbr_exclusive[what]); ret = 0; out: mutex_unlock(&pmc_reserve_mutex); + + /* + * Assuming that all exclusive events will share the PMI handler + * (which checks active_events for whether there is work to do), + * we can bump active_events counter right here, except for + * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init() + * path, which already bumps active_events for them. + */ + if (!ret && what != x86_lbr_exclusive_lbr) + atomic_inc(&active_events); + return ret; } void x86_del_exclusive(unsigned int what) { atomic_dec(&x86_pmu.lbr_exclusive[what]); + /* Mirror x86_add_exclusive(), which skips the active_events bump for LBR. */ + if (what != x86_lbr_exclusive_lbr) + atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) @@ -527,22 +569,11 @@ static int __x86_pmu_event_init(struct perf_event *event) if (!x86_pmu_initialized()) return -ENODEV; - err = 0; - if (!atomic_inc_not_zero(&active_events)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_events) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - reserve_ds_buffers(); - } - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmc_reserve_mutex); - } + err = x86_reserve_hardware(); if (err) return err; + atomic_inc(&active_events); event->destroy = hw_perf_event_destroy; event->hw.idx = -1; @@ -1415,6 +1446,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) u64 finish_clock; int ret; + /* + * All PMUs/events that share this PMI handler should make sure to + * increment active_events for their events. 
+ */ if (!atomic_read(&active_events)) return NMI_DONE; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 74089bcb6d740f4b066b5e5de541d99716e3d824..3e7fd27dfe201718860185be3fe5eeb3c028ed5c 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -716,6 +716,10 @@ int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); +int x86_reserve_hardware(void); + +void x86_release_hardware(void); + void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d455e2a61287d1d082c8a2f85ea07d00cd83627a..19980d9a6cc96cc80e3144775bd90bde652477d7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3227,6 +3227,8 @@ __init int intel_pmu_init(void) case 61: /* 14nm Broadwell Core-M */ case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3296,13 +3298,13 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != FIXED_EVENT_FLAGS - || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { - continue; + if (c->cmask == FIXED_EVENT_FLAGS + && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } - - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 &= + ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + c->weight = hweight64(c->idxmsk64); } } diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c 
b/arch/x86/kernel/cpu/perf_event_intel_bts.c index ac1f0c55f3796e17bdaddc5f946a41509890e446..7795f3f8b1d57198469ded20ac9a1244035428e8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode) static void bts_event_destroy(struct perf_event *event) { + x86_release_hardware(); x86_del_exclusive(x86_lbr_exclusive_bts); } static int bts_event_init(struct perf_event *event) { + int ret; + if (event->attr.type != bts_pmu.type) return -ENOENT; if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + ret = x86_reserve_hardware(); + if (ret) { + x86_del_exclusive(x86_lbr_exclusive_bts); + return ret; + } + event->destroy = bts_event_destroy; return 0; diff --git a/kernel/events/core.c b/kernel/events/core.c index 9e0773d5d110309d8ebd7ce444ec688028402a77..f2003b97ddc99d726cf5cc145b134b128671f17c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4331,20 +4331,20 @@ static void ring_buffer_attach(struct perf_event *event, WARN_ON_ONCE(event->rcu_pending); old_rb = event->rb; - event->rcu_batches = get_state_synchronize_rcu(); - event->rcu_pending = 1; - spin_lock_irqsave(&old_rb->event_lock, flags); list_del_rcu(&event->rb_entry); spin_unlock_irqrestore(&old_rb->event_lock, flags); - } - if (event->rcu_pending && rb) { - cond_synchronize_rcu(event->rcu_batches); - event->rcu_pending = 0; + event->rcu_batches = get_state_synchronize_rcu(); + event->rcu_pending = 1; } if (rb) { + if (event->rcu_pending) { + cond_synchronize_rcu(event->rcu_batches); + event->rcu_pending = 0; + } + spin_lock_irqsave(&rb->event_lock, flags); list_add_rcu(&event->rb_entry, &rb->event_list); spin_unlock_irqrestore(&rb->event_lock, flags); diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 69c35cf09cad25d7a711388af6e1ca449cfffe77..a51244a8022f91c26591b9553d16e0020c9f9d12 100644 --- a/tools/build/Makefile.build +++ 
b/tools/build/Makefile.build @@ -94,12 +94,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) # '$(OUTPUT)/dir' prefix to all objects -prefix := $(subst ./,,$(OUTPUT)$(dir)/) -obj-y := $(addprefix $(prefix),$(obj-y)) -subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) +objprefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(objprefix),$(obj-y)) +subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y)) # Final '$(obj)-in.o' object -in-target := $(prefix)$(obj)-in.o +in-target := $(objprefix)$(obj)-in.o PHONY += $(subdir-y)