提交 cc1790cf 编写于 作者: P Peter Zijlstra 提交者: Ingo Molnar

perf/x86: Improve HT workaround GP counter constraint

The (SNB/IVB/HSW) HT bug only affects events that can be programmed
onto GP counters, therefore we should only limit the number of GP
counters that can be used per cpu -- iow we should not constrain the
FP counters.

Furthermore, we should only enforce such a limit when there are in fact
exclusive events being scheduled on either sibling.
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[ Fixed build fail for the !CONFIG_CPU_SUP_INTEL case. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
上级 b371b594
...@@ -611,6 +611,7 @@ struct sched_state { ...@@ -611,6 +611,7 @@ struct sched_state {
int event; /* event index */ int event; /* event index */
int counter; /* counter index */ int counter; /* counter index */
int unassigned; /* number of events to be assigned left */ int unassigned; /* number of events to be assigned left */
int nr_gp; /* number of GP counters used */
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
}; };
...@@ -620,9 +621,10 @@ struct sched_state { ...@@ -620,9 +621,10 @@ struct sched_state {
struct perf_sched { struct perf_sched {
int max_weight; int max_weight;
int max_events; int max_events;
int max_gp;
int saved_states;
struct event_constraint **constraints; struct event_constraint **constraints;
struct sched_state state; struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX]; struct sched_state saved[SCHED_STATES_MAX];
}; };
...@@ -630,13 +632,14 @@ struct perf_sched { ...@@ -630,13 +632,14 @@ struct perf_sched {
* Initialize interator that runs through all events and counters. * Initialize interator that runs through all events and counters.
*/ */
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
int num, int wmin, int wmax) int num, int wmin, int wmax, int gpmax)
{ {
int idx; int idx;
memset(sched, 0, sizeof(*sched)); memset(sched, 0, sizeof(*sched));
sched->max_events = num; sched->max_events = num;
sched->max_weight = wmax; sched->max_weight = wmax;
sched->max_gp = gpmax;
sched->constraints = constraints; sched->constraints = constraints;
for (idx = 0; idx < num; idx++) { for (idx = 0; idx < num; idx++) {
...@@ -696,12 +699,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) ...@@ -696,12 +699,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
goto done; goto done;
} }
} }
/* Grab the first unused counter starting with idx */ /* Grab the first unused counter starting with idx */
idx = sched->state.counter; idx = sched->state.counter;
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used)) if (!__test_and_set_bit(idx, sched->state.used)) {
if (sched->state.nr_gp++ >= sched->max_gp)
return false;
goto done; goto done;
} }
}
return false; return false;
...@@ -757,11 +765,11 @@ static bool perf_sched_next_event(struct perf_sched *sched) ...@@ -757,11 +765,11 @@ static bool perf_sched_next_event(struct perf_sched *sched)
* Assign a counter for each event. * Assign a counter for each event.
*/ */
int perf_assign_events(struct event_constraint **constraints, int n, int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign) int wmin, int wmax, int gpmax, int *assign)
{ {
struct perf_sched sched; struct perf_sched sched;
perf_sched_init(&sched, constraints, n, wmin, wmax); perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
do { do {
if (!perf_sched_find_counter(&sched)) if (!perf_sched_find_counter(&sched))
...@@ -822,8 +830,24 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ...@@ -822,8 +830,24 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */ /* slow path */
if (i != n) { if (i != n) {
int gpmax = x86_pmu.num_counters;
/*
* Do not allow scheduling of more than half the available
* generic counters.
*
* This helps avoid counter starvation of sibling thread by
* ensuring at most half the counters cannot be in exclusive
* mode. There is no designated counters for the limits. Any
* N/2 counters can be used. This helps with events with
* specific counter constraints.
*/
if (is_ht_workaround_enabled() && !cpuc->is_fake &&
READ_ONCE(cpuc->excl_cntrs->exclusive_present))
gpmax /= 2;
unsched = perf_assign_events(cpuc->event_constraint, n, wmin, unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
wmax, assign); wmax, gpmax, assign);
} }
/* /*
......
...@@ -74,6 +74,7 @@ struct event_constraint { ...@@ -74,6 +74,7 @@ struct event_constraint {
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ #define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ #define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
struct amd_nb { struct amd_nb {
...@@ -134,8 +135,6 @@ enum intel_excl_state_type { ...@@ -134,8 +135,6 @@ enum intel_excl_state_type {
struct intel_excl_states { struct intel_excl_states {
enum intel_excl_state_type init_state[X86_PMC_IDX_MAX]; enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
enum intel_excl_state_type state[X86_PMC_IDX_MAX]; enum intel_excl_state_type state[X86_PMC_IDX_MAX];
int num_alloc_cntrs;/* #counters allocated */
int max_alloc_cntrs;/* max #counters allowed */
bool sched_started; /* true if scheduling has started */ bool sched_started; /* true if scheduling has started */
}; };
...@@ -144,6 +143,11 @@ struct intel_excl_cntrs { ...@@ -144,6 +143,11 @@ struct intel_excl_cntrs {
struct intel_excl_states states[2]; struct intel_excl_states states[2];
union {
u16 has_exclusive[2];
u32 exclusive_present;
};
int refcnt; /* per-core: #HT threads */ int refcnt; /* per-core: #HT threads */
unsigned core_id; /* per-core: core id */ unsigned core_id; /* per-core: core id */
}; };
...@@ -176,6 +180,7 @@ struct cpu_hw_events { ...@@ -176,6 +180,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
int n_excl; /* the number of exclusive events */
unsigned int group_flag; unsigned int group_flag;
int is_fake; int is_fake;
...@@ -719,7 +724,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, ...@@ -719,7 +724,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added); void x86_pmu_enable_all(int added);
int perf_assign_events(struct event_constraint **constraints, int n, int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int *assign); int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
void x86_pmu_stop(struct perf_event *event, int flags); void x86_pmu_stop(struct perf_event *event, int flags);
...@@ -930,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu) ...@@ -930,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
return NULL; return NULL;
} }
/*
 * Stub variant of is_ht_workaround_enabled() that unconditionally
 * returns 0, i.e. reports the HT workaround as not enabled.
 *
 * NOTE(review): this sits just before the closing
 * "#endif CONFIG_CPU_SUP_INTEL" and presumably lives in the branch
 * built when Intel CPU support is compiled out, so that callers such
 * as x86_schedule_events() still build -- confirm against the full
 * header, since the matching #else is not visible here.
 */
static inline int is_ht_workaround_enabled(void)
{
return 0;
}
#endif /* CONFIG_CPU_SUP_INTEL */ #endif /* CONFIG_CPU_SUP_INTEL */
...@@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) ...@@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
xl = &excl_cntrs->states[tid]; xl = &excl_cntrs->states[tid];
xl->sched_started = true; xl->sched_started = true;
xl->num_alloc_cntrs = 0;
/* /*
* lock shared state until we are done scheduling * lock shared state until we are done scheduling
* in stop_event_scheduling() * in stop_event_scheduling()
...@@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, ...@@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* across HT threads * across HT threads
*/ */
is_excl = c->flags & PERF_X86_EVENT_EXCL; is_excl = c->flags & PERF_X86_EVENT_EXCL;
if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
if (!cpuc->n_excl++)
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
}
/* /*
* xl = state of current HT * xl = state of current HT
...@@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, ...@@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
xl = &excl_cntrs->states[tid]; xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid]; xlo = &excl_cntrs->states[o_tid];
/*
* do not allow scheduling of more than max_alloc_cntrs
* which is set to half the available generic counters.
* this helps avoid counter starvation of sibling thread
* by ensuring at most half the counters cannot be in
* exclusive mode. There is not designated counters for the
* limits. Any N/2 counters can be used. This helps with
* events with specifix counter constraints
*/
if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
return &emptyconstraint;
cx = c; cx = c;
/* /*
...@@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, ...@@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
xl = &excl_cntrs->states[tid]; xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid]; xlo = &excl_cntrs->states[o_tid];
if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
if (!--cpuc->n_excl)
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
}
/* /*
* put_constraint may be called from x86_schedule_events() * put_constraint may be called from x86_schedule_events()
...@@ -2632,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu) ...@@ -2632,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
int h = x86_pmu.num_counters >> 1;
for_each_cpu(i, topology_thread_cpumask(cpu)) { for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_excl_cntrs *c; struct intel_excl_cntrs *c;
...@@ -2647,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu) ...@@ -2647,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)
} }
cpuc->excl_cntrs->core_id = core_id; cpuc->excl_cntrs->core_id = core_id;
cpuc->excl_cntrs->refcnt++; cpuc->excl_cntrs->refcnt++;
/*
* set hard limit to half the number of generic counters
*/
cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
} }
} }
......
...@@ -395,7 +395,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int ...@@ -395,7 +395,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
/* slow path */ /* slow path */
if (i != n) if (i != n)
ret = perf_assign_events(box->event_constraint, n, ret = perf_assign_events(box->event_constraint, n,
wmin, wmax, assign); wmin, wmax, n, assign);
if (!assign || ret) { if (!assign || ret) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册