From 235c7fc7c500e4fd1700c4ad01b5612bcdc1b449 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 14:43:25 +0100 Subject: [PATCH] perfcounters: generalize the counter scheduler Impact: clean up and refactor code refactor the counter scheduler: separate out in/out functions and introduce a counter-rotation function as well. Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 220 +++++++++++++++++++++++++++--------------- 1 file changed, 142 insertions(+), 78 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 48e1dbcdc1cd..d7a79f321b1c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_ops->disable(counter); ctx->nr_active--; cpuctx->active_oncpu--; counter->task = NULL; + counter->oncpu = -1; } ctx->nr_counters--; @@ -192,8 +193,36 @@ static void perf_counter_remove_from_context(struct perf_counter *counter) spin_unlock_irq(&ctx->lock); } +static int +counter_sched_in(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, + int cpu) +{ + if (counter->state == PERF_COUNTER_STATE_OFF) + return 0; + + counter->state = PERF_COUNTER_STATE_ACTIVE; + counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + /* + * The new state must be visible before we turn it on in the hardware: + */ + smp_wmb(); + + if (counter->hw_ops->enable(counter)) { + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->oncpu = -1; + return -EAGAIN; + } + + cpuctx->active_oncpu++; + ctx->nr_active++; + + return 0; +} + /* - * Cross CPU call to install and enable a preformance counter + * Cross CPU call to install and enable a performance counter */ static void __perf_install_in_context(void *info) { @@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info) * counters on a global level. NOP for non NMI based counters. */ perf_flags = hw_perf_save_disable(); - list_add_counter(counter, ctx); - hw_perf_restore(perf_flags); + list_add_counter(counter, ctx); ctx->nr_counters++; - if (cpuctx->active_oncpu < perf_max_counters) { - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - ctx->nr_active++; - cpuctx->active_oncpu++; - counter->hw_ops->enable(counter); - } + counter_sched_in(counter, cpuctx, ctx, cpu); if (!ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; + hw_perf_restore(perf_flags); + spin_unlock(&ctx->lock); curr_rq_unlock_irq_restore(&flags); } @@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter, if (counter->state != PERF_COUNTER_STATE_ACTIVE) return; - counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_ops->disable(counter); counter->oncpu = -1; cpuctx->active_oncpu--; @@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter, counter_sched_out(counter, cpuctx, ctx); } +void __perf_counter_sched_out(struct perf_counter_context *ctx, + struct perf_cpu_context *cpuctx) +{ + struct perf_counter *counter; + + if (likely(!ctx->nr_counters)) + return; + + spin_lock(&ctx->lock); + if (ctx->nr_active) { + list_for_each_entry(counter, &ctx->counter_list, list_entry) + group_sched_out(counter, cpuctx, ctx); + } + spin_unlock(&ctx->lock); +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled. @@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct perf_counter_context *ctx = &task->perf_counter_ctx; - struct perf_counter *counter; if (likely(!cpuctx->task_ctx)) return; - spin_lock(&ctx->lock); - if (ctx->nr_active) { - list_for_each_entry(counter, &ctx->counter_list, list_entry) - group_sched_out(counter, cpuctx, ctx); - } - spin_unlock(&ctx->lock); + __perf_counter_sched_out(ctx, cpuctx); + cpuctx->task_ctx = NULL; } -static int -counter_sched_in(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, - int cpu) +static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) { - if (counter->state == PERF_COUNTER_STATE_OFF) - return 0; - - if (counter->hw_ops->enable(counter)) - return -EAGAIN; - - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ - - cpuctx->active_oncpu++; - ctx->nr_active++; - - return 0; + __perf_counter_sched_out(&cpuctx->ctx, cpuctx); } static int @@ -416,21 +435,10 @@ group_sched_in(struct perf_counter *group_counter, return -EAGAIN; } -/* - * Called from scheduler to add the counters of the current task - * with interrupts disabled. - * - * We restore the counter value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * keep the counter running. - */ -void perf_counter_task_sched_in(struct task_struct *task, int cpu) +static void +__perf_counter_sched_in(struct perf_counter_context *ctx, + struct perf_cpu_context *cpuctx, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &task->perf_counter_ctx; struct perf_counter *counter; if (likely(!ctx->nr_counters)) @@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) break; } spin_unlock(&ctx->lock); +} +/* + * Called from scheduler to add the counters of the current task + * with interrupts disabled. + * + * We restore the counter value and then enable it. + * + * This does not protect us against NMI, but enable() + * sets the enabled bit in the control field of counter _before_ + * accessing the counter control register. If a NMI hits, then it will + * keep the counter running. + */ +void perf_counter_task_sched_in(struct task_struct *task, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &task->perf_counter_ctx; + + __perf_counter_sched_in(ctx, cpuctx, cpu); cpuctx->task_ctx = ctx; } +static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +{ + struct perf_counter_context *ctx = &cpuctx->ctx; + + __perf_counter_sched_in(ctx, cpuctx, cpu); +} + int perf_counter_task_disable(void) { struct task_struct *curr = current; @@ -514,6 +547,8 @@ int perf_counter_task_enable(void) /* force the update of the task clock: */ __task_delta_exec(curr, 1); + perf_counter_task_sched_out(curr, cpu); + spin_lock(&ctx->lock); /* @@ -538,19 +573,18 @@ int perf_counter_task_enable(void) return 0; } -void perf_counter_task_tick(struct task_struct *curr, int cpu) +/* + * Round-robin a context's counters: + */ +static void rotate_ctx(struct perf_counter_context *ctx) { - struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; u64 perf_flags; - if (likely(!ctx->nr_counters)) + if (!ctx->nr_counters) return; - perf_counter_task_sched_out(curr, cpu); - spin_lock(&ctx->lock); - /* * Rotate the first entry last (works just fine for group counters too): */ @@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); +} + +void perf_counter_task_tick(struct task_struct *curr, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + const int rotate_percpu = 0; + + if (rotate_percpu) + perf_counter_cpu_sched_out(cpuctx); + perf_counter_task_sched_out(curr, cpu); + if (rotate_percpu) + rotate_ctx(&cpuctx->ctx); + rotate_ctx(ctx); + + if (rotate_percpu) + perf_counter_cpu_sched_in(cpuctx, cpu); perf_counter_task_sched_in(curr, cpu); } @@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) struct task_struct *curr = counter->task; u64 delta; - WARN_ON_ONCE(counter->task != current); - delta = __task_delta_exec(curr, update); return curr->se.sum_exec_runtime + delta; @@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->hw_ops = NULL; + counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) counter->state = PERF_COUNTER_STATE_OFF; @@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child, { struct perf_counter *parent_counter; u64 parent_val, child_val; - unsigned long flags; - u64 perf_flags; /* - * Disable and unlink this counter. - * - * Be careful about zapping the list - IRQ/NMI context - * could still be processing it: + * If we do not self-reap then we have to wait for the + * child task to unschedule (it will happen for sure), + * so that its counter is at its final count. (This + * condition triggers rarely - child tasks usually get + * off their CPU before the parent has a chance to + * get this far into the reaping action) */ - curr_rq_lock_irq_save(&flags); - perf_flags = hw_perf_save_disable(); - - if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { + if (child != current) { + wait_task_inactive(child, 0); + list_del_init(&child_counter->list_entry); + } else { struct perf_cpu_context *cpuctx; + unsigned long flags; + u64 perf_flags; + + /* + * Disable and unlink this counter. + * + * Be careful about zapping the list - IRQ/NMI context + * could still be processing it: + */ + curr_rq_lock_irq_save(&flags); + perf_flags = hw_perf_save_disable(); cpuctx = &__get_cpu_var(perf_cpu_context); - child_counter->hw_ops->disable(child_counter); - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - child_counter->oncpu = -1; + if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { + child_counter->state = PERF_COUNTER_STATE_INACTIVE; + child_counter->hw_ops->disable(child_counter); + cpuctx->active_oncpu--; + child_ctx->nr_active--; + child_counter->oncpu = -1; + } - cpuctx->active_oncpu--; - child_ctx->nr_active--; - } + list_del_init(&child_counter->list_entry); - list_del_init(&child_counter->list_entry); + child_ctx->nr_counters--; - hw_perf_restore(perf_flags); - curr_rq_unlock_irq_restore(&flags); + hw_perf_restore(perf_flags); + curr_rq_unlock_irq_restore(&flags); + } parent_counter = child_counter->parent; /* -- GitLab