提交 c9819f45 编写于 作者: Christoph Lameter 提交者: Linus Torvalds

[PATCH] sched: use softirq for load balancing

Call rebalance_tick (renamed to run_rebalance_domains) from a newly introduced
softirq.

We calculate the earliest time for each layer of sched domains to be rescanned
(this is the rescan time for idle) and use the earliest of those to schedule
the softirq via a new field "next_balance" added to struct rq.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
上级 e418e1c2
...@@ -231,7 +231,8 @@ enum ...@@ -231,7 +231,8 @@ enum
NET_TX_SOFTIRQ, NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ, NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ, BLOCK_SOFTIRQ,
TASKLET_SOFTIRQ TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
}; };
/* softirq mask and active fields moved to irq_cpustat_t in /* softirq mask and active fields moved to irq_cpustat_t in
......
...@@ -227,6 +227,7 @@ struct rq { ...@@ -227,6 +227,7 @@ struct rq {
unsigned long expired_timestamp; unsigned long expired_timestamp;
unsigned long long timestamp_last_tick; unsigned long long timestamp_last_tick;
struct task_struct *curr, *idle; struct task_struct *curr, *idle;
unsigned long next_balance;
struct mm_struct *prev_mm; struct mm_struct *prev_mm;
struct prio_array *active, *expired, arrays[2]; struct prio_array *active, *expired, arrays[2];
int best_expired_prio; int best_expired_prio;
...@@ -2858,7 +2859,7 @@ static void update_load(struct rq *this_rq) ...@@ -2858,7 +2859,7 @@ static void update_load(struct rq *this_rq)
} }
/* /*
* rebalance_tick will get called every timer tick, on every CPU. * run_rebalance_domains is triggered when needed from the scheduler tick.
* *
* It checks each scheduling domain to see if it is due to be balanced, * It checks each scheduling domain to see if it is due to be balanced,
* and initiates a balancing operation if so. * and initiates a balancing operation if so.
...@@ -2866,9 +2867,10 @@ static void update_load(struct rq *this_rq) ...@@ -2866,9 +2867,10 @@ static void update_load(struct rq *this_rq)
* Balancing parameters are set up in arch_init_sched_domains. * Balancing parameters are set up in arch_init_sched_domains.
*/ */
static void static void run_rebalance_domains(struct softirq_action *h)
rebalance_tick(int this_cpu, struct rq *this_rq)
{ {
int this_cpu = smp_processor_id();
struct rq *this_rq = cpu_rq(this_cpu);
unsigned long interval; unsigned long interval;
struct sched_domain *sd; struct sched_domain *sd;
/* /*
...@@ -2877,6 +2879,8 @@ rebalance_tick(int this_cpu, struct rq *this_rq) ...@@ -2877,6 +2879,8 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
*/ */
enum idle_type idle = !this_rq->nr_running ? enum idle_type idle = !this_rq->nr_running ?
SCHED_IDLE : NOT_IDLE; SCHED_IDLE : NOT_IDLE;
/* Earliest time when we have to call run_rebalance_domains again */
unsigned long next_balance = jiffies + 60*HZ;
for_each_domain(this_cpu, sd) { for_each_domain(this_cpu, sd) {
if (!(sd->flags & SD_LOAD_BALANCE)) if (!(sd->flags & SD_LOAD_BALANCE))
...@@ -2891,7 +2895,7 @@ rebalance_tick(int this_cpu, struct rq *this_rq) ...@@ -2891,7 +2895,7 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
if (unlikely(!interval)) if (unlikely(!interval))
interval = 1; interval = 1;
if (jiffies - sd->last_balance >= interval) { if (time_after_eq(jiffies, sd->last_balance + interval)) {
if (load_balance(this_cpu, this_rq, sd, idle)) { if (load_balance(this_cpu, this_rq, sd, idle)) {
/* /*
* We've pulled tasks over so either we're no * We've pulled tasks over so either we're no
...@@ -2902,7 +2906,10 @@ rebalance_tick(int this_cpu, struct rq *this_rq) ...@@ -2902,7 +2906,10 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
} }
sd->last_balance += interval; sd->last_balance += interval;
} }
if (time_after(next_balance, sd->last_balance + interval))
next_balance = sd->last_balance + interval;
} }
this_rq->next_balance = next_balance;
} }
#else #else
/* /*
...@@ -3155,7 +3162,8 @@ void scheduler_tick(void) ...@@ -3155,7 +3162,8 @@ void scheduler_tick(void)
task_running_tick(rq, p); task_running_tick(rq, p);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
update_load(rq); update_load(rq);
rebalance_tick(cpu, rq); if (time_after_eq(jiffies, rq->next_balance))
raise_softirq(SCHED_SOFTIRQ);
#endif #endif
} }
...@@ -6859,6 +6867,10 @@ void __init sched_init(void) ...@@ -6859,6 +6867,10 @@ void __init sched_init(void)
set_load_weight(&init_task); set_load_weight(&init_task);
#ifdef CONFIG_SMP
open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
#endif
#ifdef CONFIG_RT_MUTEXES #ifdef CONFIG_RT_MUTEXES
plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册