提交 1aaf90a4 编写于 作者: V Vincent Guittot 提交者: Ingo Molnar

sched: Move CFS tasks to CPUs with higher capacity

When a CPU is used to handle a lot of IRQs or some RT tasks, the remaining
capacity for CFS tasks can be significantly reduced. Once we detect such a
situation by comparing cpu_capacity_orig and cpu_capacity, we trigger an idle
load balance to check if it's worth moving its tasks to an idle CPU.

It's worth trying to move the task before the CPU is fully utilized to
minimize the preemption by irq or RT tasks.

Once the idle load_balance has selected the busiest CPU, it will look for an
active load balance for only two cases:

  - There is only 1 task on the busiest CPU.

  - We haven't been able to move a task of the busiest rq.

A CPU with a reduced capacity is included in the 1st case, and it's worth
actively migrating its task if the idle CPU has more available capacity for
CFS tasks. This test has been added in need_active_balance().

As a side note, this will not generate more spurious ilb because we already
trigger an ilb if there is more than 1 busy cpu. If this cpu is the only one
that has a task, we will trigger the ilb once for migrating the task.

The nohz_kick_needed() function has been cleaned up a bit while adding the new
test.

env.src_cpu and env.src_rq must be set unconditionally because they are used
in need_active_balance(), which is called even if busiest->nr_running equals 1.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Morten.Rasmussen@arm.com
Cc: dietmar.eggemann@arm.com
Cc: efault@gmx.de
Cc: kamalesh@linux.vnet.ibm.com
Cc: linaro-kernel@lists.linaro.org
Cc: nicolas.pitre@linaro.org
Cc: preeti@linux.vnet.ibm.com
Cc: riel@redhat.com
Link: http://lkml.kernel.org/r/1425052454-25797-12-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
上级 caff37ef
...@@ -6855,6 +6855,19 @@ static int need_active_balance(struct lb_env *env) ...@@ -6855,6 +6855,19 @@ static int need_active_balance(struct lb_env *env)
return 1; return 1;
} }
/*
* The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
* It's worth migrating the task if the src_cpu's capacity is reduced
* because of other sched_class or IRQs if more capacity stays
* available on dst_cpu.
*/
if ((env->idle != CPU_NOT_IDLE) &&
(env->src_rq->cfs.h_nr_running == 1)) {
if ((check_cpu_capacity(env->src_rq, sd)) &&
(capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
return 1;
}
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
} }
...@@ -6954,6 +6967,9 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -6954,6 +6967,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
schedstat_add(sd, lb_imbalance[idle], env.imbalance); schedstat_add(sd, lb_imbalance[idle], env.imbalance);
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
ld_moved = 0; ld_moved = 0;
if (busiest->nr_running > 1) { if (busiest->nr_running > 1) {
/* /*
...@@ -6963,8 +6979,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -6963,8 +6979,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
* correctly treated as an imbalance. * correctly treated as an imbalance.
*/ */
env.flags |= LBF_ALL_PINNED; env.flags |= LBF_ALL_PINNED;
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running); env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
more_balance: more_balance:
...@@ -7664,22 +7678,25 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) ...@@ -7664,22 +7678,25 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
/* /*
* Current heuristic for kicking the idle load balancer in the presence * Current heuristic for kicking the idle load balancer in the presence
* of an idle cpu is the system. * of an idle cpu in the system.
* - This rq has more than one task. * - This rq has more than one task.
* - At any scheduler domain level, this cpu's scheduler group has multiple * - This rq has at least one CFS task and the capacity of the CPU is
* busy cpu's exceeding the group's capacity. * significantly reduced because of RT tasks or IRQs.
* - At parent of LLC scheduler domain level, this cpu's scheduler group has
* multiple busy cpu.
* - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
* domain span are idle. * domain span are idle.
*/ */
static inline int nohz_kick_needed(struct rq *rq) static inline bool nohz_kick_needed(struct rq *rq)
{ {
unsigned long now = jiffies; unsigned long now = jiffies;
struct sched_domain *sd; struct sched_domain *sd;
struct sched_group_capacity *sgc; struct sched_group_capacity *sgc;
int nr_busy, cpu = rq->cpu; int nr_busy, cpu = rq->cpu;
bool kick = false;
if (unlikely(rq->idle_balance)) if (unlikely(rq->idle_balance))
return 0; return false;
/* /*
* We may be recently in ticked or tickless idle mode. At the first * We may be recently in ticked or tickless idle mode. At the first
...@@ -7693,38 +7710,46 @@ static inline int nohz_kick_needed(struct rq *rq) ...@@ -7693,38 +7710,46 @@ static inline int nohz_kick_needed(struct rq *rq)
* balancing. * balancing.
*/ */
if (likely(!atomic_read(&nohz.nr_cpus))) if (likely(!atomic_read(&nohz.nr_cpus)))
return 0; return false;
if (time_before(now, nohz.next_balance)) if (time_before(now, nohz.next_balance))
return 0; return false;
if (rq->nr_running >= 2) if (rq->nr_running >= 2)
goto need_kick; return true;
rcu_read_lock(); rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_busy, cpu)); sd = rcu_dereference(per_cpu(sd_busy, cpu));
if (sd) { if (sd) {
sgc = sd->groups->sgc; sgc = sd->groups->sgc;
nr_busy = atomic_read(&sgc->nr_busy_cpus); nr_busy = atomic_read(&sgc->nr_busy_cpus);
if (nr_busy > 1) if (nr_busy > 1) {
goto need_kick_unlock; kick = true;
goto unlock;
}
} }
sd = rcu_dereference(per_cpu(sd_asym, cpu)); sd = rcu_dereference(rq->sd);
if (sd) {
if ((rq->cfs.h_nr_running >= 1) &&
check_cpu_capacity(rq, sd)) {
kick = true;
goto unlock;
}
}
sd = rcu_dereference(per_cpu(sd_asym, cpu));
if (sd && (cpumask_first_and(nohz.idle_cpus_mask, if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
sched_domain_span(sd)) < cpu)) sched_domain_span(sd)) < cpu)) {
goto need_kick_unlock; kick = true;
goto unlock;
rcu_read_unlock(); }
return 0;
need_kick_unlock: unlock:
rcu_read_unlock(); rcu_read_unlock();
need_kick: return kick;
return 1;
} }
#else #else
static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { } static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册