提交 d2440c99 编写于 作者: H Huaixin Chang 提交者: Caspar Zhang

alinux: sched/fair: use static load in wake_affine_weight

For a long time, runnable CPU load has been used to select the task rq
when waking up tasks. Recent tests have shown that, for workloads with a
large number of short-running tasks and almost full CPU utilization,
static load is more helpful.

In our e2e tests, the runnable load avg of Java threads ranges from less
than 10 to as much as 362, even though these Java threads are no different
from each other and should be treated in the same way. After switching to
static load, QPS improvement has been seen in multiple test cases.

A new sched feature, WA_STATIC_WEIGHT, is introduced to control this. Echo
WA_STATIC_WEIGHT to /sys/kernel/debug/sched_features to turn on static load
in wake_affine_weight, and NO_WA_STATIC_WEIGHT to turn it off. This
feature is off by default.

Test is done on the following hardware:

4 threads Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz

In tests with 120 threads and sql loglevel configured to info:

	NO_WA_STATIC_WEIGHT     WA_STATIC_WEIGHT
	33170.63                34614.95 (+4.35%)

In tests with 160 threads and sql loglevel configured to info:

	NO_WA_STATIC_WEIGHT     WA_STATIC_WEIGHT
	35888.71                38247.20 (+6.57%)

In tests with 160 threads and sql loglevel configured to warn:

	NO_WA_STATIC_WEIGHT     WA_STATIC_WEIGHT
	39118.72                39698.72 (+1.48%)
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Acked-by: Shanpei Chen <shanpeic@linux.alibaba.com>
上级 f7a573c3
......@@ -692,6 +692,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
static unsigned long task_h_load(struct task_struct *p);
static unsigned long task_h_load_static(struct task_struct *p);
/* Give new sched_entity start runnable values to heavy its load in infant time */
void init_entity_runnable_average(struct sched_entity *se)
......@@ -5653,10 +5654,19 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
s64 this_eff_load, prev_eff_load;
unsigned long task_load;
this_eff_load = target_load(this_cpu, sd->wake_idx);
if (sched_feat(WA_STATIC_WEIGHT))
this_eff_load =
scale_load_down(cpu_rq(this_cpu)->cfs.load.weight);
else
this_eff_load = target_load(this_cpu, sd->wake_idx);
if (sync) {
unsigned long current_load = task_h_load(current);
unsigned long current_load;
if (sched_feat(WA_STATIC_WEIGHT))
current_load = task_h_load_static(current);
else
current_load = task_h_load(current);
if (current_load > this_eff_load)
return this_cpu;
......@@ -5664,14 +5674,21 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
this_eff_load -= current_load;
}
task_load = task_h_load(p);
if (sched_feat(WA_STATIC_WEIGHT))
task_load = task_h_load_static(p);
else
task_load = task_h_load(p);
this_eff_load += task_load;
if (sched_feat(WA_BIAS))
this_eff_load *= 100;
this_eff_load *= capacity_of(prev_cpu);
prev_eff_load = source_load(prev_cpu, sd->wake_idx);
if (sched_feat(WA_STATIC_WEIGHT))
prev_eff_load =
scale_load_down(cpu_rq(prev_cpu)->cfs.load.weight);
else
prev_eff_load = source_load(prev_cpu, sd->wake_idx);
prev_eff_load -= task_load;
if (sched_feat(WA_BIAS))
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
......@@ -7506,6 +7523,48 @@ static unsigned long task_h_load(struct task_struct *p)
return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
cfs_rq_load_avg(cfs_rq) + 1);
}
/*
 * Refresh the cached hierarchical load (->h_load) of @cfs_rq using static
 * load weights (->load.weight).
 *
 * The result is cached per jiffy through ->last_h_load_update: if @cfs_rq was
 * already refreshed during the current jiffy, nothing is done.
 *
 * The work is done in two passes:
 *   1. Follow the entity chain from @cfs_rq's group entity, recording in each
 *      visited cfs_rq's ->h_load_next which entity leads back down towards
 *      @cfs_rq. The walk stops early at a level already refreshed this jiffy.
 *   2. Walk back down along the recorded ->h_load_next links, giving each
 *      child group cfs_rq its entity's share of the parent's h_load.
 *
 * NOTE(review): task_h_load() also reads cfs_rq->h_load. If its updater uses
 * the same ->last_h_load_update stamp, a value cached here could be consumed
 * there within the same jiffy (and vice versa) - confirm.
 */
static void update_cfs_rq_h_load_static(struct cfs_rq *cfs_rq)
{
	struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
	const unsigned long stamp = jiffies;

	/* Already up to date for this jiffy. */
	if (cfs_rq->last_h_load_update == stamp)
		return;

	/* Pass 1: record the path, terminating it at the starting cfs_rq. */
	WRITE_ONCE(cfs_rq->h_load_next, NULL);
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		WRITE_ONCE(cfs_rq->h_load_next, se);
		if (cfs_rq->last_h_load_update == stamp)
			break;
	}

	/*
	 * The walk ran off the end of the chain without finding a fresh level:
	 * seed the last visited cfs_rq with its own scaled-down static weight.
	 */
	if (!se) {
		cfs_rq->h_load = scale_load_down(cfs_rq->load.weight);
		cfs_rq->last_h_load_update = stamp;
	}

	/* Pass 2: propagate h_load downwards along the recorded path. */
	for (;;) {
		unsigned long share;

		se = READ_ONCE(cfs_rq->h_load_next);
		if (se == NULL)
			break;

		/* "+ 1" keeps the divisor non-zero. */
		share = div64_ul(cfs_rq->h_load * se->load.weight,
				 cfs_rq->load.weight + 1);

		cfs_rq = group_cfs_rq(se);
		cfs_rq->h_load = share;
		cfs_rq->last_h_load_update = stamp;
	}
}
/*
 * Return the hierarchical load of task @p computed from static load weights:
 * the task's weight relative to the weight of the cfs_rq it is queued on,
 * scaled by that cfs_rq's (freshly updated) h_load.
 */
static unsigned long task_h_load_static(struct task_struct *p)
{
	struct cfs_rq *owner = task_cfs_rq(p);
	unsigned long numer, denom;

	/* Make sure owner->h_load is current before it is read below. */
	update_cfs_rq_h_load_static(owner);

	numer = p->se.load.weight * owner->h_load;
	denom = owner->load.weight + 1;	/* "+ 1" keeps the divisor non-zero */

	return div64_ul(numer, denom);
}
#else
static inline void update_blocked_averages(int cpu)
{
......@@ -7534,6 +7593,11 @@ static unsigned long task_h_load(struct task_struct *p)
{
return p->se.avg.load_avg;
}
/*
 * Variant for the #else branch of the surrounding conditional: the static
 * load of task @p is simply its own scaled-down load weight.
 */
static unsigned long task_h_load_static(struct task_struct *p)
{
	unsigned long weight = p->se.load.weight;

	return scale_load_down(weight);
}
#endif
/********** Helpers for find_busiest_group ************************/
......
......@@ -85,6 +85,7 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
SCHED_FEAT(WA_IDLE, true)
SCHED_FEAT(WA_WEIGHT, true)
SCHED_FEAT(WA_BIAS, true)
/*
 * When set, wake_affine_weight() compares static load weights
 * (cfs.load.weight / task_h_load_static()) instead of the runnable load
 * values from target_load()/source_load()/task_h_load(). Off by default.
 */
SCHED_FEAT(WA_STATIC_WEIGHT, false)
/*
* UtilEstimation. Use estimated CPU utilization.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册