diff --git a/include/linux/wait.h b/include/linux/wait.h index 2232ed16635ae0929c97f62e1c8c1b09109f0649..37423e0e1379872c5ac54a7eae953f9f26568a82 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -363,7 +363,6 @@ do { \ */ #define wait_event_cmd(wq, condition, cmd1, cmd2) \ do { \ - might_sleep(); \ if (condition) \ break; \ __wait_event_cmd(wq, condition, cmd1, cmd2); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e628cb11b5606306e11f456471022fa65488976b..5eab11d4b747df5a5ff75a9e6558444c37b5c18a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1814,6 +1814,10 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_period = 0; dl_se->flags = 0; dl_se->dl_bw = 0; + + dl_se->dl_throttled = 0; + dl_se->dl_new = 1; + dl_se->dl_yielded = 0; } /* @@ -1839,7 +1843,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #endif RB_CLEAR_NODE(&p->dl.rb_node); - hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + init_dl_task_timer(&p->dl); __dl_clear_params(p); INIT_LIST_HEAD(&p->rt.run_list); @@ -2049,6 +2053,9 @@ static inline int dl_bw_cpus(int i) * allocated bandwidth to reflect the new situation. * * This function is called while holding p's rq->lock. + * + * XXX we should delay bw change until the task's 0-lag point, see + * __setparam_dl(). */ static int dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr) @@ -3251,15 +3258,31 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) { struct sched_dl_entity *dl_se = &p->dl; - init_dl_task_timer(dl_se); dl_se->dl_runtime = attr->sched_runtime; dl_se->dl_deadline = attr->sched_deadline; dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; dl_se->flags = attr->sched_flags; dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); - dl_se->dl_throttled = 0; - dl_se->dl_new = 1; - dl_se->dl_yielded = 0; + + /* + * Changing the parameters of a task is 'tricky' and we're not doing + * the correct thing -- also see task_dead_dl() and switched_from_dl(). + * + * What we SHOULD do is delay the bandwidth release until the 0-lag + * point. This would include retaining the task_struct until that time + * and change dl_overflow() to not immediately decrement the current + * amount. + * + * Instead we retain the current runtime/deadline and let the new + * parameters take effect after the current reservation period lapses. + * This is safe (albeit pessimistic) because the 0-lag point is always + * before the current scheduling deadline. + * + * We can still have temporary overloads because we do not delay the + * change in bandwidth until that time; so admission control is + * not on the safe side. It does however guarantee tasks will never + * consume more than promised. + */ } /* @@ -4642,6 +4665,9 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, struct dl_bw *cur_dl_b; unsigned long flags; + if (!cpumask_weight(cur)) + return ret; + rcu_read_lock_sched(); cur_dl_b = dl_bw_of(cpumask_any(cur)); trial_cpus = cpumask_weight(trial); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b52092f2636d50e8a816b2e7e20a648b00d6bb70..726470d47f87959fe530858f3e99024ecad6d63f 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1094,6 +1094,7 @@ static void task_dead_dl(struct task_struct *p) * Since we are TASK_DEAD we won't slip out of the domain! */ raw_spin_lock_irq(&dl_b->lock); + /* XXX we should retain the bw until 0-lag */ dl_b->total_bw -= p->dl.dl_bw; raw_spin_unlock_irq(&dl_b->lock); @@ -1614,8 +1615,8 @@ static void cancel_dl_timer(struct rq *rq, struct task_struct *p) static void switched_from_dl(struct rq *rq, struct task_struct *p) { + /* XXX we should retain the bw until 0-lag */ cancel_dl_timer(rq, p); - __dl_clear_params(p); /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40667cbf371ba9e8732e6c30940cc146752ee0c3..fe331fc391f53382f4999c0cd0f13367a827b3cc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1730,7 +1730,7 @@ static int preferred_group_nid(struct task_struct *p, int nid) nodes = node_online_map; for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) { unsigned long max_faults = 0; - nodemask_t max_group; + nodemask_t max_group = NODE_MASK_NONE; int a, b; /* Are there nodes at this distance from each other? */