提交 98ec21a0 编写于 作者: L Linus Torvalds

Merge branch 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Thomas Gleixner:
 "This series of scheduler updates depends on sched/core and timers/core
  branches, which are already in your tree:

   - Scheduler balancing overhaul to plug a hard to trigger race which
     causes an oops in the balancer (Peter Zijlstra)

   - Lockdep updates which are related to the balancing updates (Peter
     Zijlstra)"

* 'sched-hrtimers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched,lockdep: Employ lock pinning
  lockdep: Implement lock pinning
  lockdep: Simplify lock_release()
  sched: Streamline the task migration locking a little
  sched: Move code around
  sched,dl: Fix sched class hopping CBS hole
  sched, dl: Convert switched_{from, to}_dl() / prio_changed_dl() to balance callbacks
  sched,dl: Remove return value from pull_dl_task()
  sched, rt: Convert switched_{from, to}_rt() / prio_changed_rt() to balance callbacks
  sched,rt: Remove return value from pull_rt_task()
  sched: Allow balance callbacks for check_class_changed()
  sched: Use replace normalize_task() with __sched_setscheduler()
  sched: Replace post_schedule with a balance callback list
......@@ -255,6 +255,7 @@ struct held_lock {
unsigned int check:1; /* see lock_acquire() comment */
unsigned int hardirqs_off:1;
unsigned int references:12; /* 32 bits */
unsigned int pin_count;
};
/*
......@@ -354,6 +355,9 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
extern void lock_pin_lock(struct lockdep_map *lock);
extern void lock_unpin_lock(struct lockdep_map *lock);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
......@@ -368,6 +372,9 @@ extern void lockdep_trace_alloc(gfp_t mask);
#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map)
#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map)
#else /* !CONFIG_LOCKDEP */
static inline void lockdep_off(void)
......@@ -420,6 +427,9 @@ struct lock_class_key { };
#define lockdep_recursing(tsk) (0)
#define lockdep_pin_lock(l) do { (void)(l); } while (0)
#define lockdep_unpin_lock(l) do { (void)(l); } while (0)
#endif /* !LOCKDEP */
#ifdef CONFIG_LOCK_STAT
......
......@@ -3157,6 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->waittime_stamp = 0;
hlock->holdtime_stamp = lockstat_clock();
#endif
hlock->pin_count = 0;
if (check && !mark_irqflags(curr, hlock))
return 0;
......@@ -3260,26 +3261,6 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
return 0;
}
/*
* Common debugging checks for both nested and non-nested unlock:
*/
static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
unsigned long ip)
{
if (unlikely(!debug_locks))
return 0;
/*
* Lockdep should run with IRQs disabled, recursion, head-ache, etc..
*/
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
if (curr->lockdep_depth <= 0)
return print_unlock_imbalance_bug(curr, lock, ip);
return 1;
}
static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
{
if (hlock->instance == lock)
......@@ -3376,31 +3357,35 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
}
/*
* Remove the lock to the list of currently held locks in a
* potentially non-nested (out of order) manner. This is a
* relatively rare operation, as all the unlock APIs default
* to nested mode (which uses lock_release()):
* Remove the lock from the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()).
*
* @nested is an hysterical artifact, needs a tree wide cleanup.
*/
static int
lock_release_non_nested(struct task_struct *curr,
struct lockdep_map *lock, unsigned long ip)
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
unsigned int depth;
int i;
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
if (unlikely(!debug_locks))
return 0;
depth = curr->lockdep_depth;
/*
* So we're all set to release this lock.. wait what lock? We don't
* own any locks, you've been drinking again?
*/
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
if (DEBUG_LOCKS_WARN_ON(depth <= 0))
return print_unlock_imbalance_bug(curr, lock, ip);
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
......@@ -3419,6 +3404,8 @@ lock_release_non_nested(struct task_struct *curr,
if (hlock->instance == lock)
lock_release_holdtime(hlock);
WARN(hlock->pin_count, "releasing a pinned lock\n");
if (hlock->references) {
hlock->references--;
if (hlock->references) {
......@@ -3456,91 +3443,66 @@ lock_release_non_nested(struct task_struct *curr,
*/
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
return 0;
return 1;
}
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()). This is done for unlocks that nest
* perfectly. (i.e. the current top of the lock-stack is unlocked)
*/
static int lock_release_nested(struct task_struct *curr,
struct lockdep_map *lock, unsigned long ip)
static int __lock_is_held(struct lockdep_map *lock)
{
struct held_lock *hlock;
unsigned int depth;
/*
* Pop off the top of the lock stack:
*/
depth = curr->lockdep_depth - 1;
hlock = curr->held_locks + depth;
/*
* Is the unlock non-nested:
*/
if (hlock->instance != lock || hlock->references)
return lock_release_non_nested(curr, lock, ip);
curr->lockdep_depth--;
/*
* No more locks, but somehow we've got hash left over, who left it?
*/
if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
return 0;
struct task_struct *curr = current;
int i;
curr->curr_chain_key = hlock->prev_chain_key;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
lock_release_holdtime(hlock);
if (match_held_lock(hlock, lock))
return 1;
}
#ifdef CONFIG_DEBUG_LOCKDEP
hlock->prev_chain_key = 0;
hlock->class_idx = 0;
hlock->acquire_ip = 0;
hlock->irq_context = 0;
#endif
return 1;
return 0;
}
/*
* Remove the lock to the list of currently held locks - this gets
* called on mutex_unlock()/spin_unlock*() (or on a failed
* mutex_lock_interruptible()). This is done for unlocks that nest
* perfectly. (i.e. the current top of the lock-stack is unlocked)
*/
static void
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
static void __lock_pin_lock(struct lockdep_map *lock)
{
struct task_struct *curr = current;
int i;
if (!check_unlock(curr, lock, ip))
if (unlikely(!debug_locks))
return;
if (nested) {
if (!lock_release_nested(curr, lock, ip))
return;
} else {
if (!lock_release_non_nested(curr, lock, ip))
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock)) {
hlock->pin_count++;
return;
}
}
check_chain_key(curr);
WARN(1, "pinning an unheld lock\n");
}
static int __lock_is_held(struct lockdep_map *lock)
static void __lock_unpin_lock(struct lockdep_map *lock)
{
struct task_struct *curr = current;
int i;
if (unlikely(!debug_locks))
return;
for (i = 0; i < curr->lockdep_depth; i++) {
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock))
return 1;
if (match_held_lock(hlock, lock)) {
if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
return;
hlock->pin_count--;
return;
}
}
return 0;
WARN(1, "unpinning an unheld lock\n");
}
/*
......@@ -3639,7 +3601,8 @@ void lock_release(struct lockdep_map *lock, int nested,
check_flags(flags);
current->lockdep_recursion = 1;
trace_lock_release(lock, ip);
__lock_release(lock, nested, ip);
if (__lock_release(lock, nested, ip))
check_chain_key(current);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
......@@ -3665,6 +3628,40 @@ int lock_is_held(struct lockdep_map *lock)
}
EXPORT_SYMBOL_GPL(lock_is_held);
/*
 * lock_pin_lock - exported wrapper around __lock_pin_lock() for @lock.
 *
 * Does nothing when current->lockdep_recursion is already set. Otherwise
 * the local IRQ state is saved, the recursion flag is raised for the
 * duration of the __lock_pin_lock() call, and both are restored before
 * returning.
 */
void lock_pin_lock(struct lockdep_map *lock)
{
unsigned long flags;
/* Recursion flag already set on this task: bail out without touching state. */
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
/* Flag is set only around the helper call and cleared right after it. */
current->lockdep_recursion = 1;
__lock_pin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_pin_lock);
/*
 * lock_unpin_lock - exported wrapper around __lock_unpin_lock() for @lock.
 *
 * Does nothing when current->lockdep_recursion is already set. Otherwise
 * the local IRQ state is saved, the recursion flag is raised for the
 * duration of the __lock_unpin_lock() call, and both are restored before
 * returning.
 */
void lock_unpin_lock(struct lockdep_map *lock)
{
unsigned long flags;
/* Recursion flag already set on this task: bail out without touching state. */
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
/* Flag is set only around the helper call and cleared right after it. */
current->lockdep_recursion = 1;
__lock_unpin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_unpin_lock);
void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
{
current->lockdep_reclaim_gfp = gfp_mask;
......
此差异已折叠。
......@@ -213,14 +213,28 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return dl_task(prev);
}
static inline void set_post_schedule(struct rq *rq)
static DEFINE_PER_CPU(struct callback_head, dl_push_head);
static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
static void push_dl_tasks(struct rq *);
static void pull_dl_task(struct rq *);
static inline void queue_push_tasks(struct rq *rq)
{
rq->post_schedule = has_pushable_dl_tasks(rq);
if (!has_pushable_dl_tasks(rq))
return;
queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
}
/*
 * Queue pull_dl_task() as a balance callback on @rq, using the per-CPU
 * dl_pull_head that belongs to rq->cpu as the list node. Unlike
 * queue_push_tasks(), no precondition is checked here before queueing.
 */
static inline void queue_pull_task(struct rq *rq)
{
queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
}
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
{
struct rq *later_rq = NULL;
bool fallback = false;
......@@ -254,14 +268,19 @@ static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
double_lock_balance(rq, later_rq);
}
/*
* By now the task is replenished and enqueued; migrate it.
*/
deactivate_task(rq, p, 0);
set_task_cpu(p, later_rq->cpu);
activate_task(later_rq, p, ENQUEUE_REPLENISH);
activate_task(later_rq, p, 0);
if (!fallback)
resched_curr(later_rq);
double_unlock_balance(rq, later_rq);
double_unlock_balance(later_rq, rq);
return later_rq;
}
#else
......@@ -291,12 +310,15 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return false;
}
static inline int pull_dl_task(struct rq *rq)
static inline void pull_dl_task(struct rq *rq)
{
}
static inline void queue_push_tasks(struct rq *rq)
{
return 0;
}
static inline void set_post_schedule(struct rq *rq)
static inline void queue_pull_task(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
......@@ -498,22 +520,23 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
* actually started or not (i.e., the replenishment instant is in
* the future or in the past).
*/
static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
static int start_dl_timer(struct task_struct *p)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
struct sched_dl_entity *dl_se = &p->dl;
struct hrtimer *timer = &dl_se->dl_timer;
struct rq *rq = task_rq(p);
ktime_t now, act;
s64 delta;
if (boosted)
return 0;
lockdep_assert_held(&rq->lock);
/*
* We want the timer to fire at the deadline, but considering
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
*/
act = ns_to_ktime(dl_se->deadline);
now = hrtimer_cb_get_time(&dl_se->dl_timer);
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
......@@ -525,7 +548,19 @@ static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
if (ktime_us_delta(act, now) < 0)
return 0;
hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS);
/*
* !enqueued will guarantee another callback; even if one is already in
* progress. This ensures a balanced {get,put}_task_struct().
*
* The race against __run_timer() clearing the enqueued state is
* harmless because we're holding task_rq()->lock, therefore the timer
* expiring after we've done the check will wait on its task_rq_lock()
* and observe our state.
*/
if (!hrtimer_is_queued(timer)) {
get_task_struct(p);
hrtimer_start(timer, act, HRTIMER_MODE_ABS);
}
return 1;
}
......@@ -555,35 +590,40 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
rq = task_rq_lock(p, &flags);
/*
* We need to take care of several possible races here:
*
* - the task might have changed its scheduling policy
* to something different than SCHED_DEADLINE
* - the task might have changed its reservation parameters
* (through sched_setattr())
* - the task might have been boosted by someone else and
* might be in the boosting/deboosting path
* The task might have changed its scheduling policy to something
* different than SCHED_DEADLINE (through switched_from_dl()).
*/
if (!dl_task(p)) {
__dl_clear_params(p);
goto unlock;
}
/*
* This is possible if switched_from_dl() raced against a running
* callback that took the above !dl_task() path and we've since then
* switched back into SCHED_DEADLINE.
*
* In all this cases we bail out, as the task is already
* in the runqueue or is going to be enqueued back anyway.
* There's nothing to do except drop our task reference.
*/
if (!dl_task(p) || dl_se->dl_new ||
dl_se->dl_boosted || !dl_se->dl_throttled)
if (dl_se->dl_new)
goto unlock;
sched_clock_tick();
update_rq_clock(rq);
/*
* The task might have been boosted by someone else and might be in the
* boosting/deboosting path, it's not throttled.
*/
if (dl_se->dl_boosted)
goto unlock;
#ifdef CONFIG_SMP
/*
* If we find that the rq the task was on is no longer
* available, we need to select a new rq.
* Spurious timer due to start_dl_timer() race; or we already received
* a replenishment from rt_mutex_setprio().
*/
if (unlikely(!rq->online)) {
dl_task_offline_migration(rq, p);
if (!dl_se->dl_throttled)
goto unlock;
}
#endif
sched_clock_tick();
update_rq_clock(rq);
/*
* If the throttle happened during sched-out; like:
......@@ -609,17 +649,38 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
check_preempt_curr_dl(rq, p, 0);
else
resched_curr(rq);
#ifdef CONFIG_SMP
/*
* Queueing this task back might have overloaded rq,
* check if we need to kick someone away.
* Perform balancing operations here; after the replenishments. We
* cannot drop rq->lock before this, otherwise the assertion in
* start_dl_timer() about not missing updates is not true.
*
* If we find that the rq the task was on is no longer available, we
* need to select a new rq.
*
* XXX figure out if select_task_rq_dl() deals with offline cpus.
*/
if (unlikely(!rq->online))
rq = dl_task_offline_migration(rq, p);
/*
* Queueing this task back might have overloaded rq, check if we need
* to kick someone away.
*/
if (has_pushable_dl_tasks(rq))
push_dl_task(rq);
#endif
unlock:
task_rq_unlock(rq, p, &flags);
/*
* This can free the task_struct, including this hrtimer, do not touch
* anything related to that after this.
*/
put_task_struct(p);
return HRTIMER_NORESTART;
}
......@@ -679,7 +740,7 @@ static void update_curr_dl(struct rq *rq)
if (dl_runtime_exceeded(dl_se)) {
dl_se->dl_throttled = 1;
__dequeue_task_dl(rq, curr, 0);
if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
if (!is_leftmost(curr, &rq->dl))
......@@ -1036,8 +1097,6 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
static int pull_dl_task(struct rq *this_rq);
#endif /* CONFIG_SMP */
/*
......@@ -1094,7 +1153,15 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
dl_rq = &rq->dl;
if (need_pull_dl_task(rq, prev)) {
/*
* This is OK, because current is on_cpu, which avoids it being
* picked for load-balance and preemption/IRQs are still
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
pull_dl_task(rq);
lockdep_pin_lock(&rq->lock);
/*
* pull_dl_task() can drop (and re-acquire) rq->lock; this
* means a stop task can slip in, in which case we need to
......@@ -1128,7 +1195,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
if (hrtick_enabled(rq))
start_hrtick_dl(rq, p);
set_post_schedule(rq);
queue_push_tasks(rq);
return p;
}
......@@ -1165,7 +1232,6 @@ static void task_fork_dl(struct task_struct *p)
static void task_dead_dl(struct task_struct *p)
{
struct hrtimer *timer = &p->dl.dl_timer;
struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
/*
......@@ -1175,8 +1241,6 @@ static void task_dead_dl(struct task_struct *p)
/* XXX we should retain the bw until 0-lag */
dl_b->total_bw -= p->dl.dl_bw;
raw_spin_unlock_irq(&dl_b->lock);
hrtimer_cancel(timer);
}
static void set_curr_task_dl(struct rq *rq)
......@@ -1504,15 +1568,16 @@ static void push_dl_tasks(struct rq *rq)
;
}
static int pull_dl_task(struct rq *this_rq)
static void pull_dl_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, ret = 0, cpu;
int this_cpu = this_rq->cpu, cpu;
struct task_struct *p;
bool resched = false;
struct rq *src_rq;
u64 dmin = LONG_MAX;
if (likely(!dl_overloaded(this_rq)))
return 0;
return;
/*
* Match the barrier from dl_set_overloaded; this guarantees that if we
......@@ -1567,7 +1632,7 @@ static int pull_dl_task(struct rq *this_rq)
src_rq->curr->dl.deadline))
goto skip;
ret = 1;
resched = true;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
......@@ -1580,12 +1645,8 @@ static int pull_dl_task(struct rq *this_rq)
double_unlock_balance(this_rq, src_rq);
}
return ret;
}
static void post_schedule_dl(struct rq *rq)
{
push_dl_tasks(rq);
if (resched)
resched_curr(this_rq);
}
/*
......@@ -1701,37 +1762,16 @@ void __init init_sched_dl_class(void)
#endif /* CONFIG_SMP */
/*
* Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
*/
static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
{
struct hrtimer *dl_timer = &p->dl.dl_timer;
/* Nobody will change task's class if pi_lock is held */
lockdep_assert_held(&p->pi_lock);
if (hrtimer_active(dl_timer)) {
int ret = hrtimer_try_to_cancel(dl_timer);
if (unlikely(ret == -1)) {
/*
* Note, p may migrate OR new deadline tasks
* may appear in rq when we are unlocking it.
* A caller of us must be fine with that.
*/
raw_spin_unlock(&rq->lock);
hrtimer_cancel(dl_timer);
raw_spin_lock(&rq->lock);
}
}
}
static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
/* XXX we should retain the bw until 0-lag */
cancel_dl_timer(rq, p);
__dl_clear_params(p);
/*
* Start the deadline timer; if we switch back to dl before this we'll
* continue consuming our current CBS slice. If we stay outside of
* SCHED_DEADLINE until the deadline passes, the timer will reset the
* task.
*/
if (!start_dl_timer(p))
__dl_clear_params(p);
/*
* Since this might be the only -deadline task on the rq,
......@@ -1741,8 +1781,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
return;
if (pull_dl_task(rq))
resched_curr(rq);
queue_pull_task(rq);
}
/*
......@@ -1751,21 +1790,16 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
*/
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
push_dl_task(rq) && rq != task_rq(p))
/* Only reschedule if pushing failed */
check_resched = 0;
#endif /* CONFIG_SMP */
if (check_resched) {
if (dl_task(rq->curr))
check_preempt_curr_dl(rq, p, 0);
else
resched_curr(rq);
}
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
queue_push_tasks(rq);
#else
if (dl_task(rq->curr))
check_preempt_curr_dl(rq, p, 0);
else
resched_curr(rq);
#endif
}
}
......@@ -1785,15 +1819,14 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
* or lowering its prio, so...
*/
if (!rq->dl.overloaded)
pull_dl_task(rq);
queue_pull_task(rq);
/*
* If we now have an earlier deadline task than p,
* then reschedule, provided p is still on this
* runqueue.
*/
if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
rq->curr == p)
if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
resched_curr(rq);
#else
/*
......@@ -1823,7 +1856,6 @@ const struct sched_class dl_sched_class = {
.set_cpus_allowed = set_cpus_allowed_dl,
.rq_online = rq_online_dl,
.rq_offline = rq_offline_dl,
.post_schedule = post_schedule_dl,
.task_woken = task_woken_dl,
#endif
......
......@@ -5392,7 +5392,15 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
return p;
idle:
/*
* This is OK, because current is on_cpu, which avoids it being picked
* for load-balance and preemption/IRQs are still disabled avoiding
* further scheduler activity on it and we're being very careful to
* re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
new_tasks = idle_balance(rq);
lockdep_pin_lock(&rq->lock);
/*
* Because idle_balance() releases (and re-acquires) rq->lock, it is
* possible for any higher priority task to appear. In that case we
......@@ -7426,9 +7434,6 @@ static int idle_balance(struct rq *this_rq)
goto out;
}
/*
* Drop the rq->lock, but keep IRQ/preempt disabled.
*/
raw_spin_unlock(&this_rq->lock);
update_blocked_averages(this_cpu);
......
......@@ -260,7 +260,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
#ifdef CONFIG_SMP
static int pull_rt_task(struct rq *this_rq);
static void pull_rt_task(struct rq *this_rq);
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
......@@ -354,13 +354,23 @@ static inline int has_pushable_tasks(struct rq *rq)
return !plist_head_empty(&rq->rt.pushable_tasks);
}
static inline void set_post_schedule(struct rq *rq)
static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);
static inline void queue_push_tasks(struct rq *rq)
{
/*
* We detect this state here so that we can avoid taking the RQ
* lock again later if there is no need to push
*/
rq->post_schedule = has_pushable_tasks(rq);
if (!has_pushable_tasks(rq))
return;
queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}
/*
 * Queue pull_rt_task() as a balance callback on @rq, using the per-CPU
 * rt_pull_head that belongs to rq->cpu as the list node. Unlike
 * queue_push_tasks(), no precondition is checked here before queueing.
 */
static inline void queue_pull_task(struct rq *rq)
{
queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
......@@ -412,12 +422,11 @@ static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
return false;
}
static inline int pull_rt_task(struct rq *this_rq)
static inline void pull_rt_task(struct rq *this_rq)
{
return 0;
}
static inline void set_post_schedule(struct rq *rq)
static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */
......@@ -1469,7 +1478,15 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
struct rt_rq *rt_rq = &rq->rt;
if (need_pull_rt_task(rq, prev)) {
/*
* This is OK, because current is on_cpu, which avoids it being
* picked for load-balance and preemption/IRQs are still
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
lockdep_unpin_lock(&rq->lock);
pull_rt_task(rq);
lockdep_pin_lock(&rq->lock);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a dl or stop task can slip in, in which case we need
......@@ -1497,7 +1514,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
set_post_schedule(rq);
queue_push_tasks(rq);
return p;
}
......@@ -1952,14 +1969,15 @@ static void push_irq_work_func(struct irq_work *work)
}
#endif /* HAVE_RT_PUSH_IPI */
static int pull_rt_task(struct rq *this_rq)
static void pull_rt_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, ret = 0, cpu;
int this_cpu = this_rq->cpu, cpu;
bool resched = false;
struct task_struct *p;
struct rq *src_rq;
if (likely(!rt_overloaded(this_rq)))
return 0;
return;
/*
* Match the barrier from rt_set_overloaded; this guarantees that if we
......@@ -1970,7 +1988,7 @@ static int pull_rt_task(struct rq *this_rq)
#ifdef HAVE_RT_PUSH_IPI
if (sched_feat(RT_PUSH_IPI)) {
tell_cpu_to_push(this_rq);
return 0;
return;
}
#endif
......@@ -2023,7 +2041,7 @@ static int pull_rt_task(struct rq *this_rq)
if (p->prio < src_rq->curr->prio)
goto skip;
ret = 1;
resched = true;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
......@@ -2039,12 +2057,8 @@ static int pull_rt_task(struct rq *this_rq)
double_unlock_balance(this_rq, src_rq);
}
return ret;
}
static void post_schedule_rt(struct rq *rq)
{
push_rt_tasks(rq);
if (resched)
resched_curr(this_rq);
}
/*
......@@ -2140,8 +2154,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
return;
if (pull_rt_task(rq))
resched_curr(rq);
queue_pull_task(rq);
}
void __init init_sched_rt_class(void)
......@@ -2162,8 +2175,6 @@ void __init init_sched_rt_class(void)
*/
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
/*
* If we are already running, then there's nothing
* that needs to be done. But if we are not running
......@@ -2173,13 +2184,12 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
*/
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
/* Don't resched if we changed runqueues */
push_rt_task(rq) && rq != task_rq(p))
check_resched = 0;
#endif /* CONFIG_SMP */
if (check_resched && p->prio < rq->curr->prio)
if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
queue_push_tasks(rq);
#else
if (p->prio < rq->curr->prio)
resched_curr(rq);
#endif /* CONFIG_SMP */
}
}
......@@ -2200,14 +2210,13 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
* may need to pull tasks to this runqueue.
*/
if (oldprio < p->prio)
pull_rt_task(rq);
queue_pull_task(rq);
/*
* If there's a higher priority task waiting to run
* then reschedule. Note, the above pull_rt_task
* can release the rq lock and p could migrate.
* Only reschedule if p is still on the same runqueue.
* then reschedule.
*/
if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
if (p->prio > rq->rt.highest_prio.curr)
resched_curr(rq);
#else
/* For UP simply resched on drop of prio */
......@@ -2318,7 +2327,6 @@ const struct sched_class rt_sched_class = {
.set_cpus_allowed = set_cpus_allowed_rt,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.post_schedule = post_schedule_rt,
.task_woken = task_woken_rt,
.switched_from = switched_from_rt,
#endif
......
......@@ -624,9 +624,10 @@ struct rq {
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback;
unsigned char idle_balance;
/* For active balancing */
int post_schedule;
int active_balance;
int push_cpu;
struct cpu_stop_work active_balance_work;
......@@ -767,6 +768,21 @@ extern int migrate_swap(struct task_struct *, struct task_struct *);
#ifdef CONFIG_SMP
/*
 * queue_balance_callback - link @head, carrying @func, onto the front of
 * rq->balance_callback.
 *
 * @rq:   runqueue whose callback list is extended; rq->lock must be held
 *        (asserted below).
 * @head: list node to queue.
 * @func: function recorded in @head->func.
 */
static inline void
queue_balance_callback(struct rq *rq,
struct callback_head *head,
void (*func)(struct rq *rq))
{
lockdep_assert_held(&rq->lock);
/*
 * A non-NULL ->next is taken to mean @head is already on a list, in
 * which case nothing is changed.
 *
 * NOTE(review): an entry queued onto an empty list gets ->next == NULL,
 * so this check would not recognise it as queued - confirm that the same
 * head cannot be queued twice before the list is drained.
 */
if (unlikely(head->next))
return;
/*
 * @func is stored through the callback_head function-pointer type.
 * NOTE(review): whoever runs the list presumably casts it back to
 * void (*)(struct rq *) - confirm in the consumer.
 */
head->func = (void (*)(struct callback_head *))func;
head->next = rq->balance_callback;
rq->balance_callback = head;
}
extern void sched_ttwu_pending(void);
#define rcu_dereference_check_sched_domain(p) \
......@@ -1192,7 +1208,6 @@ struct sched_class {
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
void (*post_schedule) (struct rq *this_rq);
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
......@@ -1423,8 +1438,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
......@@ -1461,8 +1478,10 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
* If we observe the new cpu in task_rq_lock, the acquire will
* pair with the WMB to ensure we must then also see migrating.
*/
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
lockdep_pin_lock(&rq->lock);
return rq;
}
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
......@@ -1474,6 +1493,7 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
/*
 * Unpin rq->lock for lockdep and then release it. The unpin has to come
 * first: releasing a lock whose pin_count is still non-zero triggers the
 * "releasing a pinned lock" warning in lockdep's release path.
 */
static inline void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
}
......@@ -1482,6 +1502,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
__releases(rq->lock)
__releases(p->pi_lock)
{
lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册