/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 *  Copyright (C) 2006 Esben Nielsen
 *
 *  See Documentation/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *				is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit0 before looking at the lock, and the owner may be
 * NULL in this small window, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */

static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	lock->owner = (struct task_struct *)val;
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		clear_rt_mutex_waiters(lock);
}

/*
 * We can speed up the acquire/release if the architecture
 * supports cmpxchg and there's no debugging state to be set up
 */
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
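/*
 * Set the RT_MUTEX_HAS_WAITERS bit in lock->owner atomically. With the
 * bit set, the cmpxchg based fast paths above cannot succeed, so all
 * contending tasks are forced into the slow path.
 */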
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock(&lock->wait_lock);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg(lock, owner, NULL);
}

#else
# define rt_mutex_cmpxchg(l,c,n)	(0)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock(&lock->wait_lock);
	return true;
}
#endif

static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
		     struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return (left->task->dl.deadline < right->task->dl.deadline);

	return 0;
}

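/*
 * Enqueue @waiter into @lock's rbtree of waiters, ordered by
 * rt_mutex_waiter_less(), and keep the cached leftmost (highest
 * priority) node up to date.
 */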
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &lock->waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		lock->waiters_leftmost = &waiter->tree_entry;

	rb_link_node(&waiter->tree_entry, parent, link);
	rb_insert_color(&waiter->tree_entry, &lock->waiters);
}

static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	if (lock->waiters_leftmost == &waiter->tree_entry)
		lock->waiters_leftmost = rb_next(&waiter->tree_entry);

	rb_erase(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

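/*
 * Enqueue @waiter into @task's pi_waiters rbtree. This tree contains
 * the top waiter of each rt_mutex owned by @task and is the basis for
 * the priority (de)boosting decisions.
 */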
static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &task->pi_waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		task->pi_waiters_leftmost = &waiter->pi_tree_entry;

	rb_link_node(&waiter->pi_tree_entry, parent, link);
	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}

static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);

	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

/*
 * Calculate task priority from the waiter tree priority
 *
 * Return task->normal_prio when the waiter tree is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return NULL;

	return task_top_pi_waiter(task)->task;
}

/*
 * Called by sched_setscheduler() to check whether the priority change
 * is overruled by a possible priority boosting.
 */
int rt_mutex_check_prio(struct task_struct *task, int newprio)
{
	if (!task_has_pi_waiters(task))
		return 0;

	return task_top_pi_waiter(task)->task->prio <= newprio;
}

/*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
}

/*
 * Adjust task priority (undo boosting). Called from the exit path of
 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 *
 * (Note: We do this outside of the protection of lock->wait_lock to
 * allow the lock to be taken while or before we readjust the priority
 * of task. We do not use the spin_xx_mutex() variants here as we are
 * outside of the debug path.)
 */
static void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @deadlock_detect: do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 *	again:
 *	  loop_sanity_check();
 *	retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      int deadlock_detect,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int detect_deadlock, ret = 0, depth = 0;
	struct rt_mutex *lock;
	unsigned long flags;

	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
							 deadlock_detect);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before!
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	/*
	 * [2] Get the waiter on which @task is blocked on.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out when the task has no waiters. Note that
	 * top_waiter can be NULL when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task.
		 */
		if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
			goto out_unlock_pi;
	}

	/*
	 * When deadlock detection is off, we check whether further
	 * priority adjustment is necessary.
	 */
	if (!detect_deadlock && waiter->prio == task->prio)
		goto out_unlock_pi;

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter list. */
	rt_mutex_dequeue(lock, waiter);
	waiter->prio = task->prio;
	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_process(rt_mutex_top_waiter(lock)->task);
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters list with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters list with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	raw_spin_unlock(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held.
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait list if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
				struct rt_mutex_waiter *waiter)
{
	unsigned long flags;

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter list. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock))
			return 0;

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters list.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock.  @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/*
			 * If @task->prio is greater than or equal to
			 * the top waiter priority (kernel view),
			 * @task lost.
			 */
			if (task->prio >= rt_mutex_top_waiter(lock)->prio)
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters list.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters list.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	rt_mutex_deadlock_account_lock(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   int detect_deadlock)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;
	unsigned long flags;

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	if (!owner)
		return 0;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
					 next_lock, waiter, task);

	raw_spin_lock(&lock->wait_lock);

	return res;
}

/*
 * Wake up the next waiter on the lock.
 *
 * Remove the top waiter from the current task's pi waiter list and
 * wake it up.
 *
 * Called with lock->wait_lock held.
 */
static void wakeup_next_waiter(struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	rt_mutex_dequeue_pi(current, waiter);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * It's safe to dereference waiter as it cannot go away as
	 * long as we hold lock->wait_lock. The waiter task needs to
	 * acquire it in order to dequeue the waiter.
	 */
	wake_up_process(waiter->task);
}

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and
 * have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	__rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);

	raw_spin_lock(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || (waiter->prio == task->prio &&
			!dl_prio(task->prio))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
}

/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 * 			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * lock->wait_lock must be held by the caller.
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		raw_spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule_rt_mutex(lock);

		raw_spin_lock(&lock->wait_lock);
		set_current_state(state);
	}

	return ret;
}

static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	RB_CLEAR_NODE(&waiter.pi_tree_entry);
	RB_CLEAR_NODE(&waiter.tree_entry);

	raw_spin_lock(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);

	if (likely(!ret))
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret)) {
		remove_waiter(lock, &waiter);
		rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and
	 * try to acquire the lock.
	 */
	raw_spin_lock(&lock->wait_lock);

	ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}

/*
 * Slow path to release a rt-mutex:
 */
static void __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
	raw_spin_lock(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock);   <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock(&lock->wait_lock);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 */
	wakeup_next_waiter(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Undo pi boosting if necessary: */
	rt_mutex_adjust_prio(current);
}

/*
 * debug aware fast / slowpath lock,trylock,unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  int detect_deadlock,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
1234
				int detect_deadlock))
I
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, NULL, detect_deadlock);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout, int detect_deadlock,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      int detect_deadlock))
{
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, timeout, detect_deadlock);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 1;
	}
	return slowfn(lock);
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
		    void (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
		rt_mutex_deadlock_account_unlock(current);
	else
		slowfn(lock);
}

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	might_sleep();

	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock: 		the rt_mutex to be locked
 * @detect_deadlock:	deadlock detection on/off
 *
 * Returns:
 *  0 		on success
 * -EINTR 	when interrupted by a signal
 * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
						 int detect_deadlock)
{
	might_sleep();

	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
				 detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible
 *			the timeout structure is provided
 *			by the caller
 *
 * @lock: 		the rt_mutex to be locked
 * @timeout:		timeout structure or NULL (no timeout)
 * @detect_deadlock:	deadlock detection on/off
 *
 * Returns:
 *  0 		on success
 * -EINTR 	when interrupted by a signal
 * -ETIMEDOUT	when the timeout expired
 * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
		    int detect_deadlock)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:	the rt_mutex to be locked
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
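
/*
 * Usage sketch for the lock/unlock API above (illustration only;
 * "my_dev" and its embedded rt_mutex "lock" are hypothetical and not
 * part of this file):
 *
 *	rt_mutex_lock(&my_dev->lock);
 *	... critical section; while we own the lock we inherit the
 *	priority of the highest priority waiter ...
 *	rt_mutex_unlock(&my_dev->lock);
 */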

/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);

/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	lock->owner = NULL;
	raw_spin_lock_init(&lock->wait_lock);
	lock->waiters = RB_ROOT;
	lock->waiters_leftmost = NULL;

	debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock: 	the rt_mutex to be locked
 * @proxy_owner:the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	__rt_mutex_init(lock, NULL);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock: 	the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
	rt_mutex_deadlock_account_unlock(proxy_owner);
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task, int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 1;
	}

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain.  Let the waiter sort it out.
		 */
		ret = 0;
	}

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	raw_spin_unlock(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already have
 * 			been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter,
			       int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

1545
	raw_spin_unlock(&lock->wait_lock);
D
	return ret;
}