/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 *  Copyright (C) 2006 Esben Nielsen
 *
 *  See Documentation/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *				is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit0 before looking at the lock, and the owner may
 * be NULL in this small window, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */

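/*
 * Set the owner of @lock, preserving the RT_MUTEX_HAS_WAITERS bit
 * when waiters are still enqueued on the lock.
 */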
static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	lock->owner = (struct task_struct *)val;
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

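/*
 * Clear a stale RT_MUTEX_HAS_WAITERS bit that a slow path operation
 * may have left behind although no waiters are queued anymore (see
 * the (**) note above).
 */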
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		clear_rt_mutex_waiters(lock);
}

/*
 * We can speed up the acquire/release, if the architecture
 * supports cmpxchg and if there's no debugging state to be set up
 */
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
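/*
 * Atomically set the RT_MUTEX_HAS_WAITERS bit in lock->owner, forcing
 * all subsequent lock/unlock operations on @lock into the slow path.
 */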
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock(&lock->wait_lock);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg(lock, owner, NULL);
}

#else
# define rt_mutex_cmpxchg(l,c,n)	(0)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock(&lock->wait_lock);
	return true;
}
#endif

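/*
 * Waiter ordering: the lower prio value (i.e. the higher priority)
 * comes first; between two deadline tasks the earlier deadline wins.
 */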
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
		     struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return (left->task->dl.deadline < right->task->dl.deadline);

	return 0;
}

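/*
 * Enqueue @waiter into the priority ordered rbtree of @lock's waiters
 * and cache the leftmost (highest priority) node.
 */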
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &lock->waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		lock->waiters_leftmost = &waiter->tree_entry;

	rb_link_node(&waiter->tree_entry, parent, link);
	rb_insert_color(&waiter->tree_entry, &lock->waiters);
}

static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	if (lock->waiters_leftmost == &waiter->tree_entry)
		lock->waiters_leftmost = rb_next(&waiter->tree_entry);

	rb_erase(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

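/*
 * Enqueue @waiter into @task's pi_waiters rbtree, which holds the top
 * waiter of each lock owned by @task and is the basis for the boosted
 * priority computed in rt_mutex_getprio().
 */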
static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &task->pi_waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		task->pi_waiters_leftmost = &waiter->pi_tree_entry;

	rb_link_node(&waiter->pi_tree_entry, parent, link);
	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}

static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);

	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

/*
 * Calculate task priority from the waiter tree priority
 *
 * Return task->normal_prio when the waiter tree is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

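/*
 * Return the task which currently donates the highest priority to
 * @task, or NULL if @task has no pi waiters.
 */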
struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return NULL;

	return task_top_pi_waiter(task)->task;
}

/*
 * Called by sched_setscheduler() to check whether the priority change
 * is overruled by a possible priority boosting.
 */
int rt_mutex_check_prio(struct task_struct *task, int newprio)
{
	if (!task_has_pi_waiters(task))
		return 0;

	return task_top_pi_waiter(task)->task->prio <= newprio;
}

/*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
}

/*
 * Adjust task priority (undo boosting). Called from the exit path of
 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 *
 * (Note: We do this outside of the protection of lock->wait_lock to
 * allow the lock to be taken while or before we readjust the priority
 * of task. We do not use the spin_xx_mutex() variants here as we are
 * outside of the debug path.)
 */
static void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

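/*
 * Return the lock on which @p is currently blocked, or NULL if it is
 * not blocked on any rtmutex.
 */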
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @deadlock_detect: do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter is gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      int deadlock_detect,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex *lock;
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	int detect_deadlock, ret = 0, depth = 0;
	unsigned long flags;

	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
							 deadlock_detect);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}
 retry:
	/*
	 * Task can not go away as we did a get_task() before !
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task.
		 */
		if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
			goto out_unlock_pi;
	}

	/*
	 * When deadlock detection is off then we check whether further
	 * priority adjustment is necessary.
	 */
	if (!detect_deadlock && waiter->prio == task->prio)
		goto out_unlock_pi;

	lock = waiter->lock;
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}

	/*
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	top_waiter = rt_mutex_top_waiter(lock);

	/* Requeue the waiter */
	rt_mutex_dequeue(lock, waiter);
	waiter->prio = task->prio;
	rt_mutex_enqueue(lock, waiter);

	/* Release the task */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue above changed the top waiter, then we need
		 * to wake the new top waiter up to try to get the lock.
		 */

		if (top_waiter != rt_mutex_top_waiter(lock))
			wake_up_process(rt_mutex_top_waiter(lock)->task);
		raw_spin_unlock(&lock->wait_lock);
		goto out_put_task;
	}
	put_task_struct(task);

	/* Grab the next task */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	if (waiter == rt_mutex_top_waiter(lock)) {
		/* Boost the owner */
		rt_mutex_dequeue_pi(task, top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);

	} else if (top_waiter == waiter) {
		/* Deboost the owner */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
	}

	/*
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	top_waiter = rt_mutex_top_waiter(lock);
	raw_spin_unlock(&lock->wait_lock);

	/*
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held.
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait list if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
				struct rt_mutex_waiter *waiter)
{
	unsigned long flags;

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter list. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock))
			return 0;

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters list.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock.  @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/*
			 * If @task->prio is greater than or equal to
			 * the top waiter priority (kernel view),
			 * @task lost.
			 */
			if (task->prio >= rt_mutex_top_waiter(lock)->prio)
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters list.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters list.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	rt_mutex_deadlock_account_lock(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   int detect_deadlock)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;
	unsigned long flags;

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	if (!owner)
		return 0;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
					 next_lock, waiter, task);

	raw_spin_lock(&lock->wait_lock);

	return res;
}

/*
 * Wake up the next waiter on the lock.
 *
 * Remove the top waiter from the current tasks pi waiter list and
 * wake it up.
 *
 * Called with lock->wait_lock held.
 */
static void wakeup_next_waiter(struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	rt_mutex_dequeue_pi(current, waiter);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * It's safe to dereference waiter as it cannot go away as
	 * long as we hold lock->wait_lock. The waiter task needs to
	 * acquire it in order to dequeue the waiter.
	 */
	wake_up_process(waiter->task);
}

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and
 * have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	int first = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock = NULL;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	if (!owner)
		return;

	if (first) {

		raw_spin_lock_irqsave(&owner->pi_lock, flags);

		rt_mutex_dequeue_pi(owner, waiter);

		if (rt_mutex_has_waiters(lock)) {
			struct rt_mutex_waiter *next;

			next = rt_mutex_top_waiter(lock);
			rt_mutex_enqueue_pi(owner, next);
		}
		__rt_mutex_adjust_prio(owner);

		/* Store the lock on which owner is blocked or NULL */
		next_lock = task_blocked_on_lock(owner);

		raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
	}

	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);

	raw_spin_lock(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || (waiter->prio == task->prio &&
			!dl_prio(task->prio))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
}

/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 * 			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * lock->wait_lock must be held by the caller.
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		raw_spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule_rt_mutex(lock);

		raw_spin_lock(&lock->wait_lock);
		set_current_state(state);
	}

	return ret;
}

static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	RB_CLEAR_NODE(&waiter.pi_tree_entry);
	RB_CLEAR_NODE(&waiter.tree_entry);

	raw_spin_lock(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);

	if (likely(!ret))
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret)) {
		remove_waiter(lock, &waiter);
		rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex currently has no owner. Lock the wait lock and
	 * try to acquire the lock.
	 */
	raw_spin_lock(&lock->wait_lock);

	ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}

/*
 * Slow path to release a rt-mutex:
 */
static void __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
	raw_spin_lock(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock);   <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock(&lock->wait_lock);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 */
	wakeup_next_waiter(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Undo pi boosting if necessary: */
	rt_mutex_adjust_prio(current);
}

/*
 * debug aware fast / slowpath lock,trylock,unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  int detect_deadlock,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				int detect_deadlock))
{
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, NULL, detect_deadlock);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout, int detect_deadlock,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      int detect_deadlock))
{
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, timeout, detect_deadlock);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 1;
	}
	return slowfn(lock);
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
		    void (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
		rt_mutex_deadlock_account_unlock(current);
	else
		slowfn(lock);
}

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	might_sleep();

	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptibly
 *
 * @lock: 		the rt_mutex to be locked
 * @detect_deadlock:	deadlock detection on/off
 *
 * Returns:
 *  0 		on success
 * -EINTR 	when interrupted by a signal
 * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
						 int detect_deadlock)
{
	might_sleep();

	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
				 detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptibly; the timeout
 *			structure is provided by the caller
 *
 * @lock: 		the rt_mutex to be locked
 * @timeout:		timeout structure or NULL (no timeout)
 * @detect_deadlock:	deadlock detection on/off
 *
 * Returns:
 *  0 		on success
 * -EINTR 	when interrupted by a signal
 * -ETIMEDOUT	when the timeout expired
 * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
		    int detect_deadlock)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:	the rt_mutex to be locked
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);

/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	lock->owner = NULL;
	raw_spin_lock_init(&lock->wait_lock);
	lock->waiters = RB_ROOT;
	lock->waiters_leftmost = NULL;

	debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock: 	the rt_mutex to be locked
 * @proxy_owner:	the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	__rt_mutex_init(lock, NULL);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock: 	the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
	rt_mutex_deadlock_account_unlock(proxy_owner);
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task, int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 1;
	}

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain.  Let the waiter sort it out.
		 */
		ret = 0;
	}

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	raw_spin_unlock(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already have
 * 			been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter,
			       int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}