diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4dac1036594f2a06418506d1a56b8e446462049e..00b042c49ccdac7af3262a399d33dacc88c83e25 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -299,6 +299,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	__u.__val;					\
 })
 
+/**
+ * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
+ * @cond: boolean expression to wait for; re-evaluated on every spin iteration
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ *
+ * The control dependency provides LOAD->STORE order and the additional RMB
+ * provides LOAD->LOAD order; together they provide LOAD->{LOAD,STORE} order,
+ * aka. (load)-ACQUIRE.
+ */
+#define smp_cond_acquire(cond)	do {		\
+	while (!(cond))				\
+		cpu_relax();			\
+	smp_rmb(); /* ctrl + rmb := acquire */	\
+} while (0)
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 986207887defe346d61735472cd6426caea20681..ed9d96708f93c36fdf6ddc5dc480bb006517782e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -433,8 +433,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 *
 	 */
 	pv_wait_head(lock, node);
-	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
-		cpu_relax();
+	smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
 
 	/*
 	 * claim the lock:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7063c6a0744055c367eb82159677cfe6522b6c60..9f7862da2cd1909e8f60eabaccc466f6ecb8a538 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1968,19 +1968,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	/*
 	 * If the owning (remote) cpu is still in the middle of schedule() with
 	 * this task as prev, wait until its done referencing the task.
-	 */
-	while (p->on_cpu)
-		cpu_relax();
-	/*
-	 * Combined with the control dependency above, we have an effective
-	 * smp_load_acquire() without the need for full barriers.
 	 *
 	 * Pairs with the smp_store_release() in finish_lock_switch().
 	 *
 	 * This ensures that tasks getting woken will be fully ordered against
 	 * their previous state and preserve Program Order.
 	 */
-	smp_rmb();
+	smp_cond_acquire(!p->on_cpu);
 
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b242775bf670e116233862c590915e06132485ca..1e0bb4afe3fd1e3b8d2b6c2313332b25f054cabf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1076,7 +1076,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 	 * In particular, the load of prev->state in finish_task_switch() must
 	 * happen before this.
 	 *
-	 * Pairs with the control dependency and rmb in try_to_wake_up().
+	 * Pairs with the smp_cond_acquire() in try_to_wake_up().
 	 */
 	smp_store_release(&prev->on_cpu, 0);
 #endif