提交 007dc78f 编写于 作者: L Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "Here are the locking changes in this cycle:

   - rwsem unification and simpler micro-optimizations to prepare for
     more intrusive (and more lucrative) scalability improvements in
     v5.3 (Waiman Long)

   - Lockdep irq state tracking flag usage cleanups (Frederic
     Weisbecker)

   - static key improvements (Jakub Kicinski, Peter Zijlstra)

   - misc updates, cleanups and smaller fixes"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  locking/lockdep: Remove unnecessary unlikely()
  locking/static_key: Don't take sleeping locks in __static_key_slow_dec_deferred()
  locking/static_key: Factor out the fast path of static_key_slow_dec()
  locking/static_key: Add support for deferred static branches
  locking/lockdep: Test all incompatible scenarios at once in check_irq_usage()
  locking/lockdep: Avoid bogus Clang warning
  locking/lockdep: Generate LOCKF_ bit composites
  locking/lockdep: Use expanded masks on find_usage_*() functions
  locking/lockdep: Map remaining magic numbers to lock usage mask names
  locking/lockdep: Move valid_state() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING
  locking/rwsem: Prevent unneeded warning during locking selftest
  locking/rwsem: Optimize rwsem structure for uncontended lock acquisition
  locking/rwsem: Enable lock event counting
  locking/lock_events: Don't show pvqspinlock events on bare metal
  locking/lock_events: Make lock_events available for all archs & other locks
  locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs
  locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro
  locking/rwsem: Add debug check for __down_read*()
  locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued()
  locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h
  ...
......@@ -9100,7 +9100,6 @@ F: arch/*/include/asm/spinlock*.h
F: include/linux/rwlock*.h
F: include/linux/mutex*.h
F: include/linux/rwsem*.h
F: arch/*/include/asm/rwsem.h
F: include/linux/seqlock.h
F: lib/locking*.[ch]
F: kernel/locking/
......
......@@ -907,6 +907,15 @@ config HAVE_ARCH_PREL32_RELOCATIONS
config ARCH_USE_MEMREMAP_PROT
bool
config LOCK_EVENT_COUNTS
bool "Locking event counts collection"
depends on DEBUG_FS
---help---
Enable light-weight counting of various locking related events
in the system with minimal performance impact. This reduces
the chance of application behavior change because of timing
differences. The counts are reported via debugfs.
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
......
......@@ -50,13 +50,6 @@ config MMU
bool
default y
config RWSEM_GENERIC_SPINLOCK
bool
config RWSEM_XCHGADD_ALGORITHM
bool
default y
config ARCH_HAS_ILOG2_U32
bool
default n
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ALPHA_RWSEM_H
#define _ALPHA_RWSEM_H
/*
* Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001.
* Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
*/
#ifndef _LINUX_RWSEM_H
#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
#endif
#ifdef __KERNEL__
#include <linux/compiler.h>
#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
static inline int ___down_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
oldcount = sem->count.counter;
sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
"1: ldq_l %0,%1\n"
" addq %0,%3,%2\n"
" stq_c %2,%1\n"
" beq %2,2f\n"
" mb\n"
".subsection 2\n"
"2: br 1b\n"
".previous"
:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
:"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
#endif
return (oldcount < 0);
}
static inline void __down_read(struct rw_semaphore *sem)
{
if (unlikely(___down_read(sem)))
rwsem_down_read_failed(sem);
}
static inline int __down_read_killable(struct rw_semaphore *sem)
{
if (unlikely(___down_read(sem)))
if (IS_ERR(rwsem_down_read_failed_killable(sem)))
return -EINTR;
return 0;
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
long old, new, res;
res = atomic_long_read(&sem->count);
do {
new = res + RWSEM_ACTIVE_READ_BIAS;
if (new <= 0)
break;
old = res;
res = atomic_long_cmpxchg(&sem->count, old, new);
} while (res != old);
return res >= 0 ? 1 : 0;
}
static inline long ___down_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
oldcount = sem->count.counter;
sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
#else
long temp;
__asm__ __volatile__(
"1: ldq_l %0,%1\n"
" addq %0,%3,%2\n"
" stq_c %2,%1\n"
" beq %2,2f\n"
" mb\n"
".subsection 2\n"
"2: br 1b\n"
".previous"
:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
:"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
#endif
return oldcount;
}
static inline void __down_write(struct rw_semaphore *sem)
{
if (unlikely(___down_write(sem)))
rwsem_down_write_failed(sem);
}
static inline int __down_write_killable(struct rw_semaphore *sem)
{
if (unlikely(___down_write(sem))) {
if (IS_ERR(rwsem_down_write_failed_killable(sem)))
return -EINTR;
}
return 0;
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
}
static inline void __up_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
oldcount = sem->count.counter;
sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
" mb\n"
"1: ldq_l %0,%1\n"
" subq %0,%3,%2\n"
" stq_c %2,%1\n"
" beq %2,2f\n"
".subsection 2\n"
"2: br 1b\n"
".previous"
:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
:"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
#endif
if (unlikely(oldcount < 0))
if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0)
rwsem_wake(sem);
}
static inline void __up_write(struct rw_semaphore *sem)
{
long count;
#ifndef CONFIG_SMP
sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
count = sem->count.counter;
#else
long temp;
__asm__ __volatile__(
" mb\n"
"1: ldq_l %0,%1\n"
" subq %0,%3,%2\n"
" stq_c %2,%1\n"
" beq %2,2f\n"
" subq %0,%3,%0\n"
".subsection 2\n"
"2: br 1b\n"
".previous"
:"=&r" (count), "=m" (sem->count), "=&r" (temp)
:"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
#endif
if (unlikely(count))
if ((int)count == 0)
rwsem_wake(sem);
}
/*
* downgrade write lock to read lock
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
oldcount = sem->count.counter;
sem->count.counter -= RWSEM_WAITING_BIAS;
#else
long temp;
__asm__ __volatile__(
"1: ldq_l %0,%1\n"
" addq %0,%3,%2\n"
" stq_c %2,%1\n"
" beq %2,2f\n"
" mb\n"
".subsection 2\n"
"2: br 1b\n"
".previous"
:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
:"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory");
#endif
if (unlikely(oldcount < 0))
rwsem_downgrade_wake(sem);
}
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
......@@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER
config GENERIC_CSUM
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool y
config ARCH_DISCONTIGMEM_ENABLE
def_bool n
......
......@@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT
bool
default !CPU_V7M
config RWSEM_XCHGADD_ALGORITHM
bool
default y
config ARCH_HAS_ILOG2_U32
bool
......
......@@ -12,7 +12,6 @@ generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += parport.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += seccomp.h
generic-y += segment.h
generic-y += serial.h
......
......@@ -236,9 +236,6 @@ config LOCKDEP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config RWSEM_XCHGADD_ALGORITHM
def_bool y
config GENERIC_BUG
def_bool y
depends on BUG
......
......@@ -16,7 +16,6 @@ generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
......
......@@ -28,9 +28,6 @@ config MMU
config FPU
def_bool n
config RWSEM_GENERIC_SPINLOCK
def_bool y
config GENERIC_CALIBRATE_DELAY
def_bool y
......
......@@ -92,9 +92,6 @@ config GENERIC_HWEIGHT
config MMU
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool y
config STACKTRACE_SUPPORT
def_bool y
......
......@@ -27,9 +27,6 @@ config H8300
config CPU_BIG_ENDIAN
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool y
config GENERIC_HWEIGHT
def_bool y
......
......@@ -65,12 +65,6 @@ config GENERIC_CSUM
config GENERIC_IRQ_PROBE
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool n
config RWSEM_XCHGADD_ALGORITHM
def_bool y
config GENERIC_HWEIGHT
def_bool y
......
......@@ -27,7 +27,6 @@ generic-y += mm-arch-hooks.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
......
......@@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT
config GENERIC_LOCKBREAK
def_bool n
config RWSEM_XCHGADD_ALGORITHM
bool
default y
config HUGETLB_PAGE_SIZE_VARIABLE
bool
depends on HUGETLB_PAGE
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* R/W semaphores for ia64
*
* Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
* Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
* Copyright (C) 2005 Christoph Lameter <cl@linux.com>
*
* Based on asm-i386/rwsem.h and other architecture implementation.
*
* The MSW of the count is the negated number of active writers and
* waiting lockers, and the LSW is the total number of active locks.
*
* The lock count is initialized to 0 (no active and no waiting lockers).
*
* When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
* the case of an uncontended lock. Readers increment by 1 and see a positive
* value when uncontended, negative if there are writers (and maybe) readers
* waiting (in which case it goes to sleep).
*/
#ifndef _ASM_IA64_RWSEM_H
#define _ASM_IA64_RWSEM_H
#ifndef _LINUX_RWSEM_H
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
#include <asm/intrinsics.h>
#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
#define RWSEM_ACTIVE_BIAS (1L)
#define RWSEM_ACTIVE_MASK (0xffffffffL)
#define RWSEM_WAITING_BIAS (-0x100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
* lock for reading
*/
static inline int
___down_read (struct rw_semaphore *sem)
{
long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
return (result < 0);
}
static inline void
__down_read (struct rw_semaphore *sem)
{
if (___down_read(sem))
rwsem_down_read_failed(sem);
}
static inline int
__down_read_killable (struct rw_semaphore *sem)
{
if (___down_read(sem))
if (IS_ERR(rwsem_down_read_failed_killable(sem)))
return -EINTR;
return 0;
}
/*
* lock for writing
*/
static inline long
___down_write (struct rw_semaphore *sem)
{
long old, new;
do {
old = atomic_long_read(&sem->count);
new = old + RWSEM_ACTIVE_WRITE_BIAS;
} while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
return old;
}
static inline void
__down_write (struct rw_semaphore *sem)
{
if (___down_write(sem))
rwsem_down_write_failed(sem);
}
static inline int
__down_write_killable (struct rw_semaphore *sem)
{
if (___down_write(sem)) {
if (IS_ERR(rwsem_down_write_failed_killable(sem)))
return -EINTR;
}
return 0;
}
/*
* unlock after reading
*/
static inline void
__up_read (struct rw_semaphore *sem)
{
long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
}
/*
* unlock after writing
*/
static inline void
__up_write (struct rw_semaphore *sem)
{
long old, new;
do {
old = atomic_long_read(&sem->count);
new = old - RWSEM_ACTIVE_WRITE_BIAS;
} while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
static inline int
__down_read_trylock (struct rw_semaphore *sem)
{
long tmp;
while ((tmp = atomic_long_read(&sem->count)) >= 0) {
if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
return 1;
}
}
return 0;
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
static inline int
__down_write_trylock (struct rw_semaphore *sem)
{
long tmp = atomic_long_cmpxchg_acquire(&sem->count,
RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
/*
* downgrade write lock to read lock
*/
static inline void
__downgrade_write (struct rw_semaphore *sem)
{
long old, new;
do {
old = atomic_long_read(&sem->count);
new = old - RWSEM_WAITING_BIAS;
} while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (old < 0)
rwsem_downgrade_wake(sem);
}
#endif /* _ASM_IA64_RWSEM_H */
......@@ -33,13 +33,6 @@ config M68K
config CPU_BIG_ENDIAN
def_bool y
config RWSEM_GENERIC_SPINLOCK
bool
default y
config RWSEM_XCHGADD_ALGORITHM
bool
config ARCH_HAS_ILOG2_U32
bool
......
......@@ -59,15 +59,9 @@ config CPU_LITTLE_ENDIAN
endchoice
config RWSEM_GENERIC_SPINLOCK
def_bool y
config ZONE_DMA
def_bool y
config RWSEM_XCHGADD_ALGORITHM
bool
config ARCH_HAS_ILOG2_U32
def_bool n
......
......@@ -1037,13 +1037,6 @@ source "arch/mips/paravirt/Kconfig"
endmenu
config RWSEM_GENERIC_SPINLOCK
bool
default y
config RWSEM_XCHGADD_ALGORITHM
bool
config GENERIC_HWEIGHT
bool
default y
......
......@@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK
def_bool y
depends on PREEMPT
config RWSEM_GENERIC_SPINLOCK
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
......
......@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
config FPU
def_bool n
config RWSEM_GENERIC_SPINLOCK
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool n
......
......@@ -44,12 +44,6 @@ config CPU_BIG_ENDIAN
config MMU
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool y
config RWSEM_XCHGADD_ALGORITHM
def_bool n
config GENERIC_HWEIGHT
def_bool y
......
......@@ -75,12 +75,6 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
config RWSEM_GENERIC_SPINLOCK
def_bool y
config RWSEM_XCHGADD_ALGORITHM
bool
config ARCH_HAS_ILOG2_U32
bool
default n
......
......@@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT
bool
default y
config RWSEM_GENERIC_SPINLOCK
bool
config RWSEM_XCHGADD_ALGORITHM
bool
default y
config GENERIC_LOCKBREAK
bool
default y
......
......@@ -8,6 +8,5 @@ generic-y += irq_regs.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += vtime.h
generic-y += msi.h
......@@ -69,9 +69,6 @@ config STACKTRACE_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool y
config GENERIC_BUG
def_bool y
depends on BUG
......
......@@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT
config STACKTRACE_SUPPORT
def_bool y
config RWSEM_GENERIC_SPINLOCK
bool
config RWSEM_XCHGADD_ALGORITHM
def_bool y
config ARCH_HAS_ILOG2_U32
def_bool n
......
......@@ -20,7 +20,6 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += rwsem.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += word-at-a-time.h
......@@ -90,12 +90,6 @@ config ARCH_DEFCONFIG
default "arch/sh/configs/shx3_defconfig" if SUPERH32
default "arch/sh/configs/cayman_defconfig" if SUPERH64
config RWSEM_GENERIC_SPINLOCK
def_bool y
config RWSEM_XCHGADD_ALGORITHM
bool
config GENERIC_BUG
def_bool y
depends on BUG && SUPERH32
......
......@@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += serial.h
generic-y += sizes.h
generic-y += trace_clock.h
......
......@@ -192,14 +192,6 @@ config NR_CPUS
source "kernel/Kconfig.hz"
config RWSEM_GENERIC_SPINLOCK
bool
default y if SPARC32
config RWSEM_XCHGADD_ALGORITHM
bool
default y if SPARC64
config GENERIC_HWEIGHT
bool
default y
......
......@@ -18,7 +18,6 @@ generic-y += mm-arch-hooks.h
generic-y += module.h
generic-y += msi.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
......@@ -39,12 +39,6 @@ config STACKTRACE_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
config RWSEM_GENERIC_SPINLOCK
def_bool y
config RWSEM_XCHGADD_ALGORITHM
bool
config ARCH_HAS_ILOG2_U32
bool
......
......@@ -268,9 +268,6 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
config RWSEM_XCHGADD_ALGORITHM
def_bool y
config GENERIC_CALIBRATE_DELAY
def_bool y
......@@ -783,14 +780,6 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
config QUEUED_LOCK_STAT
bool "Paravirt queued spinlock statistics"
depends on PARAVIRT_SPINLOCKS && DEBUG_FS
---help---
Enable the collection of statistical data on the slowpath
behavior of paravirtualized queued spinlocks and report
them on debugfs.
source "arch/x86/xen/Kconfig"
config KVM_GUEST
......
/* SPDX-License-Identifier: GPL-2.0 */
/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
*
* Written by David Howells (dhowells@redhat.com).
*
* Derived from asm-x86/semaphore.h
*
*
* The MSW of the count is the negated number of active writers and waiting
* lockers, and the LSW is the total number of active locks
*
* The lock count is initialized to 0 (no active and no waiting lockers).
*
* When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
* uncontended lock. This can be determined because XADD returns the old value.
* Readers increment by 1 and see a positive value when uncontended, negative
* if there are writers (and maybe) readers waiting (in which case it goes to
* sleep).
*
* The value of WAITING_BIAS supports up to 32766 waiting processes. This can
* be extended to 65534 by manually checking the whole MSW rather than relying
* on the S flag.
*
* The value of ACTIVE_BIAS supports up to 65535 active processes.
*
* This should be totally fair - if anything is waiting, a process that wants a
* lock will go to the back of the queue. When the currently active lock is
* released, if there's a writer at the front of the queue, then that and only
* that will be woken up; if there's a bunch of consecutive readers at the
* front, then they'll all be woken up, but no other readers will be.
*/
#ifndef _ASM_X86_RWSEM_H
#define _ASM_X86_RWSEM_H
#ifndef _LINUX_RWSEM_H
#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
#endif
#ifdef __KERNEL__
#include <asm/asm.h>
/*
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
*/
#ifdef CONFIG_X86_64
# define RWSEM_ACTIVE_MASK 0xffffffffL
#else
# define RWSEM_ACTIVE_MASK 0x0000ffffL
#endif
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
* lock for reading
*/
#define ____down_read(sem, slow_path) \
({ \
struct rw_semaphore* ret; \
asm volatile("# beginning down_read\n\t" \
LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \
/* adds 0x00000001 */ \
" jns 1f\n" \
" call " slow_path "\n" \
"1:\n\t" \
"# ending down_read\n\t" \
: "+m" (sem->count), "=a" (ret), \
ASM_CALL_CONSTRAINT \
: [sem] "a" (sem) \
: "memory", "cc"); \
ret; \
})
static inline void __down_read(struct rw_semaphore *sem)
{
____down_read(sem, "call_rwsem_down_read_failed");
}
static inline int __down_read_killable(struct rw_semaphore *sem)
{
if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable")))
return -EINTR;
return 0;
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
static inline bool __down_read_trylock(struct rw_semaphore *sem)
{
long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %[count],%[result]\n\t"
"1:\n\t"
" mov %[result],%[tmp]\n\t"
" add %[inc],%[tmp]\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
: [count] "+m" (sem->count), [result] "=&a" (result),
[tmp] "=&r" (tmp)
: [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
return result >= 0;
}
/*
* lock for writing
*/
#define ____down_write(sem, slow_path) \
({ \
long tmp; \
struct rw_semaphore* ret; \
\
asm volatile("# beginning down_write\n\t" \
LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
/* adds 0xffff0001, returns the old value */ \
" test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
/* was the active mask 0 before? */\
" jz 1f\n" \
" call " slow_path "\n" \
"1:\n" \
"# ending down_write" \
: "+m" (sem->count), [tmp] "=d" (tmp), \
"=a" (ret), ASM_CALL_CONSTRAINT \
: [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \
: "memory", "cc"); \
ret; \
})
static inline void __down_write(struct rw_semaphore *sem)
{
____down_write(sem, "call_rwsem_down_write_failed");
}
static inline int __down_write_killable(struct rw_semaphore *sem)
{
if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
return -EINTR;
return 0;
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
static inline bool __down_write_trylock(struct rw_semaphore *sem)
{
bool result;
long tmp0, tmp1;
asm volatile("# beginning __down_write_trylock\n\t"
" mov %[count],%[tmp0]\n\t"
"1:\n\t"
" test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
/* was the active mask 0 before? */
" jnz 2f\n\t"
" mov %[tmp0],%[tmp1]\n\t"
" add %[inc],%[tmp1]\n\t"
LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t"
" jnz 1b\n\t"
"2:\n\t"
CC_SET(e)
"# ending __down_write_trylock\n\t"
: [count] "+m" (sem->count), [tmp0] "=&a" (tmp0),
[tmp1] "=&r" (tmp1), CC_OUT(e) (result)
: [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS)
: "memory");
return result;
}
/*
* unlock after reading
*/
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
/* subtracts 1, returns the old value */
" jns 1f\n\t"
" call call_rwsem_wake\n" /* expects old value in %edx */
"1:\n"
"# ending __up_read\n"
: "+m" (sem->count), [tmp] "=d" (tmp)
: [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
}
/*
* unlock after writing
*/
static inline void __up_write(struct rw_semaphore *sem)
{
long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
/* subtracts 0xffff0001, returns the old value */
" jns 1f\n\t"
" call call_rwsem_wake\n" /* expects old value in %edx */
"1:\n\t"
"# ending __up_write\n"
: "+m" (sem->count), [tmp] "=d" (tmp)
: [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS)
: "memory", "cc");
}
/*
* downgrade write lock to read lock
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
asm volatile("# beginning __downgrade_write\n\t"
LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t"
/*
* transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
* 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
*/
" jns 1f\n\t"
" call call_rwsem_downgrade_wake\n"
"1:\n\t"
"# ending __downgrade_write\n"
: "+m" (sem->count)
: [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS)
: "memory", "cc");
}
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
......@@ -35,7 +35,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
......
/*
* x86 semaphore implementation.
*
* (C) Copyright 1999 Linus Torvalds
*
* Portions Copyright 1999 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
*/
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
#ifdef CONFIG_X86_32
/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
* need to convert that sequence back into the C sequence when
* there is contention on the semaphore.
*
* %eax contains the semaphore pointer on entry. Save the C-clobbered
* registers (%eax, %edx and %ecx) except %eax which is either a return
* value or just gets clobbered. Same is true for %edx so make sure GCC
* reloads it after the slow path, by making it hold a temporary, for
* example see ____down_write().
*/
#define save_common_regs \
pushl %ecx
#define restore_common_regs \
popl %ecx
/* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst
.endm
#else
/*
* x86-64 rwsem wrappers
*
* This interfaces the inline asm code to the slow-path
* C routines. We need to save the call-clobbered regs
* that the asm does not mark as clobbered, and move the
* argument from %rax to %rdi.
*
* NOTE! We don't need to save %rax, because the functions
* will always return the semaphore pointer in %rax (which
* is also the input argument to these helpers)
*
* The following can clobber %rdx because the asm clobbers it:
* call_rwsem_down_write_failed
* call_rwsem_wake
* but %rdi, %rsi, %rcx, %r8-r11 always need saving.
*/
#define save_common_regs \
pushq %rdi; \
pushq %rsi; \
pushq %rcx; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11
#define restore_common_regs \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rcx; \
popq %rsi; \
popq %rdi
#endif
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
FRAME_BEGIN
save_common_regs
__ASM_SIZE(push,) %__ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed
__ASM_SIZE(pop,) %__ASM_REG(dx)
restore_common_regs
FRAME_END
ret
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_read_failed_killable)
FRAME_BEGIN
save_common_regs
__ASM_SIZE(push,) %__ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed_killable
__ASM_SIZE(pop,) %__ASM_REG(dx)
restore_common_regs
FRAME_END
ret
ENDPROC(call_rwsem_down_read_failed_killable)
ENTRY(call_rwsem_down_write_failed)
FRAME_BEGIN
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed
restore_common_regs
FRAME_END
ret
ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_down_write_failed_killable)
FRAME_BEGIN
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed_killable
restore_common_regs
FRAME_END
ret
ENDPROC(call_rwsem_down_write_failed_killable)
ENTRY(call_rwsem_wake)
FRAME_BEGIN
/* do nothing if still outstanding active readers */
__ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
jnz 1f
save_common_regs
movq %rax,%rdi
call rwsem_wake
restore_common_regs
1: FRAME_END
ret
ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake)
FRAME_BEGIN
save_common_regs
__ASM_SIZE(push,) %__ASM_REG(dx)
movq %rax,%rdi
call rwsem_downgrade_wake
__ASM_SIZE(pop,) %__ASM_REG(dx)
restore_common_regs
FRAME_END
ret
ENDPROC(call_rwsem_downgrade_wake)
......@@ -32,12 +32,6 @@ config ARCH_DEFCONFIG
default "arch/um/configs/i386_defconfig" if X86_32
default "arch/um/configs/x86_64_defconfig" if X86_64
config RWSEM_XCHGADD_ALGORITHM
def_bool 64BIT
config RWSEM_GENERIC_SPINLOCK
def_bool !RWSEM_XCHGADD_ALGORITHM
config 3_LEVEL_PGTABLES
bool "Three-level pagetables" if !64BIT
default 64BIT
......
......@@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
else
obj-y += syscalls_64.o vdso/
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
../lib/rwsem.o
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
endif
......
......@@ -46,9 +46,6 @@ config XTENSA
with reasonable minimum requirements. The Xtensa Linux project has
a home page at <http://www.linux-xtensa.org/>.
config RWSEM_XCHGADD_ALGORITHM
def_bool y
config GENERIC_HWEIGHT
def_bool y
......
......@@ -25,7 +25,6 @@ generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += sections.h
generic-y += socket.h
generic-y += topology.h
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_RWSEM_H
#define _ASM_GENERIC_RWSEM_H
#ifndef _LINUX_RWSEM_H
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
#ifdef __KERNEL__
/*
* R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
* Adapted largely from include/asm-i386/rwsem.h
* by Paul Mackerras <paulus@samba.org>.
*/
/*
* the semaphore definition
*/
#ifdef CONFIG_64BIT
# define RWSEM_ACTIVE_MASK 0xffffffffL
#else
# define RWSEM_ACTIVE_MASK 0x0000ffffL
#endif
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
rwsem_down_read_failed(sem);
}
static inline int __down_read_killable(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
if (IS_ERR(rwsem_down_read_failed_killable(sem)))
return -EINTR;
}
return 0;
}
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
long tmp;
while ((tmp = atomic_long_read(&sem->count)) >= 0) {
if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
tmp + RWSEM_ACTIVE_READ_BIAS)) {
return 1;
}
}
return 0;
}
/*
* lock for writing
*/
static inline void __down_write(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
rwsem_down_write_failed(sem);
}
static inline int __down_write_killable(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
if (IS_ERR(rwsem_down_write_failed_killable(sem)))
return -EINTR;
return 0;
}
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
/*
* unlock after reading
*/
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_dec_return_release(&sem->count);
if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
rwsem_wake(sem);
}
/*
* unlock after writing
*/
static inline void __up_write(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count) < 0))
rwsem_wake(sem);
}
/*
* downgrade write lock to read lock
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
long tmp;
/*
* When downgrading from exclusive to shared ownership,
* anything inside the write-locked region cannot leak
* into the read side. In contrast, anything in the
* read-locked region is ok to be re-ordered into the
* write side. As such, rely on RELEASE semantics.
*/
tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
if (tmp < 0)
rwsem_downgrade_wake(sem);
}
#endif /* __KERNEL__ */
#endif /* _ASM_GENERIC_RWSEM_H */
......@@ -12,21 +12,79 @@ struct static_key_deferred {
struct delayed_work work;
};
extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
extern void static_key_deferred_flush(struct static_key_deferred *key);
struct static_key_true_deferred {
struct static_key_true key;
unsigned long timeout;
struct delayed_work work;
};
struct static_key_false_deferred {
struct static_key_false key;
unsigned long timeout;
struct delayed_work work;
};
#define static_key_slow_dec_deferred(x) \
__static_key_slow_dec_deferred(&(x)->key, &(x)->work, (x)->timeout)
#define static_branch_slow_dec_deferred(x) \
__static_key_slow_dec_deferred(&(x)->key.key, &(x)->work, (x)->timeout)
#define static_key_deferred_flush(x) \
__static_key_deferred_flush((x), &(x)->work)
extern void
__static_key_slow_dec_deferred(struct static_key *key,
struct delayed_work *work,
unsigned long timeout);
extern void __static_key_deferred_flush(void *key, struct delayed_work *work);
extern void
jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
extern void jump_label_update_timeout(struct work_struct *work);
#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \
struct static_key_true_deferred name = { \
.key = { STATIC_KEY_INIT_TRUE }, \
.timeout = (rl), \
.work = __DELAYED_WORK_INITIALIZER((name).work, \
jump_label_update_timeout, \
0), \
}
#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \
struct static_key_false_deferred name = { \
.key = { STATIC_KEY_INIT_FALSE }, \
.timeout = (rl), \
.work = __DELAYED_WORK_INITIALIZER((name).work, \
jump_label_update_timeout, \
0), \
}
#define static_branch_deferred_inc(x) static_branch_inc(&(x)->key)
#else /* !CONFIG_JUMP_LABEL */
struct static_key_deferred {
struct static_key key;
};
struct static_key_true_deferred {
struct static_key_true key;
};
struct static_key_false_deferred {
struct static_key_false key;
};
#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \
struct static_key_true_deferred name = { STATIC_KEY_TRUE_INIT }
#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \
struct static_key_false_deferred name = { STATIC_KEY_FALSE_INIT }
#define static_branch_slow_dec_deferred(x) static_branch_dec(&(x)->key)
static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
{
STATIC_KEY_CHECK_USE(key);
static_key_slow_dec(&key->key);
}
static inline void static_key_deferred_flush(struct static_key_deferred *key)
static inline void static_key_deferred_flush(void *key)
{
STATIC_KEY_CHECK_USE(key);
}
......
......@@ -476,7 +476,7 @@ struct pin_cookie { };
#define NIL_COOKIE (struct pin_cookie){ }
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; })
#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0)
#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0)
......
/* SPDX-License-Identifier: GPL-2.0 */
/* rwsem-spinlock.h: fallback C implementation
*
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
* - Derived partially from ideas by Andrea Arcangeli <andrea@suse.de>
* - Derived also from comments by Linus
*/
#ifndef _LINUX_RWSEM_SPINLOCK_H
#define _LINUX_RWSEM_SPINLOCK_H
#ifndef _LINUX_RWSEM_H
#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
#endif
#ifdef __KERNEL__
/*
* the rw-semaphore definition
* - if count is 0 then there are no active readers or writers
* - if count is +ve then that is the number of active readers
* - if count is -1 then there is one active writer
* - if wait_list is not empty, then there are processes waiting for the semaphore
*/
struct rw_semaphore {
__s32 count;
raw_spinlock_t wait_lock;
struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
};
#define RWSEM_UNLOCKED_VALUE 0x00000000
extern void __down_read(struct rw_semaphore *sem);
extern int __must_check __down_read_killable(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
extern void __down_write(struct rw_semaphore *sem);
extern int __must_check __down_write_killable(struct rw_semaphore *sem);
extern int __down_write_trylock(struct rw_semaphore *sem);
extern void __up_read(struct rw_semaphore *sem);
extern void __up_write(struct rw_semaphore *sem);
extern void __downgrade_write(struct rw_semaphore *sem);
extern int rwsem_is_locked(struct rw_semaphore *sem);
#endif /* __KERNEL__ */
#endif /* _LINUX_RWSEM_SPINLOCK_H */
......@@ -20,25 +20,30 @@
#include <linux/osq_lock.h>
#endif
struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#include <linux/rwsem-spinlock.h> /* use a generic implementation */
#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE
#else
/* All arch specific implementations share the same struct */
/*
* For an uncontended rwsem, count and owner are the only fields a task
* needs to touch when acquiring the rwsem. So they are put next to each
* other to increase the chance that they will share the same cacheline.
*
* In a contended rwsem, the owner is likely the most frequently accessed
* field in the structure as the optimistic waiter that holds the osq lock
* will spin on owner. For an embedded rwsem, other hot fields in the
* containing structure should be moved further away from the rwsem to
* reduce the chance that they will share the same cacheline causing
* cacheline bouncing problem.
*/
struct rw_semaphore {
atomic_long_t count;
struct list_head wait_list;
raw_spinlock_t wait_lock;
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
struct optimistic_spin_queue osq; /* spinner MCS lock */
/*
* Write owner. Used as a speculative check to see
* if the owner is running on the cpu.
*/
struct task_struct *owner;
struct optimistic_spin_queue osq; /* spinner MCS lock */
#endif
raw_spinlock_t wait_lock;
struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
......@@ -50,24 +55,14 @@ struct rw_semaphore {
*/
#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L)
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
/* Include the arch specific part */
#include <asm/rwsem.h>
/* In all implementations count != 0 means locked */
static inline int rwsem_is_locked(struct rw_semaphore *sem)
{
return atomic_long_read(&sem->count) != 0;
}
#define RWSEM_UNLOCKED_VALUE 0L
#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
#endif
/* Common initializer macros and functions */
......
......@@ -229,7 +229,7 @@ config MUTEX_SPIN_ON_OWNER
config RWSEM_SPIN_ON_OWNER
def_bool y
depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
config LOCK_SPIN_ON_OWNER
def_bool y
......
......@@ -202,11 +202,13 @@ void static_key_disable(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_disable);
static void __static_key_slow_dec_cpuslocked(struct static_key *key,
unsigned long rate_limit,
struct delayed_work *work)
static bool static_key_slow_try_dec(struct static_key *key)
{
lockdep_assert_cpus_held();
int val;
val = atomic_fetch_add_unless(&key->enabled, -1, 1);
if (val == 1)
return false;
/*
* The negative count check is valid even when a negative
......@@ -215,63 +217,70 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key,
* returns is unbalanced, because all other static_key_slow_inc()
* instances block while the update is in progress.
*/
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
WARN(atomic_read(&key->enabled) < 0,
"jump label: negative count!\n");
WARN(val < 0, "jump label: negative count!\n");
return true;
}
static void __static_key_slow_dec_cpuslocked(struct static_key *key)
{
lockdep_assert_cpus_held();
if (static_key_slow_try_dec(key))
return;
}
if (rate_limit) {
atomic_inc(&key->enabled);
schedule_delayed_work(work, rate_limit);
} else {
jump_label_lock();
if (atomic_dec_and_test(&key->enabled))
jump_label_update(key);
}
jump_label_unlock();
}
static void __static_key_slow_dec(struct static_key *key,
unsigned long rate_limit,
struct delayed_work *work)
static void __static_key_slow_dec(struct static_key *key)
{
cpus_read_lock();
__static_key_slow_dec_cpuslocked(key, rate_limit, work);
__static_key_slow_dec_cpuslocked(key);
cpus_read_unlock();
}
static void jump_label_update_timeout(struct work_struct *work)
void jump_label_update_timeout(struct work_struct *work)
{
struct static_key_deferred *key =
container_of(work, struct static_key_deferred, work.work);
__static_key_slow_dec(&key->key, 0, NULL);
__static_key_slow_dec(&key->key);
}
EXPORT_SYMBOL_GPL(jump_label_update_timeout);
void static_key_slow_dec(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
__static_key_slow_dec(key, 0, NULL);
__static_key_slow_dec(key);
}
EXPORT_SYMBOL_GPL(static_key_slow_dec);
void static_key_slow_dec_cpuslocked(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
__static_key_slow_dec_cpuslocked(key, 0, NULL);
__static_key_slow_dec_cpuslocked(key);
}
void static_key_slow_dec_deferred(struct static_key_deferred *key)
void __static_key_slow_dec_deferred(struct static_key *key,
struct delayed_work *work,
unsigned long timeout)
{
STATIC_KEY_CHECK_USE(key);
__static_key_slow_dec(&key->key, key->timeout, &key->work);
if (static_key_slow_try_dec(key))
return;
schedule_delayed_work(work, timeout);
}
EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred);
void static_key_deferred_flush(struct static_key_deferred *key)
void __static_key_deferred_flush(void *key, struct delayed_work *work)
{
STATIC_KEY_CHECK_USE(key);
flush_delayed_work(&key->work);
flush_delayed_work(work);
}
EXPORT_SYMBOL_GPL(static_key_deferred_flush);
EXPORT_SYMBOL_GPL(__static_key_deferred_flush);
void jump_label_rate_limit(struct static_key_deferred *key,
unsigned long rl)
......
......@@ -3,7 +3,7 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
......@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o
/* SPDX-License-Identifier: GPL-2.0 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <waiman.long@hpe.com>
*/
/*
* Collect locking event counts
*/
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/fs.h>
#include "lock_events.h"
#undef LOCK_EVENT
#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name,
#define LOCK_EVENTS_DIR "lock_event_counts"
/*
* When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different
* types of locks will be reported under the <debugfs>/lock_event_counts/
* directory. See lock_events_list.h for the list of available locking
* events.
*
* Writing to the special ".reset_counts" file will reset all the above
* locking event counts. This is a very slow operation and so should not
* be done frequently.
*
* These event counts are implemented as per-cpu variables which are
* summed and computed whenever the corresponding debugfs files are read. This
* minimizes added overhead making the counts usable even in a production
* environment.
*/
static const char * const lockevent_names[lockevent_num + 1] = {
#include "lock_events_list.h"
[LOCKEVENT_reset_cnts] = ".reset_counts",
};
/*
* Per-cpu counts
*/
DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/*
* The lockevent_read() function can be overridden.
*/
ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[64];
int cpu, id, len;
u64 sum = 0;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
id = (long)file_inode(file)->i_private;
if (id >= lockevent_num)
return -EBADF;
for_each_possible_cpu(cpu)
sum += per_cpu(lockevents[id], cpu);
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
/*
* Function to handle write request
*
* When idx = reset_cnts, reset all the counts.
*/
static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
int cpu;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts)
return count;
for_each_possible_cpu(cpu) {
int i;
unsigned long *ptr = per_cpu_ptr(lockevents, cpu);
for (i = 0 ; i < lockevent_num; i++)
WRITE_ONCE(ptr[i], 0);
}
return count;
}
/*
* Debugfs data structures
*/
static const struct file_operations fops_lockevent = {
.read = lockevent_read,
.write = lockevent_write,
.llseek = default_llseek,
};
#ifdef CONFIG_PARAVIRT_SPINLOCKS
#include <asm/paravirt.h>
static bool __init skip_lockevent(const char *name)
{
static int pv_on __initdata = -1;
if (pv_on < 0)
pv_on = !pv_is_native_spin_unlock();
/*
* Skip PV qspinlock events on bare metal.
*/
if (!pv_on && !memcmp(name, "pv_", 3))
return true;
return false;
}
#else
static inline bool skip_lockevent(const char *name)
{
return false;
}
#endif
/*
* Initialize debugfs for the locking event counts.
*/
static int __init init_lockevent_counts(void)
{
struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL);
int i;
if (!d_counts)
goto out;
/*
* Create the debugfs files
*
* As reading from and writing to the stat files can be slow, only
* root is allowed to do the read/write to limit impact to system
* performance.
*/
for (i = 0; i < lockevent_num; i++) {
if (skip_lockevent(lockevent_names[i]))
continue;
if (!debugfs_create_file(lockevent_names[i], 0400, d_counts,
(void *)(long)i, &fops_lockevent))
goto fail_undo;
}
if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
d_counts, (void *)(long)LOCKEVENT_reset_cnts,
&fops_lockevent))
goto fail_undo;
return 0;
fail_undo:
debugfs_remove_recursive(d_counts);
out:
pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR);
return -ENOMEM;
}
fs_initcall(init_lockevent_counts);
/* SPDX-License-Identifier: GPL-2.0 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <longman@redhat.com>
*/
#ifndef __LOCKING_LOCK_EVENTS_H
#define __LOCKING_LOCK_EVENTS_H
enum lock_events {
#include "lock_events_list.h"
lockevent_num, /* Total number of lock event counts */
LOCKEVENT_reset_cnts = lockevent_num,
};
#ifdef CONFIG_LOCK_EVENT_COUNTS
/*
* Per-cpu counters
*/
DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/*
* Increment the PV qspinlock statistical counters
*/
static inline void __lockevent_inc(enum lock_events event, bool cond)
{
if (cond)
__this_cpu_inc(lockevents[event]);
}
#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c)
static inline void __lockevent_add(enum lock_events event, int inc)
{
__this_cpu_add(lockevents[event], inc);
}
#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
#else /* CONFIG_LOCK_EVENT_COUNTS */
#define lockevent_inc(ev)
#define lockevent_add(ev, c)
#define lockevent_cond_inc(ev, c)
#endif /* CONFIG_LOCK_EVENT_COUNTS */
#endif /* __LOCKING_LOCK_EVENTS_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <longman@redhat.com>
*/
#ifndef LOCK_EVENT
#define LOCK_EVENT(name) LOCKEVENT_ ## name,
#endif
#ifdef CONFIG_QUEUED_SPINLOCKS
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
* Locking events for PV qspinlock.
*/
LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */
LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */
LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */
LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
/*
* Locking events for qspinlock
*
* Subtracting lock_use_node[234] from lock_slowpath will give you
* lock_use_node1.
*/
LOCK_EVENT(lock_pending) /* # of locking ops via pending code */
LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */
LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */
LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */
LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */
LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */
#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
* Locking events for rwsem
*/
LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */
LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */
......@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
{
char c = '.';
if (class->usage_mask & lock_flag(bit + 2))
if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
c = '+';
if (class->usage_mask & lock_flag(bit)) {
c = '-';
if (class->usage_mask & lock_flag(bit + 2))
if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
c = '?';
}
......@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target,
}
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
static inline int usage_accumulate(struct lock_list *entry, void *mask)
{
*(unsigned long *)mask |= entry->class->usage_mask;
return 0;
}
/*
* Forwards and backwards subgraph searching, for the purposes of
* proving that two subgraphs can be connected by a new dependency
* without creating any illegal irq-safe -> irq-unsafe lock dependency.
*/
static inline int usage_match(struct lock_list *entry, void *bit)
static inline int usage_match(struct lock_list *entry, void *mask)
{
return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit);
return entry->class->usage_mask & *(unsigned long *)mask;
}
/*
* Find a node in the forwards-direction dependency sub-graph starting
* at @root->class that matches @bit.
......@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit)
* Return <0 on error.
*/
static int
find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
find_usage_forwards(struct lock_list *root, unsigned long usage_mask,
struct lock_list **target_entry)
{
int result;
debug_atomic_inc(nr_find_usage_forwards_checks);
result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
result = __bfs_forwards(root, &usage_mask, usage_match, target_entry);
return result;
}
......@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
* Return <0 on error.
*/
static int
find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
find_usage_backwards(struct lock_list *root, unsigned long usage_mask,
struct lock_list **target_entry)
{
int result;
debug_atomic_inc(nr_find_usage_backwards_checks);
result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
result = __bfs_backwards(root, &usage_mask, usage_match, target_entry);
return result;
}
......@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr,
return 0;
}
static int
check_usage(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next, enum lock_usage_bit bit_backwards,
enum lock_usage_bit bit_forwards, const char *irqclass)
{
int ret;
struct lock_list this, that;
struct lock_list *uninitialized_var(target_entry);
struct lock_list *uninitialized_var(target_entry1);
this.parent = NULL;
this.class = hlock_class(prev);
ret = find_usage_backwards(&this, bit_backwards, &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
return ret;
that.parent = NULL;
that.class = hlock_class(next);
ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
return ret;
return print_bad_irq_dependency(curr, &this, &that,
target_entry, target_entry1,
prev, next,
bit_backwards, bit_forwards, irqclass);
}
static const char *state_names[] = {
#define LOCKDEP_STATE(__STATE) \
__stringify(__STATE),
......@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = {
static inline const char *state_name(enum lock_usage_bit bit)
{
return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2];
if (bit & LOCK_USAGE_READ_MASK)
return state_rnames[bit >> LOCK_USAGE_DIR_MASK];
else
return state_names[bit >> LOCK_USAGE_DIR_MASK];
}
/*
* The bit number is encoded like:
*
* bit0: 0 exclusive, 1 read lock
* bit1: 0 used in irq, 1 irq enabled
* bit2-n: state
*/
static int exclusive_bit(int new_bit)
{
int state = new_bit & LOCK_USAGE_STATE_MASK;
......@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit)
return state | (dir ^ LOCK_USAGE_DIR_MASK);
}
/*
* Observe that when given a bitmask where each bitnr is encoded as above, a
* right shift of the mask transforms the individual bitnrs as -1 and
* conversely, a left shift transforms into +1 for the individual bitnrs.
*
* So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can
* create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0)
* instead by subtracting the bit number by 2, or shifting the mask right by 2.
*
* Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2.
*
* So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is
* all bits set) and recompose with bitnr1 flipped.
*/
static unsigned long invert_dir_mask(unsigned long mask)
{
unsigned long excl = 0;
/* Invert dir */
excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK;
excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK;
return excl;
}
/*
* As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all
* bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*).
* And then mask out all bitnr0.
*/
static unsigned long exclusive_mask(unsigned long mask)
{
unsigned long excl = invert_dir_mask(mask);
/* Strip read */
excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK;
excl &= ~LOCKF_IRQ_READ;
return excl;
}
/*
* Retrieve the _possible_ original mask to which @mask is
* exclusive. Ie: this is the opposite of exclusive_mask().
* Note that 2 possible original bits can match an exclusive
* bit: one has LOCK_USAGE_READ_MASK set, the other has it
* cleared. So both are returned for each exclusive bit.
*/
static unsigned long original_mask(unsigned long mask)
{
unsigned long excl = invert_dir_mask(mask);
/* Include read in existing usages */
excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK;
return excl;
}
/*
* Find the first pair of bit match between an original
* usage mask and an exclusive usage mask.
*/
static int find_exclusive_match(unsigned long mask,
unsigned long excl_mask,
enum lock_usage_bit *bitp,
enum lock_usage_bit *excl_bitp)
{
int bit, excl;
for_each_set_bit(bit, &mask, LOCK_USED) {
excl = exclusive_bit(bit);
if (excl_mask & lock_flag(excl)) {
*bitp = bit;
*excl_bitp = excl;
return 0;
}
}
return -1;
}
/*
* Prove that the new dependency does not connect a hardirq-safe(-read)
* lock with a hardirq-unsafe lock - to achieve this we search
* the backwards-subgraph starting at <prev>, and the
* forwards-subgraph starting at <next>:
*/
static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next, enum lock_usage_bit bit)
struct held_lock *next)
{
unsigned long usage_mask = 0, forward_mask, backward_mask;
enum lock_usage_bit forward_bit = 0, backward_bit = 0;
struct lock_list *uninitialized_var(target_entry1);
struct lock_list *uninitialized_var(target_entry);
struct lock_list this, that;
int ret;
/*
* Prove that the new dependency does not connect a hardirq-safe
* lock with a hardirq-unsafe lock - to achieve this we search
* the backwards-subgraph starting at <prev>, and the
* forwards-subgraph starting at <next>:
* Step 1: gather all hard/soft IRQs usages backward in an
* accumulated usage mask.
*/
if (!check_usage(curr, prev, next, bit,
exclusive_bit(bit), state_name(bit)))
return 0;
this.parent = NULL;
this.class = hlock_class(prev);
ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
if (ret < 0)
return print_bfs_bug(ret);
bit++; /* _READ */
usage_mask &= LOCKF_USED_IN_IRQ_ALL;
if (!usage_mask)
return 1;
/*
* Prove that the new dependency does not connect a hardirq-safe-read
* lock with a hardirq-unsafe lock - to achieve this we search
* the backwards-subgraph starting at <prev>, and the
* forwards-subgraph starting at <next>:
* Step 2: find exclusive uses forward that match the previous
* backward accumulated mask.
*/
if (!check_usage(curr, prev, next, bit,
exclusive_bit(bit), state_name(bit)))
return 0;
forward_mask = exclusive_mask(usage_mask);
return 1;
}
that.parent = NULL;
that.class = hlock_class(next);
static int
check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next)
{
#define LOCKDEP_STATE(__STATE) \
if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
return 0;
#include "lockdep_states.h"
#undef LOCKDEP_STATE
ret = find_usage_forwards(&that, forward_mask, &target_entry1);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
return ret;
return 1;
/*
* Step 3: we found a bad match! Now retrieve a lock from the backward
* list whose usage mask matches the exclusive usage mask from the
* lock found on the forward list.
*/
backward_mask = original_mask(target_entry1->class->usage_mask);
ret = find_usage_backwards(&this, backward_mask, &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (DEBUG_LOCKS_WARN_ON(ret == 1))
return 1;
/*
* Step 4: narrow down to a pair of incompatible usage bits
* and report it.
*/
ret = find_exclusive_match(target_entry->class->usage_mask,
target_entry1->class->usage_mask,
&backward_bit, &forward_bit);
if (DEBUG_LOCKS_WARN_ON(ret == -1))
return 1;
return print_bad_irq_dependency(curr, &this, &that,
target_entry, target_entry1,
prev, next,
backward_bit, forward_bit,
state_name(backward_bit));
}
static void inc_chains(void)
......@@ -2030,9 +2128,8 @@ static void inc_chains(void)
#else
static inline int
check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next)
static inline int check_irq_usage(struct task_struct *curr,
struct held_lock *prev, struct held_lock *next)
{
return 1;
}
......@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
else if (unlikely(ret < 0))
return print_bfs_bug(ret);
if (!check_prev_add_irq(curr, prev, next))
if (!check_irq_usage(curr, prev, next))
return 0;
/*
......@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr)
#endif
}
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
static void
print_usage_bug_scenario(struct held_lock *lock)
{
......@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this,
return 1;
}
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* print irq inversion bug:
......@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
ret = find_usage_forwards(&root, bit, &target_entry);
ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
......@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
ret = find_usage_backwards(&root, bit, &target_entry);
ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
......@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = {
static inline int state_verbose(enum lock_usage_bit bit,
struct lock_class *class)
{
return state_verbose_f[bit >> 2](class);
return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class);
}
typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
......@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip)
/*
* See the fine text that goes along with this variable definition.
*/
if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled))
return;
/*
......
......@@ -42,13 +42,35 @@ enum {
__LOCKF(USED)
};
#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
static const unsigned long LOCKF_ENABLED_IRQ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE |
static const unsigned long LOCKF_USED_IN_IRQ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ |
static const unsigned long LOCKF_ENABLED_IRQ_READ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ |
static const unsigned long LOCKF_USED_IN_IRQ_READ =
#include "lockdep_states.h"
0;
#undef LOCKDEP_STATE
#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
#define LOCKF_ENABLED_IRQ_READ \
(LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
#define LOCKF_USED_IN_IRQ_READ \
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ)
#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ)
/*
* CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
......
......@@ -7,6 +7,8 @@
#include <linux/sched.h>
#include <linux/errno.h>
#include "rwsem.h"
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
{
......
......@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* 0,1,0 -> 0,0,1
*/
clear_pending_set_locked(lock);
qstat_inc(qstat_lock_pending, true);
lockevent_inc(lock_pending);
return;
/*
......@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* queuing.
*/
queue:
qstat_inc(qstat_lock_slowpath, true);
lockevent_inc(lock_slowpath);
pv_queue:
node = this_cpu_ptr(&qnodes[0].mcs);
idx = node->count++;
......@@ -419,7 +419,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* simple enough.
*/
if (unlikely(idx >= MAX_NODES)) {
qstat_inc(qstat_lock_no_node, true);
lockevent_inc(lock_no_node);
while (!queued_spin_trylock(lock))
cpu_relax();
goto release;
......@@ -430,7 +430,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
/*
* Keep counts of non-zero index values:
*/
qstat_inc(qstat_lock_use_node2 + idx - 1, idx);
lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
/*
* Ensure that we increment the head node->count before initialising
......
......@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
if (!(val & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
qstat_inc(qstat_pv_lock_stealing, true);
lockevent_inc(pv_lock_stealing);
return true;
}
if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK))
......@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
qstat_hop(hopcnt);
lockevent_pv_hop(hopcnt);
return &he->lock;
}
}
......@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
smp_store_mb(pn->state, vcpu_halted);
if (!READ_ONCE(node->locked)) {
qstat_inc(qstat_pv_wait_node, true);
qstat_inc(qstat_pv_wait_early, wait_early);
lockevent_inc(pv_wait_node);
lockevent_cond_inc(pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
}
......@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
lockevent_cond_inc(pv_spurious_wakeup,
!READ_ONCE(node->locked));
}
/*
......@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
/*
* Tracking # of slowpath locking operations
*/
qstat_inc(qstat_lock_slowpath, true);
lockevent_inc(lock_slowpath);
for (;; waitcnt++) {
/*
......@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
}
}
WRITE_ONCE(pn->state, vcpu_hashed);
qstat_inc(qstat_pv_wait_head, true);
qstat_inc(qstat_pv_wait_again, waitcnt);
lockevent_inc(pv_wait_head);
lockevent_cond_inc(pv_wait_again, waitcnt);
pv_wait(&lock->locked, _Q_SLOW_VAL);
/*
......@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
qstat_inc(qstat_pv_kick_unlock, true);
lockevent_inc(pv_kick_unlock);
pv_kick(node->cpu);
}
......
......@@ -9,262 +9,105 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Waiman Long <waiman.long@hpe.com>
* Authors: Waiman Long <longman@redhat.com>
*/
/*
* When queued spinlock statistical counters are enabled, the following
* debugfs files will be created for reporting the counter values:
*
* <debugfs>/qlockstat/
* pv_hash_hops - average # of hops per hashing operation
* pv_kick_unlock - # of vCPU kicks issued at unlock time
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
* pv_latency_kick - average latency (ns) of vCPU kick operation
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
* pv_lock_stealing - # of lock stealing operations
* pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
* pv_wait_again - # of wait's after a queue head vCPU kick
* pv_wait_early - # of early vCPU wait's
* pv_wait_head - # of vCPU wait's at the queue head
* pv_wait_node - # of vCPU wait's at a non-head queue node
* lock_pending - # of locking operations via pending code
* lock_slowpath - # of locking operations via MCS lock queue
* lock_use_node2 - # of locking operations that use 2nd per-CPU node
* lock_use_node3 - # of locking operations that use 3rd per-CPU node
* lock_use_node4 - # of locking operations that use 4th per-CPU node
* lock_no_node - # of locking operations without using per-CPU node
*
* Subtracting lock_use_node[234] from lock_slowpath will give you
* lock_use_node1.
*
* Writing to the "reset_counters" file will reset all the above counter
* values.
*
* These statistical counters are implemented as per-cpu variables which are
* summed and computed whenever the corresponding debugfs files are read. This
* minimizes added overhead making the counters usable even in a production
* environment.
*
* There may be slight difference between pv_kick_wake and pv_kick_unlock.
*/
enum qlock_stats {
qstat_pv_hash_hops,
qstat_pv_kick_unlock,
qstat_pv_kick_wake,
qstat_pv_latency_kick,
qstat_pv_latency_wake,
qstat_pv_lock_stealing,
qstat_pv_spurious_wakeup,
qstat_pv_wait_again,
qstat_pv_wait_early,
qstat_pv_wait_head,
qstat_pv_wait_node,
qstat_lock_pending,
qstat_lock_slowpath,
qstat_lock_use_node2,
qstat_lock_use_node3,
qstat_lock_use_node4,
qstat_lock_no_node,
qstat_num, /* Total number of statistical counters */
qstat_reset_cnts = qstat_num,
};
#include "lock_events.h"
#ifdef CONFIG_QUEUED_LOCK_STAT
#ifdef CONFIG_LOCK_EVENT_COUNTS
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
* Collect pvqspinlock statistics
* Collect pvqspinlock locking event counts
*/
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/fs.h>
static const char * const qstat_names[qstat_num + 1] = {
[qstat_pv_hash_hops] = "pv_hash_hops",
[qstat_pv_kick_unlock] = "pv_kick_unlock",
[qstat_pv_kick_wake] = "pv_kick_wake",
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
[qstat_pv_latency_kick] = "pv_latency_kick",
[qstat_pv_latency_wake] = "pv_latency_wake",
[qstat_pv_lock_stealing] = "pv_lock_stealing",
[qstat_pv_wait_again] = "pv_wait_again",
[qstat_pv_wait_early] = "pv_wait_early",
[qstat_pv_wait_head] = "pv_wait_head",
[qstat_pv_wait_node] = "pv_wait_node",
[qstat_lock_pending] = "lock_pending",
[qstat_lock_slowpath] = "lock_slowpath",
[qstat_lock_use_node2] = "lock_use_node2",
[qstat_lock_use_node3] = "lock_use_node3",
[qstat_lock_use_node4] = "lock_use_node4",
[qstat_lock_no_node] = "lock_no_node",
[qstat_reset_cnts] = "reset_counters",
};
#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev]
/*
* Per-cpu counters
* PV specific per-cpu counter
*/
static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
static DEFINE_PER_CPU(u64, pv_kick_time);
/*
* Function to read and return the qlock statistical counter values
* Function to read and return the PV qspinlock counts.
*
* The following counters are handled specially:
* 1. qstat_pv_latency_kick
* 1. pv_latency_kick
* Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
* 2. qstat_pv_latency_wake
* 2. pv_latency_wake
* Average wake latency (ns) = pv_latency_wake/pv_kick_wake
* 3. qstat_pv_hash_hops
* 3. pv_hash_hops
* Average hops/hash = pv_hash_hops/pv_kick_unlock
*/
static ssize_t qstat_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
ssize_t lockevent_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
char buf[64];
int cpu, counter, len;
u64 stat = 0, kicks = 0;
int cpu, id, len;
u64 sum = 0, kicks = 0;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
counter = (long)file_inode(file)->i_private;
id = (long)file_inode(file)->i_private;
if (counter >= qstat_num)
if (id >= lockevent_num)
return -EBADF;
for_each_possible_cpu(cpu) {
stat += per_cpu(qstats[counter], cpu);
sum += per_cpu(lockevents[id], cpu);
/*
* Need to sum additional counter for some of them
* Need to sum additional counters for some of them
*/
switch (counter) {
switch (id) {
case qstat_pv_latency_kick:
case qstat_pv_hash_hops:
kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
case LOCKEVENT_pv_latency_kick:
case LOCKEVENT_pv_hash_hops:
kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
break;
case qstat_pv_latency_wake:
kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
case LOCKEVENT_pv_latency_wake:
kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
break;
}
}
if (counter == qstat_pv_hash_hops) {
if (id == LOCKEVENT_pv_hash_hops) {
u64 frac = 0;
if (kicks) {
frac = 100ULL * do_div(stat, kicks);
frac = 100ULL * do_div(sum, kicks);
frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
}
/*
* Return a X.XX decimal number
*/
len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
sum, frac);
} else {
/*
* Round to the nearest ns
*/
if ((counter == qstat_pv_latency_kick) ||
(counter == qstat_pv_latency_wake)) {
if ((id == LOCKEVENT_pv_latency_kick) ||
(id == LOCKEVENT_pv_latency_wake)) {
if (kicks)
stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
}
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
}
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
/*
* Function to handle write request
*
* When counter = reset_cnts, reset all the counter values.
* Since the counter updates aren't atomic, the resetting is done twice
* to make sure that the counters are very likely to be all cleared.
*/
static ssize_t qstat_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
int cpu;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
if ((long)file_inode(file)->i_private != qstat_reset_cnts)
return count;
for_each_possible_cpu(cpu) {
int i;
unsigned long *ptr = per_cpu_ptr(qstats, cpu);
for (i = 0 ; i < qstat_num; i++)
WRITE_ONCE(ptr[i], 0);
}
return count;
}
/*
* Debugfs data structures
*/
static const struct file_operations fops_qstat = {
.read = qstat_read,
.write = qstat_write,
.llseek = default_llseek,
};
/*
* Initialize debugfs for the qspinlock statistical counters
*/
static int __init init_qspinlock_stat(void)
{
struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
int i;
if (!d_qstat)
goto out;
/*
* Create the debugfs files
*
* As reading from and writing to the stat files can be slow, only
* root is allowed to do the read/write to limit impact to system
* performance.
*/
for (i = 0; i < qstat_num; i++)
if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
(void *)(long)i, &fops_qstat))
goto fail_undo;
if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
(void *)(long)qstat_reset_cnts, &fops_qstat))
goto fail_undo;
return 0;
fail_undo:
debugfs_remove_recursive(d_qstat);
out:
pr_warn("Could not create 'qlockstat' debugfs entries\n");
return -ENOMEM;
}
fs_initcall(init_qspinlock_stat);
/*
* Increment the PV qspinlock statistical counters
*/
static inline void qstat_inc(enum qlock_stats stat, bool cond)
{
if (cond)
this_cpu_inc(qstats[stat]);
}
/*
* PV hash hop count
*/
static inline void qstat_hop(int hopcnt)
static inline void lockevent_pv_hop(int hopcnt)
{
this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
}
/*
......@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu)
per_cpu(pv_kick_time, cpu) = start;
pv_kick(cpu);
this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
}
/*
......@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val)
*pkick_time = 0;
pv_wait(ptr, val);
if (*pkick_time) {
this_cpu_add(qstats[qstat_pv_latency_wake],
this_cpu_add(EVENT_COUNT(pv_latency_wake),
sched_clock() - *pkick_time);
qstat_inc(qstat_pv_kick_wake, true);
lockevent_inc(pv_kick_wake);
}
}
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)
#else /* CONFIG_QUEUED_LOCK_STAT */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
#else /* CONFIG_LOCK_EVENT_COUNTS */
static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
static inline void qstat_hop(int hopcnt) { }
static inline void lockevent_pv_hop(int hopcnt) { }
#endif /* CONFIG_QUEUED_LOCK_STAT */
#endif /* CONFIG_LOCK_EVENT_COUNTS */
// SPDX-License-Identifier: GPL-2.0
/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
* generic spinlock implementation
*
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
* - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
* - Derived also from comments by Linus
*/
#include <linux/rwsem.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/export.h>
enum rwsem_waiter_type {
RWSEM_WAITING_FOR_WRITE,
RWSEM_WAITING_FOR_READ
};
struct rwsem_waiter {
struct list_head list;
struct task_struct *task;
enum rwsem_waiter_type type;
};
int rwsem_is_locked(struct rw_semaphore *sem)
{
int ret = 1;
unsigned long flags;
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
ret = (sem->count != 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
return ret;
}
EXPORT_SYMBOL(rwsem_is_locked);
/*
* initialise the semaphore
*/
void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->count = 0;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
}
EXPORT_SYMBOL(__init_rwsem);
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here, then:
* - the 'active count' _reached_ zero
* - the 'waiting count' is non-zero
* - the spinlock must be held by the caller
* - woken process blocks are discarded from the list after having task zeroed
* - writers are only woken if wakewrite is non-zero
*/
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
int woken;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wakewrite)
/* Wake up a writer. Note that we do not grant it the
* lock - it will have to acquire it when it runs. */
wake_up_process(waiter->task);
goto out;
}
/* grant an infinite number of read locks to the front of the queue */
woken = 0;
do {
struct list_head *next = waiter->list.next;
list_del(&waiter->list);
tsk = waiter->task;
/*
* Make sure we do not wakeup the next reader before
* setting the nil condition to grant the next reader;
* otherwise we could miss the wakeup on the other
* side and end up sleeping again. See the pairing
* in rwsem_down_read_failed().
*/
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
woken++;
if (next == &sem->wait_list)
break;
waiter = list_entry(next, struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
sem->count += woken;
out:
return sem;
}
/*
* wake a single writer
*/
static inline struct rw_semaphore *
__rwsem_wake_one_writer(struct rw_semaphore *sem)
{
struct rwsem_waiter *waiter;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
wake_up_process(waiter->task);
return sem;
}
/*
* get a read lock on the semaphore
*/
int __sched __down_read_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->count++;
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
goto out;
}
/* set up my own style of waitqueue */
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
get_task_struct(current);
list_add_tail(&waiter.list, &sem->wait_list);
/* wait to be given the lock */
for (;;) {
if (!waiter.task)
break;
if (signal_pending_state(state, current))
goto out_nolock;
set_current_state(state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
out:
return 0;
out_nolock:
/*
* We didn't take the lock, so that there is a writer, which
* is owner or the first waiter of the sem. If it's a waiter,
* it will be woken by current owner. Not need to wake anybody.
*/
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return -EINTR;
}
void __sched __down_read(struct rw_semaphore *sem)
{
__down_read_common(sem, TASK_UNINTERRUPTIBLE);
}
int __sched __down_read_killable(struct rw_semaphore *sem)
{
return __down_read_common(sem, TASK_KILLABLE);
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
int __down_read_trylock(struct rw_semaphore *sem)
{
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->count++;
ret = 1;
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
/*
* get a write lock on the semaphore
*/
int __sched __down_write_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* set up my own style of waitqueue */
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);
/* wait for someone to release the lock */
for (;;) {
/*
* That is the key to support write lock stealing: allows the
* task already on CPU to get the lock soon rather than put
* itself into sleep and waiting for system woke it or someone
* else in the head of the wait list up.
*/
if (sem->count == 0)
break;
if (signal_pending_state(state, current))
goto out_nolock;
set_current_state(state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->count = -1;
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
out_nolock:
list_del(&waiter.list);
if (!list_empty(&sem->wait_list) && sem->count >= 0)
__rwsem_do_wake(sem, 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return -EINTR;
}
void __sched __down_write(struct rw_semaphore *sem)
{
__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}
int __sched __down_write_killable(struct rw_semaphore *sem)
{
return __down_write_common(sem, TASK_KILLABLE);
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int __down_write_trylock(struct rw_semaphore *sem)
{
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->count == 0) {
/* got the lock */
sem->count = -1;
ret = 1;
}
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
}
/*
* release a read lock on the semaphore
*/
void __up_read(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (--sem->count == 0 && !list_empty(&sem->wait_list))
sem = __rwsem_wake_one_writer(sem);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
* release a write lock on the semaphore
*/
void __up_write(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->count = 0;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 1);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
/*
* downgrade a write lock into a read lock
* - just wake up any readers at the front of the queue
*/
void __downgrade_write(struct rw_semaphore *sem)
{
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->count = 1;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
......@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
* will notice the queued writer.
*/
wake_q_add(wake_q, waiter->task);
lockevent_inc(rwsem_wake_writer);
}
return;
......@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
goto try_reader_grant;
}
/*
* It is not really necessary to set it to reader-owned here,
* but it gives the spinners an early indication that the
* readers now have the lock.
* Set it to reader-owned to give spinners an early
* indication that readers now have the lock.
*/
__rwsem_set_reader_owned(sem, waiter->task);
}
......@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
}
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
lockevent_cond_inc(rwsem_wake_reader, woken);
if (list_empty(&sem->wait_list)) {
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
......@@ -224,92 +225,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
atomic_long_add(adjustment, &sem->count);
}
/*
* Wait for the read lock to be granted
*/
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
/*
* In case the wait queue is empty and the lock isn't owned
* by a writer, this reader can exit the slowpath and return
* immediately as its RWSEM_ACTIVE_READ_BIAS has already
* been set in the count.
*/
if (atomic_long_read(&sem->count) >= 0) {
raw_spin_unlock_irq(&sem->wait_lock);
return sem;
}
adjustment += RWSEM_WAITING_BIAS;
}
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
/*
* If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
/* wait to be given the lock */
while (true) {
set_current_state(state);
if (!waiter.task)
break;
if (signal_pending_state(state, current)) {
raw_spin_lock_irq(&sem->wait_lock);
if (waiter.task)
goto out_nolock;
raw_spin_unlock_irq(&sem->wait_lock);
break;
}
schedule();
}
__set_current_state(TASK_RUNNING);
return sem;
out_nolock:
list_del(&waiter.list);
if (list_empty(&sem->wait_list))
atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
raw_spin_unlock_irq(&sem->wait_lock);
__set_current_state(TASK_RUNNING);
return ERR_PTR(-EINTR);
}
__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);
__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
/*
* This function must be called with the sem->wait_lock held to prevent
* race conditions between checking the rwsem wait list and setting the
......@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
long old, count = atomic_long_read(&sem->count);
while (true) {
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
long count = atomic_long_read(&sem->count);
old = atomic_long_cmpxchg_acquire(&sem->count, count,
count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count) {
while (!count || count == RWSEM_WAITING_BIAS) {
if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
count + RWSEM_ACTIVE_WRITE_BIAS)) {
rwsem_set_owner(sem);
lockevent_inc(rwsem_opt_wlock);
return true;
}
count = old;
}
return false;
}
static inline bool owner_on_cpu(struct task_struct *owner)
......@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
osq_unlock(&sem->osq);
done:
preempt_enable();
lockevent_cond_inc(rwsem_opt_fail, !taken);
return taken;
}
......@@ -504,6 +416,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
}
#endif
/*
* Wait for the read lock to be granted
*/
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
DEFINE_WAKE_Q(wake_q);
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
/*
* In case the wait queue is empty and the lock isn't owned
* by a writer, this reader can exit the slowpath and return
* immediately as its RWSEM_ACTIVE_READ_BIAS has already
* been set in the count.
*/
if (atomic_long_read(&sem->count) >= 0) {
raw_spin_unlock_irq(&sem->wait_lock);
rwsem_set_reader_owned(sem);
lockevent_inc(rwsem_rlock_fast);
return sem;
}
adjustment += RWSEM_WAITING_BIAS;
}
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
/*
* If there are no active locks, wake the front queued process(es).
*
* If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers !
*/
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
/* wait to be given the lock */
while (true) {
set_current_state(state);
if (!waiter.task)
break;
if (signal_pending_state(state, current)) {
raw_spin_lock_irq(&sem->wait_lock);
if (waiter.task)
goto out_nolock;
raw_spin_unlock_irq(&sem->wait_lock);
break;
}
schedule();
lockevent_inc(rwsem_sleep_reader);
}
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock);
return sem;
out_nolock:
list_del(&waiter.list);
if (list_empty(&sem->wait_list))
atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
raw_spin_unlock_irq(&sem->wait_lock);
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock_fail);
return ERR_PTR(-EINTR);
}
__visible struct rw_semaphore * __sched
rwsem_down_read_failed(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);
__visible struct rw_semaphore * __sched
rwsem_down_read_failed_killable(struct rw_semaphore *sem)
{
return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed_killable);
/*
* Wait until we successfully acquire the write lock
*/
......@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
goto out_nolock;
schedule();
lockevent_inc(rwsem_sleep_writer);
set_current_state(state);
} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
......@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
lockevent_inc(rwsem_wlock);
return ret;
......@@ -601,6 +606,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
lockevent_inc(rwsem_wlock_fail);
return ERR_PTR(-EINTR);
}
......
......@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem)
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read);
......@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
return -EINTR;
}
rwsem_set_reader_owned(sem);
return 0;
}
......@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem)
{
int ret = __down_read_trylock(sem);
if (ret == 1) {
if (ret == 1)
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
rwsem_set_reader_owned(sem);
}
return ret;
}
......@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem)
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write);
......@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem)
return -EINTR;
}
rwsem_set_owner(sem);
return 0;
}
......@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem)
{
int ret = __down_write_trylock(sem);
if (ret == 1) {
if (ret == 1)
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
rwsem_set_owner(sem);
}
return ret;
}
......@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock);
void up_read(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
rwsem_clear_reader_owned(sem);
__up_read(sem);
}
......@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read);
void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
DEBUG_RWSEMS_WARN_ON(sem->owner != current);
rwsem_clear_owner(sem);
__up_write(sem);
}
......@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write);
void downgrade_write(struct rw_semaphore *sem)
{
lock_downgrade(&sem->dep_map, _RET_IP_);
DEBUG_RWSEMS_WARN_ON(sem->owner != current);
rwsem_set_reader_owned(sem);
__downgrade_write(sem);
}
......@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read_nested);
......@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(_down_write_nest_lock);
......@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write_nested);
......@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
return -EINTR;
}
rwsem_set_owner(sem);
return 0;
}
......@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested);
void up_read_non_owner(struct rw_semaphore *sem)
{
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
sem);
__up_read(sem);
}
......
......@@ -23,15 +23,44 @@
* is involved. Ideally we would like to track all the readers that own
* a rwsem, but the overhead is simply too big.
*/
#include "lock_events.h"
#define RWSEM_READER_OWNED (1UL << 0)
#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)
#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
if (!debug_locks_silent && \
WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
#c, atomic_long_read(&(sem)->count), \
(long)((sem)->owner), (long)current, \
list_empty(&(sem)->wait_list) ? "" : "not ")) \
debug_locks_off(); \
} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif
/*
* R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
* Adapted largely from include/asm-i386/rwsem.h
* by Paul Mackerras <paulus@samba.org>.
*/
/*
* the semaphore definition
*/
#ifdef CONFIG_64BIT
# define RWSEM_ACTIVE_MASK 0xffffffffL
#else
# define DEBUG_RWSEMS_WARN_ON(c)
# define RWSEM_ACTIVE_MASK 0x0000ffffL
#endif
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* All writes to owner are protected by WRITE_ONCE() to make sure that
......@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
rwsem_down_read_failed(sem);
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
RWSEM_READER_OWNED), sem);
} else {
rwsem_set_reader_owned(sem);
}
}
static inline int __down_read_killable(struct rw_semaphore *sem)
{
if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
if (IS_ERR(rwsem_down_read_failed_killable(sem)))
return -EINTR;
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
RWSEM_READER_OWNED), sem);
} else {
rwsem_set_reader_owned(sem);
}
return 0;
}
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
/*
* Optimize for the case when the rwsem is not locked at all.
*/
long tmp = RWSEM_UNLOCKED_VALUE;
lockevent_inc(rwsem_rtrylock);
do {
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
tmp + RWSEM_ACTIVE_READ_BIAS)) {
rwsem_set_reader_owned(sem);
return 1;
}
} while (tmp >= 0);
return 0;
}
/*
* lock for writing
*/
static inline void __down_write(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
rwsem_down_write_failed(sem);
rwsem_set_owner(sem);
}
static inline int __down_write_killable(struct rw_semaphore *sem)
{
long tmp;
tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count);
if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
if (IS_ERR(rwsem_down_write_failed_killable(sem)))
return -EINTR;
rwsem_set_owner(sem);
return 0;
}
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
long tmp;
lockevent_inc(rwsem_wtrylock);
tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (tmp == RWSEM_UNLOCKED_VALUE) {
rwsem_set_owner(sem);
return true;
}
return false;
}
/*
* unlock after reading
*/
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
sem);
rwsem_clear_reader_owned(sem);
tmp = atomic_long_dec_return_release(&sem->count);
if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
rwsem_wake(sem);
}
/*
* unlock after writing
*/
static inline void __up_write(struct rw_semaphore *sem)
{
DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
rwsem_clear_owner(sem);
if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
&sem->count) < 0))
rwsem_wake(sem);
}
/*
* downgrade write lock to read lock
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
long tmp;
/*
* When downgrading from exclusive to shared ownership,
* anything inside the write-locked region cannot leak
* into the read side. In contrast, anything in the
* read-locked region is ok to be re-ordered into the
* write side. As such, rely on RELEASE semantics.
*/
DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
if (tmp < 0)
rwsem_downgrade_wake(sem);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册