提交 eb3b7b84 编写于 作者: M Martin Schwidefsky

s390/rwlock: introduce rwlock wait queueing

Like the common queued rwlock code, the s390 implementation uses the
queued spinlock code on a spinlock_t embedded in the rwlock_t to achieve
the queueing. The encoding of the rwlock_t differs though: the counter
field in the rwlock_t is split into two parts. The upper two bytes hold
the write bit and the write wait counter, the lower two bytes hold the
read counter.

The arch_read_lock operation works exactly like the common qrwlock but
the enqueue operation for a writer follows a different logic. After the
failed inline attempt to take the rwlock for writing, the writer first increases
the write wait counter, acquires the wait spin_lock for the queueing,
and then loops until there are no readers and the write bit is zero.
Without the write wait counter a CPU that just released the rwlock
could immediately reacquire the lock in the inline code, bypassing all
outstanding read and write waiters. For s390 this would cause massive
imbalances in favour of writers in case of a contended rwlock.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
上级 b96f7d88
...@@ -39,19 +39,24 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg") ...@@ -39,19 +39,24 @@ __ATOMIC_OPS(__atomic64_xor, long, "laxg")
#undef __ATOMIC_OPS #undef __ATOMIC_OPS
#undef __ATOMIC_OP #undef __ATOMIC_OP
static inline void __atomic_add_const(int val, int *ptr) #define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \
{ static inline void op_name(op_type val, op_type *ptr) \
asm volatile( { \
" asi %[ptr],%[val]\n" asm volatile( \
: [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc"); op_string " %[ptr],%[val]\n" \
op_barrier \
: [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc", "memory");\
} }
static inline void __atomic64_add_const(long val, long *ptr) #define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
{ __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \
asm volatile( __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
" agsi %[ptr],%[val]\n"
: [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc"); __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
} __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#undef __ATOMIC_CONST_OPS
#undef __ATOMIC_CONST_OP
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
...@@ -107,6 +112,11 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") ...@@ -107,6 +112,11 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#undef __ATOMIC64_OPS #undef __ATOMIC64_OPS
#define __atomic_add_const(val, ptr) __atomic_add(val, ptr)
#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr)
#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr)
#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr)
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
static inline int __atomic_cmpxchg(int *ptr, int old, int new) static inline int __atomic_cmpxchg(int *ptr, int old, int new)
......
...@@ -35,7 +35,6 @@ bool arch_vcpu_is_preempted(int cpu); ...@@ -35,7 +35,6 @@ bool arch_vcpu_is_preempted(int cpu);
* (the type definitions are in asm/spinlock_types.h) * (the type definitions are in asm/spinlock_types.h)
*/ */
void arch_lock_relax(int cpu);
void arch_spin_relax(arch_spinlock_t *lock); void arch_spin_relax(arch_spinlock_t *lock);
void arch_spin_lock_wait(arch_spinlock_t *); void arch_spin_lock_wait(arch_spinlock_t *);
...@@ -110,164 +109,63 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) ...@@ -110,164 +109,63 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
* read_can_lock - would read_trylock() succeed? * read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question. * @lock: the rwlock in question.
*/ */
#define arch_read_can_lock(x) ((int)(x)->lock >= 0) #define arch_read_can_lock(x) (((x)->cnts & 0xffff0000) == 0)
/** /**
* write_can_lock - would write_trylock() succeed? * write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question. * @lock: the rwlock in question.
*/ */
#define arch_write_can_lock(x) ((x)->lock == 0) #define arch_write_can_lock(x) ((x)->cnts == 0)
extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#define arch_read_relax(rw) barrier()
#define arch_write_relax(rw) barrier()
static inline int arch_read_trylock_once(arch_rwlock_t *rw) void arch_read_lock_wait(arch_rwlock_t *lp);
{ void arch_write_lock_wait(arch_rwlock_t *lp);
int old = ACCESS_ONCE(rw->lock);
return likely(old >= 0 &&
__atomic_cmpxchg_bool(&rw->lock, old, old + 1));
}
static inline int arch_write_trylock_once(arch_rwlock_t *rw)
{
int old = ACCESS_ONCE(rw->lock);
return likely(old == 0 &&
__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000));
}
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __RAW_OP_OR "lao"
#define __RAW_OP_AND "lan"
#define __RAW_OP_ADD "laa"
#define __RAW_LOCK(ptr, op_val, op_string) \
({ \
int old_val; \
\
typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
"bcr 14,0\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
: "cc", "memory"); \
old_val; \
})
#define __RAW_UNLOCK(ptr, op_val, op_string) \
({ \
int old_val; \
\
typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
: "cc", "memory"); \
old_val; \
})
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev);
static inline void arch_read_lock(arch_rwlock_t *rw) static inline void arch_read_lock(arch_rwlock_t *rw)
{ {
int old; int old;
old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); old = __atomic_add(1, &rw->cnts);
if (old < 0) if (old & 0xffff0000)
_raw_read_lock_wait(rw); arch_read_lock_wait(rw);
} }
static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_read_unlock(arch_rwlock_t *rw)
{ {
__RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD); __atomic_add_const_barrier(-1, &rw->cnts);
} }
static inline void arch_write_lock(arch_rwlock_t *rw) static inline void arch_write_lock(arch_rwlock_t *rw)
{ {
int old; if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000))
arch_write_lock_wait(rw);
old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
if (old != 0)
_raw_write_lock_wait(rw, old);
rw->owner = SPINLOCK_LOCKVAL;
} }
static inline void arch_write_unlock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw)
{ {
rw->owner = 0; __atomic_add_barrier(-0x30000, &rw->cnts);
__RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND);
} }
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait(arch_rwlock_t *lp);
static inline void arch_read_lock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
_raw_read_lock_wait(rw);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{ {
int old; int old;
do { old = READ_ONCE(rw->cnts);
old = ACCESS_ONCE(rw->lock); return (!(old & 0xffff0000) &&
} while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1)); __atomic_cmpxchg_bool(&rw->cnts, old, old + 1));
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
if (!arch_write_trylock_once(rw))
_raw_write_lock_wait(rw);
rw->owner = SPINLOCK_LOCKVAL;
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
typecheck(int, rw->lock);
rw->owner = 0;
asm volatile(
"st %1,%0\n"
: "+Q" (rw->lock)
: "d" (0)
: "cc", "memory");
}
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
return _raw_read_trylock_retry(rw);
return 1;
} }
static inline int arch_write_trylock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw)
{ {
if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw)) int old;
return 0;
rw->owner = SPINLOCK_LOCKVAL;
return 1;
}
static inline void arch_read_relax(arch_rwlock_t *rw)
{
arch_lock_relax(rw->owner);
}
static inline void arch_write_relax(arch_rwlock_t *rw) old = READ_ONCE(rw->cnts);
{ return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000);
arch_lock_relax(rw->owner);
} }
#endif /* __ASM_SPINLOCK_H */ #endif /* __ASM_SPINLOCK_H */
...@@ -12,8 +12,8 @@ typedef struct { ...@@ -12,8 +12,8 @@ typedef struct {
#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
typedef struct { typedef struct {
int lock; int cnts;
int owner; arch_spinlock_t wait;
} arch_rwlock_t; } arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 } #define __ARCH_RW_LOCK_UNLOCKED { 0 }
......
...@@ -268,129 +268,49 @@ int arch_spin_trylock_retry(arch_spinlock_t *lp) ...@@ -268,129 +268,49 @@ int arch_spin_trylock_retry(arch_spinlock_t *lp)
} }
EXPORT_SYMBOL(arch_spin_trylock_retry); EXPORT_SYMBOL(arch_spin_trylock_retry);
void _raw_read_lock_wait(arch_rwlock_t *rw) void arch_read_lock_wait(arch_rwlock_t *rw)
{ {
int count = spin_retry; if (unlikely(in_interrupt())) {
int owner, old; while (READ_ONCE(rw->cnts) & 0x10000)
barrier();
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES return;
__RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
#endif
owner = 0;
while (1) {
if (count-- <= 0) {
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
if (old < 0)
continue;
if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
return;
} }
}
EXPORT_SYMBOL(_raw_read_lock_wait);
int _raw_read_trylock_retry(arch_rwlock_t *rw)
{
int count = spin_retry;
int old;
while (count-- > 0) { /* Remove this reader again to allow recursive read locking */
old = ACCESS_ONCE(rw->lock); __atomic_add_const(-1, &rw->cnts);
if (old < 0) /* Put the reader into the wait queue */
continue; arch_spin_lock(&rw->wait);
if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1)) /* Now add this reader to the count value again */
return 1; __atomic_add_const(1, &rw->cnts);
} /* Loop until the writer is done */
return 0; while (READ_ONCE(rw->cnts) & 0x10000)
barrier();
arch_spin_unlock(&rw->wait);
} }
EXPORT_SYMBOL(_raw_read_trylock_retry); EXPORT_SYMBOL(arch_read_lock_wait);
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES void arch_write_lock_wait(arch_rwlock_t *rw)
void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
{ {
int count = spin_retry; int old;
int owner, old;
owner = 0;
while (1) {
if (count-- <= 0) {
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
smp_mb();
if (old >= 0) {
prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
old = prev;
}
if ((old & 0x7fffffff) == 0 && prev >= 0)
break;
}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ /* Add this CPU to the write waiters */
__atomic_add(0x20000, &rw->cnts);
void _raw_write_lock_wait(arch_rwlock_t *rw) /* Put the writer into the wait queue */
{ arch_spin_lock(&rw->wait);
int count = spin_retry;
int owner, old, prev;
prev = 0x80000000;
owner = 0;
while (1) { while (1) {
if (count-- <= 0) { old = READ_ONCE(rw->cnts);
if (owner && arch_vcpu_is_preempted(owner - 1)) if ((old & 0x1ffff) == 0 &&
smp_yield_cpu(owner - 1); __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
count = spin_retry; /* Got the lock */
}
old = ACCESS_ONCE(rw->lock);
owner = ACCESS_ONCE(rw->owner);
if (old >= 0 &&
__atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000))
prev = old;
else
smp_mb();
if ((old & 0x7fffffff) == 0 && prev >= 0)
break; break;
barrier();
} }
}
EXPORT_SYMBOL(_raw_write_lock_wait);
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
int count = spin_retry;
int old;
while (count-- > 0) {
old = ACCESS_ONCE(rw->lock);
if (old)
continue;
if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
return 1;
}
return 0;
}
EXPORT_SYMBOL(_raw_write_trylock_retry);
void arch_lock_relax(int cpu) arch_spin_unlock(&rw->wait);
{
if (!cpu)
return;
if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
return;
smp_yield_cpu(cpu - 1);
} }
EXPORT_SYMBOL(arch_lock_relax); EXPORT_SYMBOL(arch_write_lock_wait);
void arch_spin_relax(arch_spinlock_t *lp) void arch_spin_relax(arch_spinlock_t *lp)
{ {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册