提交 a0c1e907 编写于 作者: T Thomas Gleixner 提交者: Linus Torvalds

futex: runtime enable pi and robust functionality

Not all architectures implement futex_atomic_cmpxchg_inatomic().  The default
implementation returns -ENOSYS, which is currently not handled inside of the
futex guts.

Futex PI calls, and robust list exits with a held futex, result in an endless
loop in the futex code on architectures which have no support.

Fixing up every place where futex_atomic_cmpxchg_inatomic() is called would
add a fair amount of extra if/else constructs to the already complex code.  It
is also not possible to disable the robust feature before user space tries to
register robust lists.

Compile time disabling is not a good idea either, as there are already
architectures with runtime detection of futex_atomic_cmpxchg_inatomic support.

Detect the functionality at runtime instead by calling
cmpxchg_futex_value_locked() with a NULL pointer from the futex initialization
code.  This is guaranteed to fail, but the call of
futex_atomic_cmpxchg_inatomic() happens with pagefaults disabled.

On architectures which use the asm-generic implementation or have runtime
CPU feature detection, a -ENOSYS return value disables the PI/robust features.

On architectures with a working implementation the call returns -EFAULT and
the PI/robust features are enabled.

The relevant syscalls return -ENOSYS and the robust list exit code is blocked,
when the detection fails.

Fixes http://lkml.org/lkml/2008/2/11/149
Originally reported by: Lennert Buytenhek
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Lennert Buytenhek <buytenh@wantstofly.org>
Cc: Riku Voipio <riku.voipio@movial.fi>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 3e4ab747
...@@ -167,6 +167,7 @@ union futex_key { ...@@ -167,6 +167,7 @@ union futex_key {
#ifdef CONFIG_FUTEX #ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr); extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr);
extern int futex_cmpxchg_enabled;
#else #else
static inline void exit_robust_list(struct task_struct *curr) static inline void exit_robust_list(struct task_struct *curr)
{ {
......
...@@ -60,6 +60,8 @@ ...@@ -60,6 +60,8 @@
#include "rtmutex_common.h" #include "rtmutex_common.h"
int __read_mostly futex_cmpxchg_enabled;
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/* /*
...@@ -469,6 +471,8 @@ void exit_pi_state_list(struct task_struct *curr) ...@@ -469,6 +471,8 @@ void exit_pi_state_list(struct task_struct *curr)
struct futex_hash_bucket *hb; struct futex_hash_bucket *hb;
union futex_key key; union futex_key key;
if (!futex_cmpxchg_enabled)
return;
/* /*
* We are a ZOMBIE and nobody can enqueue itself on * We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful * pi_state_list anymore, but we have to be careful
...@@ -1870,6 +1874,8 @@ asmlinkage long ...@@ -1870,6 +1874,8 @@ asmlinkage long
sys_set_robust_list(struct robust_list_head __user *head, sys_set_robust_list(struct robust_list_head __user *head,
size_t len) size_t len)
{ {
if (!futex_cmpxchg_enabled)
return -ENOSYS;
/* /*
* The kernel knows only one size for now: * The kernel knows only one size for now:
*/ */
...@@ -1894,6 +1900,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, ...@@ -1894,6 +1900,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
struct robust_list_head __user *head; struct robust_list_head __user *head;
unsigned long ret; unsigned long ret;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
if (!pid) if (!pid)
head = current->robust_list; head = current->robust_list;
else { else {
...@@ -1997,6 +2006,9 @@ void exit_robust_list(struct task_struct *curr) ...@@ -1997,6 +2006,9 @@ void exit_robust_list(struct task_struct *curr)
unsigned long futex_offset; unsigned long futex_offset;
int rc; int rc;
if (!futex_cmpxchg_enabled)
return;
/* /*
* Fetch the list head (which was registered earlier, via * Fetch the list head (which was registered earlier, via
* sys_set_robust_list()): * sys_set_robust_list()):
...@@ -2051,7 +2063,7 @@ void exit_robust_list(struct task_struct *curr) ...@@ -2051,7 +2063,7 @@ void exit_robust_list(struct task_struct *curr)
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3) u32 __user *uaddr2, u32 val2, u32 val3)
{ {
int ret; int ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK; int cmd = op & FUTEX_CMD_MASK;
struct rw_semaphore *fshared = NULL; struct rw_semaphore *fshared = NULL;
...@@ -2083,13 +2095,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ...@@ -2083,13 +2095,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
break; break;
case FUTEX_LOCK_PI: case FUTEX_LOCK_PI:
ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); if (futex_cmpxchg_enabled)
ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
break; break;
case FUTEX_UNLOCK_PI: case FUTEX_UNLOCK_PI:
ret = futex_unlock_pi(uaddr, fshared); if (futex_cmpxchg_enabled)
ret = futex_unlock_pi(uaddr, fshared);
break; break;
case FUTEX_TRYLOCK_PI: case FUTEX_TRYLOCK_PI:
ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); if (futex_cmpxchg_enabled)
ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
break; break;
default: default:
ret = -ENOSYS; ret = -ENOSYS;
...@@ -2145,8 +2160,23 @@ static struct file_system_type futex_fs_type = { ...@@ -2145,8 +2160,23 @@ static struct file_system_type futex_fs_type = {
static int __init init(void) static int __init init(void)
{ {
u32 curval;
int i; int i;
/*
* This will fail and we want it. Some arch implementations do
* runtime detection of the futex_atomic_cmpxchg_inatomic()
* functionality. We want to know that before we call in any
* of the complex code paths. Also we want to prevent
* registration of robust lists in that case. NULL is
* guaranteed to fault and we get -EFAULT on functional
* implementation, the non functional ones will return
* -ENOSYS.
*/
curval = cmpxchg_futex_value_locked(NULL, 0, 0);
if (curval == -EFAULT)
futex_cmpxchg_enabled = 1;
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
spin_lock_init(&futex_queues[i].lock); spin_lock_init(&futex_queues[i].lock);
......
...@@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task_struct *curr) ...@@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task_struct *curr)
compat_long_t futex_offset; compat_long_t futex_offset;
int rc; int rc;
if (!futex_cmpxchg_enabled)
return;
/* /*
* Fetch the list head (which was registered earlier, via * Fetch the list head (which was registered earlier, via
* sys_set_robust_list()): * sys_set_robust_list()):
...@@ -115,6 +118,9 @@ asmlinkage long ...@@ -115,6 +118,9 @@ asmlinkage long
compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
compat_size_t len) compat_size_t len)
{ {
if (!futex_cmpxchg_enabled)
return -ENOSYS;
if (unlikely(len != sizeof(*head))) if (unlikely(len != sizeof(*head)))
return -EINVAL; return -EINVAL;
...@@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, ...@@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
struct compat_robust_list_head __user *head; struct compat_robust_list_head __user *head;
unsigned long ret; unsigned long ret;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
if (!pid) if (!pid)
head = current->compat_robust_list; head = current->compat_robust_list;
else { else {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册