提交 0d97a82b 编写于 作者: M Manfred Spraul 提交者: Linus Torvalds

ipc/msg.c: update and document memory barriers

Transfer findings from ipc/mqueue.c:

- A control barrier was missing for the lockless receive case.  So in
  theory, not yet initialized data may have been copied to user space -
  obviously only for architectures where control barriers are not NOP.

- use smp_store_release().  In theory, the refcount may have been
  decreased to 0 already when wake_q_add() tries to get a reference.

Link: http://lkml.kernel.org/r/20191020123305.14715-5-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: <1vier1@web.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 c5b2cbdb
...@@ -61,6 +61,16 @@ struct msg_queue { ...@@ -61,6 +61,16 @@ struct msg_queue {
struct list_head q_senders; struct list_head q_senders;
} __randomize_layout; } __randomize_layout;
/*
* MSG_BARRIER Locking:
*
* Similar to the optimization used in ipc/mqueue.c, one syscall return path
* does not acquire any locks when it sees that a message exists in
* msg_receiver.r_msg. Therefore r_msg is set using smp_store_release()
* and accessed using READ_ONCE()+smp_acquire__after_ctrl_dep(). In addition,
* wake_q_add_safe() is used. See ipc/mqueue.c for more details
*/
/* one msg_receiver structure for each sleeping receiver */ /* one msg_receiver structure for each sleeping receiver */
struct msg_receiver { struct msg_receiver {
struct list_head r_list; struct list_head r_list;
...@@ -184,6 +194,10 @@ static inline void ss_add(struct msg_queue *msq, ...@@ -184,6 +194,10 @@ static inline void ss_add(struct msg_queue *msq,
{ {
mss->tsk = current; mss->tsk = current;
mss->msgsz = msgsz; mss->msgsz = msgsz;
/*
* No memory barrier required: we did ipc_lock_object(),
* and the waker obtains that lock before calling wake_q_add().
*/
__set_current_state(TASK_INTERRUPTIBLE); __set_current_state(TASK_INTERRUPTIBLE);
list_add_tail(&mss->list, &msq->q_senders); list_add_tail(&mss->list, &msq->q_senders);
} }
...@@ -237,8 +251,11 @@ static void expunge_all(struct msg_queue *msq, int res, ...@@ -237,8 +251,11 @@ static void expunge_all(struct msg_queue *msq, int res,
struct msg_receiver *msr, *t; struct msg_receiver *msr, *t;
list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
wake_q_add(wake_q, msr->r_tsk); get_task_struct(msr->r_tsk);
WRITE_ONCE(msr->r_msg, ERR_PTR(res));
/* see MSG_BARRIER for purpose/pairing */
smp_store_release(&msr->r_msg, ERR_PTR(res));
wake_q_add_safe(wake_q, msr->r_tsk);
} }
} }
...@@ -798,13 +815,17 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, ...@@ -798,13 +815,17 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
list_del(&msr->r_list); list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) { if (msr->r_maxsize < msg->m_ts) {
wake_q_add(wake_q, msr->r_tsk); wake_q_add(wake_q, msr->r_tsk);
WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
/* See expunge_all regarding memory barrier */
smp_store_release(&msr->r_msg, ERR_PTR(-E2BIG));
} else { } else {
ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk)); ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk));
msq->q_rtime = ktime_get_real_seconds(); msq->q_rtime = ktime_get_real_seconds();
wake_q_add(wake_q, msr->r_tsk); wake_q_add(wake_q, msr->r_tsk);
WRITE_ONCE(msr->r_msg, msg);
/* See expunge_all regarding memory barrier */
smp_store_release(&msr->r_msg, msg);
return 1; return 1;
} }
} }
...@@ -1154,7 +1175,11 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in ...@@ -1154,7 +1175,11 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msr_d.r_maxsize = INT_MAX; msr_d.r_maxsize = INT_MAX;
else else
msr_d.r_maxsize = bufsz; msr_d.r_maxsize = bufsz;
msr_d.r_msg = ERR_PTR(-EAGAIN);
/* memory barrier not required due to ipc_lock_object() */
WRITE_ONCE(msr_d.r_msg, ERR_PTR(-EAGAIN));
/* memory barrier not required, we own ipc_lock_object() */
__set_current_state(TASK_INTERRUPTIBLE); __set_current_state(TASK_INTERRUPTIBLE);
ipc_unlock_object(&msq->q_perm); ipc_unlock_object(&msq->q_perm);
...@@ -1183,8 +1208,12 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in ...@@ -1183,8 +1208,12 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
* signal) it will either see the message and continue ... * signal) it will either see the message and continue ...
*/ */
msg = READ_ONCE(msr_d.r_msg); msg = READ_ONCE(msr_d.r_msg);
if (msg != ERR_PTR(-EAGAIN)) if (msg != ERR_PTR(-EAGAIN)) {
/* see MSG_BARRIER for purpose/pairing */
smp_acquire__after_ctrl_dep();
goto out_unlock1; goto out_unlock1;
}
/* /*
* ... or see -EAGAIN, acquire the lock to check the message * ... or see -EAGAIN, acquire the lock to check the message
...@@ -1192,7 +1221,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in ...@@ -1192,7 +1221,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
*/ */
ipc_lock_object(&msq->q_perm); ipc_lock_object(&msq->q_perm);
msg = msr_d.r_msg; msg = READ_ONCE(msr_d.r_msg);
if (msg != ERR_PTR(-EAGAIN)) if (msg != ERR_PTR(-EAGAIN))
goto out_unlock0; goto out_unlock0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册