提交 823e75f7 编写于 作者: Linus Torvalds

Merge branch 'ipc-scalability'

Merge IPC cleanup and scalability patches from Andrew Morton.

This cleans up many of the oddities in the IPC code, uses the list
iterator helpers, splits out locking and adds per-semaphore locks for
greater scalability of the IPC semaphore code.

Most normal user-level locking by now uses futexes (i.e. pthreads, but
also a lot of specialized locks), but SysV IPC semaphores are apparently
still used in some big applications, either for portability reasons or
because they offer tracking and undo (and you don't need to have a
special shared memory area for them).

Our IPC semaphore scalability was pitiful.  We used to lock ranges that
were much too big, and we used to have a single ipc lock per ipc
semaphore array.  Most loads never cared, but some do.  There are some
numbers in the individual commits.

* ipc-scalability:
  ipc: sysv shared memory limited to 8TiB
  ipc/msg.c: use list_for_each_entry_[safe] for list traversing
  ipc,sem: fine grained locking for semtimedop
  ipc,sem: have only one list in struct sem_queue
  ipc,sem: open code and rename sem_lock
  ipc,sem: do not hold ipc lock more than necessary
  ipc: introduce lockless pre_down ipcctl
  ipc: introduce obtaining a lockless ipc object
  ipc: remove bogus lock comment for ipc_checkid
  ipc/msgutil.c: use linux/uaccess.h
  ipc: refactor msg list search into separate function
  ipc: simplify msg list search
  ipc: implement MSG_COPY as a new receive mode
  ipc: remove msg handling from queue scan
  ipc: set EFAULT as default error in load_msg()
  ipc: tighten msg copy loops
  ipc: separate msg allocation from userspace copy
  ipc: clamp with min()
...@@ -43,8 +43,8 @@ struct ipc_namespace { ...@@ -43,8 +43,8 @@ struct ipc_namespace {
size_t shm_ctlmax; size_t shm_ctlmax;
size_t shm_ctlall; size_t shm_ctlall;
unsigned long shm_tot;
int shm_ctlmni; int shm_ctlmni;
int shm_tot;
/* /*
* Defines whether IPC_RMID is forced for _all_ shm segments regardless * Defines whether IPC_RMID is forced for _all_ shm segments regardless
* of shmctl() * of shmctl()
......
...@@ -66,6 +66,7 @@ struct msg_sender { ...@@ -66,6 +66,7 @@ struct msg_sender {
#define SEARCH_EQUAL 2 #define SEARCH_EQUAL 2
#define SEARCH_NOTEQUAL 3 #define SEARCH_NOTEQUAL 3
#define SEARCH_LESSEQUAL 4 #define SEARCH_LESSEQUAL 4
#define SEARCH_NUMBER 5
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
...@@ -237,14 +238,9 @@ static inline void ss_del(struct msg_sender *mss) ...@@ -237,14 +238,9 @@ static inline void ss_del(struct msg_sender *mss)
static void ss_wakeup(struct list_head *h, int kill) static void ss_wakeup(struct list_head *h, int kill)
{ {
struct list_head *tmp; struct msg_sender *mss, *t;
tmp = h->next; list_for_each_entry_safe(mss, t, h, list) {
while (tmp != h) {
struct msg_sender *mss;
mss = list_entry(tmp, struct msg_sender, list);
tmp = tmp->next;
if (kill) if (kill)
mss->list.next = NULL; mss->list.next = NULL;
wake_up_process(mss->tsk); wake_up_process(mss->tsk);
...@@ -253,14 +249,9 @@ static void ss_wakeup(struct list_head *h, int kill) ...@@ -253,14 +249,9 @@ static void ss_wakeup(struct list_head *h, int kill)
static void expunge_all(struct msg_queue *msq, int res) static void expunge_all(struct msg_queue *msq, int res)
{ {
struct list_head *tmp; struct msg_receiver *msr, *t;
tmp = msq->q_receivers.next;
while (tmp != &msq->q_receivers) {
struct msg_receiver *msr;
msr = list_entry(tmp, struct msg_receiver, r_list); list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
tmp = tmp->next;
msr->r_msg = NULL; msr->r_msg = NULL;
wake_up_process(msr->r_tsk); wake_up_process(msr->r_tsk);
smp_mb(); smp_mb();
...@@ -278,7 +269,7 @@ static void expunge_all(struct msg_queue *msq, int res) ...@@ -278,7 +269,7 @@ static void expunge_all(struct msg_queue *msq, int res)
*/ */
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{ {
struct list_head *tmp; struct msg_msg *msg, *t;
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
expunge_all(msq, -EIDRM); expunge_all(msq, -EIDRM);
...@@ -286,11 +277,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) ...@@ -286,11 +277,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
msg_rmid(ns, msq); msg_rmid(ns, msq);
msg_unlock(msq); msg_unlock(msq);
tmp = msq->q_messages.next; list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
while (tmp != &msq->q_messages) {
struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
tmp = tmp->next;
atomic_dec(&ns->msg_hdrs); atomic_dec(&ns->msg_hdrs);
free_msg(msg); free_msg(msg);
} }
...@@ -583,6 +570,7 @@ static int testmsg(struct msg_msg *msg, long type, int mode) ...@@ -583,6 +570,7 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
switch(mode) switch(mode)
{ {
case SEARCH_ANY: case SEARCH_ANY:
case SEARCH_NUMBER:
return 1; return 1;
case SEARCH_LESSEQUAL: case SEARCH_LESSEQUAL:
if (msg->m_type <=type) if (msg->m_type <=type)
...@@ -602,14 +590,9 @@ static int testmsg(struct msg_msg *msg, long type, int mode) ...@@ -602,14 +590,9 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{ {
struct list_head *tmp; struct msg_receiver *msr, *t;
tmp = msq->q_receivers.next; list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
while (tmp != &msq->q_receivers) {
struct msg_receiver *msr;
msr = list_entry(tmp, struct msg_receiver, r_list);
tmp = tmp->next;
if (testmsg(msg, msr->r_msgtype, msr->r_mode) && if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
!security_msg_queue_msgrcv(msq, msg, msr->r_tsk, !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
msr->r_msgtype, msr->r_mode)) { msr->r_msgtype, msr->r_mode)) {
...@@ -685,7 +668,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, ...@@ -685,7 +668,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
goto out_unlock_free; goto out_unlock_free;
} }
ss_add(msq, &s); ss_add(msq, &s);
ipc_rcu_getref(msq);
if (!ipc_rcu_getref(msq)) {
err = -EIDRM;
goto out_unlock_free;
}
msg_unlock(msq); msg_unlock(msq);
schedule(); schedule();
...@@ -738,6 +726,8 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, ...@@ -738,6 +726,8 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
static inline int convert_mode(long *msgtyp, int msgflg) static inline int convert_mode(long *msgtyp, int msgflg)
{ {
if (msgflg & MSG_COPY)
return SEARCH_NUMBER;
/* /*
* find message of correct type. * find message of correct type.
* msgtyp = 0 => get first. * msgtyp = 0 => get first.
...@@ -774,14 +764,10 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) ...@@ -774,14 +764,10 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
* This function creates new kernel message structure, large enough to store * This function creates new kernel message structure, large enough to store
* bufsz message bytes. * bufsz message bytes.
*/ */
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
int msgflg, long *msgtyp,
unsigned long *copy_number)
{ {
struct msg_msg *copy; struct msg_msg *copy;
*copy_number = *msgtyp;
*msgtyp = 0;
/* /*
* Create dummy message to copy real message to. * Create dummy message to copy real message to.
*/ */
...@@ -797,9 +783,7 @@ static inline void free_copy(struct msg_msg *copy) ...@@ -797,9 +783,7 @@ static inline void free_copy(struct msg_msg *copy)
free_msg(copy); free_msg(copy);
} }
#else #else
static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
int msgflg, long *msgtyp,
unsigned long *copy_number)
{ {
return ERR_PTR(-ENOSYS); return ERR_PTR(-ENOSYS);
} }
...@@ -809,6 +793,30 @@ static inline void free_copy(struct msg_msg *copy) ...@@ -809,6 +793,30 @@ static inline void free_copy(struct msg_msg *copy)
} }
#endif #endif
static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
{
struct msg_msg *msg;
long count = 0;
list_for_each_entry(msg, &msq->q_messages, m_list) {
if (testmsg(msg, *msgtyp, mode) &&
!security_msg_queue_msgrcv(msq, msg, current,
*msgtyp, mode)) {
if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
*msgtyp = msg->m_type - 1;
} else if (mode == SEARCH_NUMBER) {
if (*msgtyp == count)
return msg;
} else
return msg;
count++;
}
}
return ERR_PTR(-EAGAIN);
}
long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int msgflg, int msgflg,
long (*msg_handler)(void __user *, struct msg_msg *, size_t)) long (*msg_handler)(void __user *, struct msg_msg *, size_t))
...@@ -818,15 +826,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, ...@@ -818,15 +826,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int mode; int mode;
struct ipc_namespace *ns; struct ipc_namespace *ns;
struct msg_msg *copy = NULL; struct msg_msg *copy = NULL;
unsigned long copy_number = 0;
ns = current->nsproxy->ipc_ns; ns = current->nsproxy->ipc_ns;
if (msqid < 0 || (long) bufsz < 0) if (msqid < 0 || (long) bufsz < 0)
return -EINVAL; return -EINVAL;
if (msgflg & MSG_COPY) { if (msgflg & MSG_COPY) {
copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax), copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
msgflg, &msgtyp, &copy_number);
if (IS_ERR(copy)) if (IS_ERR(copy))
return PTR_ERR(copy); return PTR_ERR(copy);
} }
...@@ -840,45 +846,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, ...@@ -840,45 +846,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
for (;;) { for (;;) {
struct msg_receiver msr_d; struct msg_receiver msr_d;
struct list_head *tmp;
long msg_counter = 0;
msg = ERR_PTR(-EACCES); msg = ERR_PTR(-EACCES);
if (ipcperms(ns, &msq->q_perm, S_IRUGO)) if (ipcperms(ns, &msq->q_perm, S_IRUGO))
goto out_unlock; goto out_unlock;
msg = ERR_PTR(-EAGAIN); msg = find_msg(msq, &msgtyp, mode);
tmp = msq->q_messages.next;
while (tmp != &msq->q_messages) {
struct msg_msg *walk_msg;
walk_msg = list_entry(tmp, struct msg_msg, m_list);
if (testmsg(walk_msg, msgtyp, mode) &&
!security_msg_queue_msgrcv(msq, walk_msg, current,
msgtyp, mode)) {
msg = walk_msg;
if (mode == SEARCH_LESSEQUAL &&
walk_msg->m_type != 1) {
msgtyp = walk_msg->m_type - 1;
} else if (msgflg & MSG_COPY) {
if (copy_number == msg_counter) {
/*
* Found requested message.
* Copy it.
*/
msg = copy_msg(msg, copy);
if (IS_ERR(msg))
goto out_unlock;
break;
}
msg = ERR_PTR(-EAGAIN);
} else
break;
msg_counter++;
}
tmp = tmp->next;
}
if (!IS_ERR(msg)) { if (!IS_ERR(msg)) {
/* /*
* Found a suitable message. * Found a suitable message.
...@@ -892,8 +866,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, ...@@ -892,8 +866,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
* If we are copying, then do not unlink message and do * If we are copying, then do not unlink message and do
* not update queue parameters. * not update queue parameters.
*/ */
if (msgflg & MSG_COPY) if (msgflg & MSG_COPY) {
msg = copy_msg(msg, copy);
goto out_unlock; goto out_unlock;
}
list_del(&msg->m_list); list_del(&msg->m_list);
msq->q_qnum--; msq->q_qnum--;
msq->q_rtime = get_seconds(); msq->q_rtime = get_seconds();
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include <linux/ipc_namespace.h> #include <linux/ipc_namespace.h>
#include <linux/utsname.h> #include <linux/utsname.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <asm/uaccess.h> #include <linux/uaccess.h>
#include "util.h" #include "util.h"
...@@ -37,59 +37,70 @@ struct ipc_namespace init_ipc_ns = { ...@@ -37,59 +37,70 @@ struct ipc_namespace init_ipc_ns = {
atomic_t nr_ipc_ns = ATOMIC_INIT(1); atomic_t nr_ipc_ns = ATOMIC_INIT(1);
struct msg_msgseg { struct msg_msgseg {
struct msg_msgseg* next; struct msg_msgseg *next;
/* the next part of the message follows immediately */ /* the next part of the message follows immediately */
}; };
#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg)) #define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg)) #define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg))
struct msg_msg *load_msg(const void __user *src, int len)
static struct msg_msg *alloc_msg(int len)
{ {
struct msg_msg *msg; struct msg_msg *msg;
struct msg_msgseg **pseg; struct msg_msgseg **pseg;
int err;
int alen; int alen;
alen = len; alen = min(len, DATALEN_MSG);
if (alen > DATALEN_MSG)
alen = DATALEN_MSG;
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL); msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL) if (msg == NULL)
return ERR_PTR(-ENOMEM); return NULL;
msg->next = NULL; msg->next = NULL;
msg->security = NULL; msg->security = NULL;
if (copy_from_user(msg + 1, src, alen)) {
err = -EFAULT;
goto out_err;
}
len -= alen; len -= alen;
src = ((char __user *)src) + alen;
pseg = &msg->next; pseg = &msg->next;
while (len > 0) { while (len > 0) {
struct msg_msgseg *seg; struct msg_msgseg *seg;
alen = len; alen = min(len, DATALEN_SEG);
if (alen > DATALEN_SEG) seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL);
alen = DATALEN_SEG; if (seg == NULL)
seg = kmalloc(sizeof(*seg) + alen,
GFP_KERNEL);
if (seg == NULL) {
err = -ENOMEM;
goto out_err; goto out_err;
}
*pseg = seg; *pseg = seg;
seg->next = NULL; seg->next = NULL;
if (copy_from_user(seg + 1, src, alen)) {
err = -EFAULT;
goto out_err;
}
pseg = &seg->next; pseg = &seg->next;
len -= alen; len -= alen;
src = ((char __user *)src) + alen; }
return msg;
out_err:
free_msg(msg);
return NULL;
}
struct msg_msg *load_msg(const void __user *src, int len)
{
struct msg_msg *msg;
struct msg_msgseg *seg;
int err = -EFAULT;
int alen;
msg = alloc_msg(len);
if (msg == NULL)
return ERR_PTR(-ENOMEM);
alen = min(len, DATALEN_MSG);
if (copy_from_user(msg + 1, src, alen))
goto out_err;
for (seg = msg->next; seg != NULL; seg = seg->next) {
len -= alen;
src = (char __user *)src + alen;
alen = min(len, DATALEN_SEG);
if (copy_from_user(seg + 1, src, alen))
goto out_err;
} }
err = security_msg_msg_alloc(msg); err = security_msg_msg_alloc(msg);
...@@ -113,23 +124,16 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) ...@@ -113,23 +124,16 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
if (src->m_ts > dst->m_ts) if (src->m_ts > dst->m_ts)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
alen = len; alen = min(len, DATALEN_MSG);
if (alen > DATALEN_MSG)
alen = DATALEN_MSG;
memcpy(dst + 1, src + 1, alen); memcpy(dst + 1, src + 1, alen);
len -= alen; for (dst_pseg = dst->next, src_pseg = src->next;
dst_pseg = dst->next; src_pseg != NULL;
src_pseg = src->next; dst_pseg = dst_pseg->next, src_pseg = src_pseg->next) {
while (len > 0) {
alen = len;
if (alen > DATALEN_SEG)
alen = DATALEN_SEG;
memcpy(dst_pseg + 1, src_pseg + 1, alen);
dst_pseg = dst_pseg->next;
len -= alen; len -= alen;
src_pseg = src_pseg->next; alen = min(len, DATALEN_SEG);
memcpy(dst_pseg + 1, src_pseg + 1, alen);
} }
dst->m_type = src->m_type; dst->m_type = src->m_type;
...@@ -148,24 +152,16 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len) ...@@ -148,24 +152,16 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len)
int alen; int alen;
struct msg_msgseg *seg; struct msg_msgseg *seg;
alen = len; alen = min(len, DATALEN_MSG);
if (alen > DATALEN_MSG)
alen = DATALEN_MSG;
if (copy_to_user(dest, msg + 1, alen)) if (copy_to_user(dest, msg + 1, alen))
return -1; return -1;
len -= alen; for (seg = msg->next; seg != NULL; seg = seg->next) {
dest = ((char __user *)dest) + alen; len -= alen;
seg = msg->next; dest = (char __user *)dest + alen;
while (len > 0) { alen = min(len, DATALEN_SEG);
alen = len;
if (alen > DATALEN_SEG)
alen = DATALEN_SEG;
if (copy_to_user(dest, seg + 1, alen)) if (copy_to_user(dest, seg + 1, alen))
return -1; return -1;
len -= alen;
dest = ((char __user *)dest) + alen;
seg = seg->next;
} }
return 0; return 0;
} }
......
此差异已折叠。
...@@ -462,7 +462,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) ...@@ -462,7 +462,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
size_t size = params->u.size; size_t size = params->u.size;
int error; int error;
struct shmid_kernel *shp; struct shmid_kernel *shp;
int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct file * file; struct file * file;
char name[13]; char name[13];
int id; int id;
......
...@@ -439,9 +439,9 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) ...@@ -439,9 +439,9 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
* NULL is returned if the allocation fails * NULL is returned if the allocation fails
*/ */
void* ipc_alloc(int size) void *ipc_alloc(int size)
{ {
void* out; void *out;
if(size > PAGE_SIZE) if(size > PAGE_SIZE)
out = vmalloc(size); out = vmalloc(size);
else else
...@@ -478,7 +478,7 @@ void ipc_free(void* ptr, int size) ...@@ -478,7 +478,7 @@ void ipc_free(void* ptr, int size)
*/ */
struct ipc_rcu_hdr struct ipc_rcu_hdr
{ {
int refcount; atomic_t refcount;
int is_vmalloc; int is_vmalloc;
void *data[0]; void *data[0];
}; };
...@@ -516,39 +516,41 @@ static inline int rcu_use_vmalloc(int size) ...@@ -516,39 +516,41 @@ static inline int rcu_use_vmalloc(int size)
* @size: size desired * @size: size desired
* *
* Allocate memory for the rcu header structure + the object. * Allocate memory for the rcu header structure + the object.
* Returns the pointer to the object. * Returns the pointer to the object or NULL upon failure.
* NULL is returned if the allocation fails.
*/ */
void *ipc_rcu_alloc(int size)
void* ipc_rcu_alloc(int size)
{ {
void* out; void *out;
/*
/*
* We prepend the allocation with the rcu struct, and * We prepend the allocation with the rcu struct, and
* workqueue if necessary (for vmalloc). * workqueue if necessary (for vmalloc).
*/ */
if (rcu_use_vmalloc(size)) { if (rcu_use_vmalloc(size)) {
out = vmalloc(HDRLEN_VMALLOC + size); out = vmalloc(HDRLEN_VMALLOC + size);
if (out) { if (!out)
out += HDRLEN_VMALLOC; goto done;
container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; out += HDRLEN_VMALLOC;
} container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
} else { } else {
out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
if (out) { if (!out)
out += HDRLEN_KMALLOC; goto done;
container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; out += HDRLEN_KMALLOC;
} container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
} }
/* set reference counter no matter what kind of allocation was done */
atomic_set(&container_of(out, struct ipc_rcu_hdr, data)->refcount, 1);
done:
return out; return out;
} }
void ipc_rcu_getref(void *ptr) int ipc_rcu_getref(void *ptr)
{ {
container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount);
} }
static void ipc_do_vfree(struct work_struct *work) static void ipc_do_vfree(struct work_struct *work)
...@@ -578,7 +580,7 @@ static void ipc_schedule_free(struct rcu_head *head) ...@@ -578,7 +580,7 @@ static void ipc_schedule_free(struct rcu_head *head)
void ipc_rcu_putref(void *ptr) void ipc_rcu_putref(void *ptr)
{ {
if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) if (!atomic_dec_and_test(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount))
return; return;
if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
...@@ -668,6 +670,28 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) ...@@ -668,6 +670,28 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
out->seq = in->seq; out->seq = in->seq;
} }
/**
 * ipc_obtain_object - look up an ipc object by id without locking it
 * @ids: ipc identifier set
 * @id: ipc id to look for
 *
 * Converts @id to its idr index and fetches the matching entry from
 * @ids->ipcs_idr.  Returns the entry, or ERR_PTR(-EINVAL) when the idr
 * has nothing stored at that index.
 *
 * Call inside the RCU critical section.
 * The ipc object is *not* locked on exit.
 */
struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
{
	struct kern_ipc_perm *ipcp = idr_find(&ids->ipcs_idr, ipcid_to_idx(id));

	return ipcp ? ipcp : ERR_PTR(-EINVAL);
}
/** /**
* ipc_lock - Lock an ipc structure without rw_mutex held * ipc_lock - Lock an ipc structure without rw_mutex held
* @ids: IPC identifier set * @ids: IPC identifier set
...@@ -675,32 +699,53 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) ...@@ -675,32 +699,53 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
* *
* Look for an id in the ipc ids idr and lock the associated ipc object. * Look for an id in the ipc ids idr and lock the associated ipc object.
* *
* The ipc object is locked on exit. * The ipc object is locked on successful exit.
*/ */
struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
{ {
struct kern_ipc_perm *out; struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
rcu_read_lock(); rcu_read_lock();
out = idr_find(&ids->ipcs_idr, lid); out = ipc_obtain_object(ids, id);
if (out == NULL) { if (IS_ERR(out))
rcu_read_unlock(); goto err1;
return ERR_PTR(-EINVAL);
}
spin_lock(&out->lock); spin_lock(&out->lock);
/* ipc_rmid() may have already freed the ID while ipc_lock /* ipc_rmid() may have already freed the ID while ipc_lock
* was spinning: here verify that the structure is still valid * was spinning: here verify that the structure is still valid
*/ */
if (out->deleted) { if (!out->deleted)
spin_unlock(&out->lock); return out;
rcu_read_unlock();
return ERR_PTR(-EINVAL);
}
spin_unlock(&out->lock);
out = ERR_PTR(-EINVAL);
err1:
rcu_read_unlock();
return out;
}
/**
* ipc_obtain_object_check
* @ids: ipc identifier set
* @id: ipc id to look for
*
* Similar to ipc_obtain_object() but also checks
* the ipc object reference counter.
*
* Call inside the RCU critical section.
* The ipc object is *not* locked on exit.
*/
struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
if (IS_ERR(out))
goto out;
if (ipc_checkid(out, id))
return ERR_PTR(-EIDRM);
out:
return out; return out;
} }
...@@ -781,11 +826,28 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, ...@@ -781,11 +826,28 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
struct ipc64_perm *perm, int extra_perm) struct ipc64_perm *perm, int extra_perm)
{ {
struct kern_ipc_perm *ipcp; struct kern_ipc_perm *ipcp;
ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
if (IS_ERR(ipcp))
goto out;
spin_lock(&ipcp->lock);
out:
return ipcp;
}
struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm)
{
kuid_t euid; kuid_t euid;
int err; int err = -EPERM;
struct kern_ipc_perm *ipcp;
down_write(&ids->rw_mutex); down_write(&ids->rw_mutex);
ipcp = ipc_lock_check(ids, id); rcu_read_lock();
ipcp = ipc_obtain_object_check(ids, id);
if (IS_ERR(ipcp)) { if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp); err = PTR_ERR(ipcp);
goto out_up; goto out_up;
...@@ -794,17 +856,21 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, ...@@ -794,17 +856,21 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
audit_ipc_obj(ipcp); audit_ipc_obj(ipcp);
if (cmd == IPC_SET) if (cmd == IPC_SET)
audit_ipc_set_perm(extra_perm, perm->uid, audit_ipc_set_perm(extra_perm, perm->uid,
perm->gid, perm->mode); perm->gid, perm->mode);
euid = current_euid(); euid = current_euid();
if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) ||
ns_capable(ns->user_ns, CAP_SYS_ADMIN)) ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ipcp; return ipcp;
err = -EPERM;
ipc_unlock(ipcp);
out_up: out_up:
/*
* Unsuccessful lookup, unlock and return
* the corresponding error.
*/
rcu_read_unlock();
up_write(&ids->rw_mutex); up_write(&ids->rw_mutex);
return ERR_PTR(err); return ERR_PTR(err);
} }
......
...@@ -119,14 +119,18 @@ void ipc_free(void* ptr, int size); ...@@ -119,14 +119,18 @@ void ipc_free(void* ptr, int size);
* to 0 schedules the rcu destruction. Caller must guarantee locking. * to 0 schedules the rcu destruction. Caller must guarantee locking.
*/ */
void* ipc_rcu_alloc(int size); void* ipc_rcu_alloc(int size);
void ipc_rcu_getref(void *ptr); int ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr); void ipc_rcu_putref(void *ptr);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd, struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm); struct ipc64_perm *perm, int extra_perm);
...@@ -150,14 +154,9 @@ static inline int ipc_buildid(int id, int seq) ...@@ -150,14 +154,9 @@ static inline int ipc_buildid(int id, int seq)
return SEQ_MULTIPLIER * seq + id; return SEQ_MULTIPLIER * seq + id;
} }
/*
* Must be called with ipcp locked
*/
static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
{ {
if (uid / SEQ_MULTIPLIER != ipcp->seq) return uid / SEQ_MULTIPLIER != ipcp->seq;
return 1;
return 0;
} }
static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
...@@ -172,7 +171,13 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm) ...@@ -172,7 +171,13 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
rcu_read_unlock(); rcu_read_unlock();
} }
/*
 * ipc_lock_object - take the spinlock embedded in an ipc permission object.
 * @perm: object whose ->lock is acquired
 *
 * Only perm->lock is taken here; this helper does nothing else.
 */
static inline void ipc_lock_object(struct kern_ipc_perm *perm)
{
spin_lock(&perm->lock);
}
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params); struct ipc_ops *ops, struct ipc_params *params);
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册