msg.c 20.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2
/*
 * linux/ipc/msg.c
3
 * Copyright (C) 1992 Krishna Balasubramanian
L
Linus Torvalds 已提交
4 5 6 7 8 9 10 11 12 13 14
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * MSGMAX limit removed, sysctl's added
15
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
S
Steve Grubb 已提交
16 17 18
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
K
Kirill Korotaev 已提交
19 20 21 22
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
L
Linus Torvalds 已提交
23 24
 */

25
#include <linux/capability.h>
L
Linus Torvalds 已提交
26 27 28 29 30 31 32 33 34 35
#include <linux/slab.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
36
#include <linux/seq_file.h>
I
Ingo Molnar 已提交
37
#include <linux/mutex.h>
K
Kirill Korotaev 已提交
38
#include <linux/nsproxy.h>
I
Ingo Molnar 已提交
39

L
Linus Torvalds 已提交
40 41 42 43
#include <asm/current.h>
#include <asm/uaccess.h>
#include "util.h"

44 45 46
/*
 * one msg_receiver structure for each sleeping receiver:
 */
L
Linus Torvalds 已提交
47
struct msg_receiver {
48 49
	struct list_head	r_list;
	struct task_struct	*r_tsk;
L
Linus Torvalds 已提交
50

51 52 53
	int			r_mode;
	long			r_msgtype;
	long			r_maxsize;
L
Linus Torvalds 已提交
54

55
	struct msg_msg		*volatile r_msg;
L
Linus Torvalds 已提交
56 57 58 59
};

/* one msg_sender for each sleeping sender */
struct msg_sender {
60 61
	struct list_head	list;
	struct task_struct	*tsk;
L
Linus Torvalds 已提交
62 63 64 65 66 67 68
};

#define SEARCH_ANY		1
#define SEARCH_EQUAL		2
#define SEARCH_NOTEQUAL		3
#define SEARCH_LESSEQUAL	4

69 70
static atomic_t msg_bytes =	ATOMIC_INIT(0);
static atomic_t msg_hdrs =	ATOMIC_INIT(0);
L
Linus Torvalds 已提交
71

K
Kirill Korotaev 已提交
72
static struct ipc_ids init_msg_ids;
L
Linus Torvalds 已提交
73

K
Kirill Korotaev 已提交
74
/* The message-queue id table of namespace @ns. */
#define msg_ids(ns)	(*((ns)->ids[IPC_MSG_IDS]))

/*
 * Thin wrappers around the generic ipc_*() helpers, bound to the message
 * queue id table of the given namespace.  Every macro argument that is
 * dereferenced is parenthesised so that arbitrary expressions are safe.
 */
#define msg_lock(ns, id)	((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
#define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
#define msg_rmid(ns, id)	((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
#define msg_checkid(ns, msq, msgid)	\
	ipc_checkid(&msg_ids(ns), &(msq)->q_perm, msgid)
#define msg_buildid(ns, id, seq) \
	ipc_buildid(&msg_ids(ns), id, seq)

static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
L
Linus Torvalds 已提交
86
#ifdef CONFIG_PROC_FS
87
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
L
Linus Torvalds 已提交
88 89
#endif

90
static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
K
Kirill Korotaev 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
{
	ns->ids[IPC_MSG_IDS] = ids;
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;
	ns->msg_ctlmni = MSGMNI;
	ipc_init_ids(ids, ns->msg_ctlmni);
}

/*
 * Allocate and initialise the message-queue id table of a new namespace.
 * Returns 0 on success or -ENOMEM if the table cannot be allocated.
 */
int msg_init_ns(struct ipc_namespace *ns)
{
	struct ipc_ids *table = kmalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return -ENOMEM;

	__msg_init_ns(ns, table);
	return 0;
}

void msg_exit_ns(struct ipc_namespace *ns)
{
	int i;
	struct msg_queue *msq;

	mutex_lock(&msg_ids(ns).mutex);
	for (i = 0; i <= msg_ids(ns).max_id; i++) {
		msq = msg_lock(ns, i);
		if (msq == NULL)
			continue;

		freeque(ns, msq, i);
	}
	mutex_unlock(&msg_ids(ns).mutex);

P
Pavel Emelianov 已提交
126
	ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
K
Kirill Korotaev 已提交
127 128 129 130
	kfree(ns->ids[IPC_MSG_IDS]);
	ns->ids[IPC_MSG_IDS] = NULL;
}

131
/*
 * Boot-time setup: initialise the message queues of the initial ipc
 * namespace and register the "sysvipc/msg" proc interface.
 */
void __init msg_init(void)
{
	__msg_init_ns(&init_ipc_ns, &init_msg_ids);

	/* the second argument is the column header line of the proc file */
	ipc_init_proc_interface("sysvipc/msg",
				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
}

K
Kirill Korotaev 已提交
139
/*
 * Create a new, empty message queue for @key in namespace @ns.
 *
 * Returns the new queue id on success, -ENOMEM if the queue cannot be
 * allocated, -ENOSPC if no id slot is available, or the error reported
 * by the security module.
 */
static int newque(struct ipc_namespace *ns, key_t key, int msgflg)
{
	struct msg_queue *msq;
	int slot, err;

	msq = ipc_rcu_alloc(sizeof(*msq));
	if (msq == NULL)
		return -ENOMEM;

	msq->q_perm.key = key;
	msq->q_perm.mode = msgflg & S_IRWXUGO;
	msq->q_perm.security = NULL;

	err = security_msg_queue_alloc(msq);
	if (err != 0) {
		ipc_rcu_putref(msq);
		return err;
	}

	slot = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
	if (slot == -1) {
		security_msg_queue_free(msq);
		ipc_rcu_putref(msq);
		return -ENOSPC;
	}

	/*
	 * NOTE(review): the queue appears to be locked on return from
	 * ipc_addid() (it is unlocked below) - confirm in ipc/util.c.
	 */
	msq->q_id = msg_buildid(ns, slot, msq->q_perm.seq);

	msq->q_stime = 0;
	msq->q_rtime = 0;
	msq->q_ctime = get_seconds();

	msq->q_cbytes = 0;
	msq->q_qnum = 0;
	msq->q_qbytes = ns->msg_ctlmnb;

	msq->q_lspid = 0;
	msq->q_lrpid = 0;

	INIT_LIST_HEAD(&msq->q_messages);
	INIT_LIST_HEAD(&msq->q_receivers);
	INIT_LIST_HEAD(&msq->q_senders);
	msg_unlock(msq);

	return msq->q_id;
}

179
static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
L
Linus Torvalds 已提交
180
{
181 182 183
	mss->tsk = current;
	current->state = TASK_INTERRUPTIBLE;
	list_add_tail(&mss->list, &msq->q_senders);
L
Linus Torvalds 已提交
184 185
}

186
static inline void ss_del(struct msg_sender *mss)
L
Linus Torvalds 已提交
187
{
188
	if (mss->list.next != NULL)
L
Linus Torvalds 已提交
189 190 191
		list_del(&mss->list);
}

192
static void ss_wakeup(struct list_head *h, int kill)
L
Linus Torvalds 已提交
193 194 195 196 197
{
	struct list_head *tmp;

	tmp = h->next;
	while (tmp != h) {
198 199 200
		struct msg_sender *mss;

		mss = list_entry(tmp, struct msg_sender, list);
L
Linus Torvalds 已提交
201
		tmp = tmp->next;
202 203
		if (kill)
			mss->list.next = NULL;
L
Linus Torvalds 已提交
204 205 206 207
		wake_up_process(mss->tsk);
	}
}

208
static void expunge_all(struct msg_queue *msq, int res)
L
Linus Torvalds 已提交
209 210 211 212 213
{
	struct list_head *tmp;

	tmp = msq->q_receivers.next;
	while (tmp != &msq->q_receivers) {
214 215 216
		struct msg_receiver *msr;

		msr = list_entry(tmp, struct msg_receiver, r_list);
L
Linus Torvalds 已提交
217 218 219 220 221 222 223
		tmp = tmp->next;
		msr->r_msg = NULL;
		wake_up_process(msr->r_tsk);
		smp_mb();
		msr->r_msg = ERR_PTR(res);
	}
}
224 225 226 227

/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
 * removes the message queue from message queue ID
 * array, and cleans up all the messages associated with this queue.
 *
 * msg_ids.mutex and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.mutex remains locked on exit.
 */
K
Kirill Korotaev 已提交
233
static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
L
Linus Torvalds 已提交
234 235 236
{
	struct list_head *tmp;

237 238
	expunge_all(msq, -EIDRM);
	ss_wakeup(&msq->q_senders, 1);
K
Kirill Korotaev 已提交
239
	msq = msg_rmid(ns, id);
L
Linus Torvalds 已提交
240
	msg_unlock(msq);
241

L
Linus Torvalds 已提交
242
	tmp = msq->q_messages.next;
243 244 245
	while (tmp != &msq->q_messages) {
		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);

L
Linus Torvalds 已提交
246 247 248 249 250 251 252 253 254
		tmp = tmp->next;
		atomic_dec(&msg_hdrs);
		free_msg(msg);
	}
	atomic_sub(msq->q_cbytes, &msg_bytes);
	security_msg_queue_free(msq);
	ipc_rcu_putref(msq);
}

255
/*
 * msgget(2): look up the queue for @key, or create it.
 *
 * Returns the queue id, or -ENOENT (no such key and no IPC_CREAT),
 * -EEXIST (key exists and IPC_CREAT|IPC_EXCL given), -EACCES
 * (permission check failed), or an error from newque() or the security
 * module.
 */
asmlinkage long sys_msgget(key_t key, int msgflg)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct msg_queue *msq;
	int id, ret = -EPERM;

	mutex_lock(&msg_ids(ns).mutex);

	if (key == IPC_PRIVATE) {
		ret = newque(ns, key, msgflg);
		goto out;
	}

	id = ipc_findkey(&msg_ids(ns), key);
	if (id == -1) {
		/* key not used */
		if (msgflg & IPC_CREAT)
			ret = newque(ns, key, msgflg);
		else
			ret = -ENOENT;
		goto out;
	}

	if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
		ret = -EEXIST;
		goto out;
	}

	/* the key exists: check access and hand back its id */
	msq = msg_lock(ns, id);
	BUG_ON(msq == NULL);
	if (ipcperms(&msq->q_perm, msgflg)) {
		ret = -EACCES;
	} else {
		int qid = msg_buildid(ns, id, msq->q_perm.seq);

		ret = security_msg_queue_associate(msq, msgflg);
		if (ret == 0)
			ret = qid;
	}
	msg_unlock(msq);
out:
	mutex_unlock(&msg_ids(ns).mutex);

	return ret;
}

292 293
/*
 * Copy queue status @in to user buffer @buf in the layout selected by
 * @version (IPC_64: struct msqid64_ds, IPC_OLD: struct msqid_ds).
 *
 * Returns 0 on success and non-zero on failure: the copy_to_user()
 * result, or -EINVAL for an unknown @version.
 */
static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
	struct msqid_ds legacy;

	if (version == IPC_64)
		return copy_to_user(buf, in, sizeof(*in));
	if (version != IPC_OLD)
		return -EINVAL;

	/* start from an all-zero structure before filling in the fields */
	memset(&legacy, 0, sizeof(legacy));

	ipc64_perm_to_ipc_perm(&in->msg_perm, &legacy.msg_perm);

	legacy.msg_stime = in->msg_stime;
	legacy.msg_rtime = in->msg_rtime;
	legacy.msg_ctime = in->msg_ctime;

	/*
	 * The short counters saturate at USHRT_MAX; the full values are
	 * reported through the msg_l* fields.
	 */
	legacy.msg_cbytes = (in->msg_cbytes > USHRT_MAX) ?
				USHRT_MAX : in->msg_cbytes;
	legacy.msg_lcbytes = in->msg_cbytes;

	legacy.msg_qnum = (in->msg_qnum > USHRT_MAX) ?
				USHRT_MAX : in->msg_qnum;

	legacy.msg_qbytes = (in->msg_qbytes > USHRT_MAX) ?
				USHRT_MAX : in->msg_qbytes;
	legacy.msg_lqbytes = in->msg_qbytes;

	legacy.msg_lspid = in->msg_lspid;
	legacy.msg_lrpid = in->msg_lrpid;

	return copy_to_user(buf, &legacy, sizeof(legacy));
}

/*
 * The fields copy_msqid_from_user() extracts from the user-supplied
 * msqid structure; consumed by the IPC_SET branch of sys_msgctl().
 */
struct msq_setbuf {
	unsigned long	qbytes;	/* requested q_qbytes of the queue */
	uid_t		uid;	/* new owner uid */
	gid_t		gid;	/* new owner gid */
	mode_t		mode;	/* new mode; only S_IRWXUGO bits are applied */
};

344 345
/*
 * Read an msqid structure in the layout selected by @version from user
 * buffer @buf and extract the settable fields into @out.
 *
 * Returns 0 on success and non-zero on failure: -EFAULT if the copy
 * faults, -EINVAL for an unknown @version.
 */
static inline unsigned long
copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
{
	if (version == IPC_64) {
		struct msqid64_ds wide;

		if (copy_from_user(&wide, buf, sizeof(wide)))
			return -EFAULT;

		out->qbytes = wide.msg_qbytes;
		out->uid = wide.msg_perm.uid;
		out->gid = wide.msg_perm.gid;
		out->mode = wide.msg_perm.mode;

		return 0;
	}

	if (version == IPC_OLD) {
		struct msqid_ds narrow;

		if (copy_from_user(&narrow, buf, sizeof(narrow)))
			return -EFAULT;

		out->uid = narrow.msg_perm.uid;
		out->gid = narrow.msg_perm.gid;
		out->mode = narrow.msg_perm.mode;

		/* a zero short field means: take the long field instead */
		out->qbytes = narrow.msg_qbytes ?
				narrow.msg_qbytes : narrow.msg_lqbytes;

		return 0;
	}

	return -EINVAL;
}

385
/*
 * msgctl(2): query or modify a message queue.
 *
 * IPC_INFO/MSG_INFO and IPC_STAT/MSG_STAT are answered directly from
 * the first switch.  IPC_SET and IPC_RMID fall through to the common
 * tail, which runs with msg_ids.mutex and the queue lock held.
 */
asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
{
	struct kern_ipc_perm *ipcp;
	struct msq_setbuf uninitialized_var(setbuf);
	struct msg_queue *msq;
	struct ipc_namespace *ns;
	int err, version;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	{
		struct msginfo info;
		int highest;

		if (!buf)
			return -EFAULT;

		err = security_msg_queue_msgctl(NULL, cmd);
		if (err)
			return err;

		/*
		 * We must not return kernel stack data.
		 * Due to padding, it's not enough
		 * to set all member fields.
		 */
		memset(&info, 0, sizeof(info));
		info.msgmni = ns->msg_ctlmni;
		info.msgmax = ns->msg_ctlmax;
		info.msgmnb = ns->msg_ctlmnb;
		info.msgssz = MSGSSZ;
		info.msgseg = MSGSEG;

		mutex_lock(&msg_ids(ns).mutex);
		if (cmd == MSG_INFO) {
			/* live usage counters */
			info.msgpool = msg_ids(ns).in_use;
			info.msgmap = atomic_read(&msg_hdrs);
			info.msgtql = atomic_read(&msg_bytes);
		} else {
			/* compile-time constants */
			info.msgmap = MSGMAP;
			info.msgpool = MSGPOOL;
			info.msgtql = MSGTQL;
		}
		highest = msg_ids(ns).max_id;
		mutex_unlock(&msg_ids(ns).mutex);

		if (copy_to_user(buf, &info, sizeof(struct msginfo)))
			return -EFAULT;
		return (highest < 0) ? 0 : highest;
	}
	case MSG_STAT:
	case IPC_STAT:
	{
		struct msqid64_ds stat;
		int retval;

		if (!buf)
			return -EFAULT;
		if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
			return -EINVAL;

		memset(&stat, 0, sizeof(stat));

		msq = msg_lock(ns, msqid);
		if (msq == NULL)
			return -EINVAL;

		if (cmd == MSG_STAT) {
			/* msqid is used as a slot index; return the full id */
			retval = msg_buildid(ns, msqid, msq->q_perm.seq);
		} else {
			if (msg_checkid(ns, msq, msqid)) {
				err = -EIDRM;
				goto out_unlock;
			}
			retval = 0;
		}

		if (ipcperms(&msq->q_perm, S_IRUGO)) {
			err = -EACCES;
			goto out_unlock;
		}

		err = security_msg_queue_msgctl(msq, cmd);
		if (err)
			goto out_unlock;

		/* snapshot under the queue lock, copy out after dropping it */
		kernel_to_ipc64_perm(&msq->q_perm, &stat.msg_perm);
		stat.msg_stime  = msq->q_stime;
		stat.msg_rtime  = msq->q_rtime;
		stat.msg_ctime  = msq->q_ctime;
		stat.msg_cbytes = msq->q_cbytes;
		stat.msg_qnum   = msq->q_qnum;
		stat.msg_qbytes = msq->q_qbytes;
		stat.msg_lspid  = msq->q_lspid;
		stat.msg_lrpid  = msq->q_lrpid;
		msg_unlock(msq);

		if (copy_msqid_to_user(buf, &stat, version))
			return -EFAULT;
		return retval;
	}
	case IPC_SET:
		if (!buf)
			return -EFAULT;
		/* fetch the new settings before any lock is taken */
		if (copy_msqid_from_user(&setbuf, buf, version))
			return -EFAULT;
		break;
	case IPC_RMID:
		break;
	default:
		return -EINVAL;
	}

	/* common tail for IPC_SET and IPC_RMID */
	mutex_lock(&msg_ids(ns).mutex);
	msq = msg_lock(ns, msqid);
	if (msq == NULL) {
		err = -EINVAL;
		goto out_up;
	}

	if (msg_checkid(ns, msq, msqid)) {
		err = -EIDRM;
		goto out_unlock_up;
	}
	ipcp = &msq->q_perm;

	err = audit_ipc_obj(ipcp);
	if (err)
		goto out_unlock_up;
	if (cmd == IPC_SET) {
		err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid,
					 setbuf.mode);
		if (err)
			goto out_unlock_up;
	}

	/* only the creator, the owner or CAP_SYS_ADMIN may continue */
	if (current->euid != ipcp->cuid &&
	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
		/* We _could_ check for CAP_CHOWN above, but we don't */
		err = -EPERM;
		goto out_unlock_up;
	}

	err = security_msg_queue_msgctl(msq, cmd);
	if (err)
		goto out_unlock_up;

	switch (cmd) {
	case IPC_SET:
		/* raising the limit above msg_ctlmnb needs CAP_SYS_RESOURCE */
		if (setbuf.qbytes > ns->msg_ctlmnb &&
		    !capable(CAP_SYS_RESOURCE)) {
			err = -EPERM;
			goto out_unlock_up;
		}

		msq->q_qbytes = setbuf.qbytes;

		ipcp->uid = setbuf.uid;
		ipcp->gid = setbuf.gid;
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
			     (S_IRWXUGO & setbuf.mode);
		msq->q_ctime = get_seconds();
		/* sleeping receivers might be excluded by
		 * stricter permissions.
		 */
		expunge_all(msq, -EAGAIN);
		/* sleeping senders might be able to send
		 * due to a larger queue size.
		 */
		ss_wakeup(&msq->q_senders, 0);
		msg_unlock(msq);
		break;
	case IPC_RMID:
		/* freeque() releases the queue lock itself */
		freeque(ns, msq, msqid);
		break;
	}
	err = 0;
out_up:
	mutex_unlock(&msg_ids(ns).mutex);
	return err;
out_unlock_up:
	msg_unlock(msq);
	goto out_up;
out_unlock:
	msg_unlock(msq);
	return err;
}

570
static int testmsg(struct msg_msg *msg, long type, int mode)
L
Linus Torvalds 已提交
571 572 573 574 575 576
{
	switch(mode)
	{
		case SEARCH_ANY:
			return 1;
		case SEARCH_LESSEQUAL:
577
			if (msg->m_type <=type)
L
Linus Torvalds 已提交
578 579 580
				return 1;
			break;
		case SEARCH_EQUAL:
581
			if (msg->m_type == type)
L
Linus Torvalds 已提交
582 583 584
				return 1;
			break;
		case SEARCH_NOTEQUAL:
585
			if (msg->m_type != type)
L
Linus Torvalds 已提交
586 587 588 589 590 591
				return 1;
			break;
	}
	return 0;
}

592
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
L
Linus Torvalds 已提交
593
{
594
	struct list_head *tmp;
L
Linus Torvalds 已提交
595 596 597

	tmp = msq->q_receivers.next;
	while (tmp != &msq->q_receivers) {
598 599 600
		struct msg_receiver *msr;

		msr = list_entry(tmp, struct msg_receiver, r_list);
L
Linus Torvalds 已提交
601
		tmp = tmp->next;
602 603 604 605
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

L
Linus Torvalds 已提交
606
			list_del(&msr->r_list);
607
			if (msr->r_maxsize < msg->m_ts) {
L
Linus Torvalds 已提交
608 609 610 611 612 613 614 615 616 617 618
				msr->r_msg = NULL;
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = ERR_PTR(-E2BIG);
			} else {
				msr->r_msg = NULL;
				msq->q_lrpid = msr->r_tsk->pid;
				msq->q_rtime = get_seconds();
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = msg;
619

L
Linus Torvalds 已提交
620 621 622 623 624 625 626
				return 1;
			}
		}
	}
	return 0;
}

627 628
/*
 * Send one message of type @mtype with @msgsz bytes from user buffer
 * @mtext to queue @msqid.
 *
 * Sleeps while the queue is full unless IPC_NOWAIT is set in @msgflg.
 * Returns 0 on success or a negative errno.
 */
long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;

	if (msqid < 0 || mtype < 1 ||
	    msgsz > ns->msg_ctlmax || (long) msgsz < 0)
		return -EINVAL;

	/* copy the payload in before any lock is taken */
	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	msq = msg_lock(ns, msqid);
	if (msq == NULL) {
		err = -EINVAL;
		goto out_free;
	}

	if (msg_checkid(ns, msq, msqid)) {
		err = -EIDRM;
		goto out_unlock_free;
	}

	for (;;) {
		struct msg_sender waiter;

		/* re-checked on every pass of the loop */
		if (ipcperms(&msq->q_perm, S_IWUGO)) {
			err = -EACCES;
			goto out_unlock_free;
		}

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock_free;

		/* room for both the bytes and one more message? */
		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
		    1 + msq->q_qnum <= msq->q_qbytes)
			break;

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock_free;
		}
		ss_add(msq, &waiter);
		/* hold a reference on msq across the sleep */
		ipc_rcu_getref(msq);
		msg_unlock(msq);
		schedule();

		ipc_lock_by_ptr(&msq->q_perm);
		ipc_rcu_putref(msq);
		if (msq->q_perm.deleted) {
			err = -EIDRM;
			goto out_unlock_free;
		}
		ss_del(&waiter);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock_free;
		}
	}

	msq->q_lspid = current->tgid;
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &msg_bytes);
		atomic_inc(&msg_hdrs);
	}

	err = 0;
	/* the message now belongs to the queue or to a receiver */
	msg = NULL;

out_unlock_free:
	msg_unlock(msq);
out_free:
	if (msg != NULL)
		free_msg(msg);
	return err;
}

721 722 723 724 725 726 727 728 729 730
/*
 * msgsnd(2): fetch the message type from the user's msgbuf and pass the
 * request on to do_msgsnd().  Returns -EFAULT if the type cannot be
 * read, otherwise whatever do_msgsnd() returns.
 */
asmlinkage long
sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
{
	long type;

	if (get_user(type, &msgp->mtype) != 0)
		return -EFAULT;

	return do_msgsnd(msqid, type, msgp->mtext, msgsz, msgflg);
}

731
static inline int convert_mode(long *msgtyp, int msgflg)
L
Linus Torvalds 已提交
732
{
733
	/*
L
Linus Torvalds 已提交
734 735 736
	 *  find message of correct type.
	 *  msgtyp = 0 => get first.
	 *  msgtyp > 0 => get first message of matching type.
737
	 *  msgtyp < 0 => get message with least type must be < abs(msgtype).
L
Linus Torvalds 已提交
738
	 */
739
	if (*msgtyp == 0)
L
Linus Torvalds 已提交
740
		return SEARCH_ANY;
741 742
	if (*msgtyp < 0) {
		*msgtyp = -*msgtyp;
L
Linus Torvalds 已提交
743 744
		return SEARCH_LESSEQUAL;
	}
745
	if (msgflg & MSG_EXCEPT)
L
Linus Torvalds 已提交
746 747 748 749
		return SEARCH_NOTEQUAL;
	return SEARCH_EQUAL;
}

750 751
/*
 * Receive one message from queue @msqid into user buffer @mtext.
 *
 * @msgtyp and @msgflg select the message (see convert_mode()).  The
 * type of the received message is stored in *@pmtype.  Sleeps until a
 * message arrives unless IPC_NOWAIT is set.
 *
 * Returns the number of bytes copied, or a negative errno.
 */
long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
		size_t msgsz, long msgtyp, int msgflg)
{
	struct msg_queue *msq;
	struct msg_msg *msg;
	struct ipc_namespace *ns;
	int mode;

	if (msqid < 0 || (long) msgsz < 0)
		return -EINVAL;
	mode = convert_mode(&msgtyp, msgflg);
	ns = current->nsproxy->ipc_ns;

	msq = msg_lock(ns, msqid);
	if (msq == NULL)
		return -EINVAL;

	msg = ERR_PTR(-EIDRM);
	if (msg_checkid(ns, msq, msqid))
		goto out_unlock;

	for (;;) {
		struct msg_receiver msr_d;
		struct msg_msg *walk;

		msg = ERR_PTR(-EACCES);
		if (ipcperms(&msq->q_perm, S_IRUGO))
			goto out_unlock;

		/* -EAGAIN doubles as the "nothing found" marker */
		msg = ERR_PTR(-EAGAIN);
		list_for_each_entry(walk, &msq->q_messages, m_list) {
			if (!testmsg(walk, msgtyp, mode))
				continue;
			if (security_msg_queue_msgrcv(msq, walk, current,
						      msgtyp, mode))
				continue;

			msg = walk;
			if (mode != SEARCH_LESSEQUAL || walk->m_type == 1)
				break;
			/*
			 * Keep scanning, but only for messages with a
			 * strictly smaller type than this candidate.
			 */
			msgtyp = walk->m_type - 1;
		}

		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock;
			}
			list_del(&msg->m_list);
			msq->q_qnum--;
			msq->q_rtime = get_seconds();
			msq->q_lrpid = current->tgid;
			msq->q_cbytes -= msg->m_ts;
			atomic_sub(msg->m_ts, &msg_bytes);
			atomic_dec(&msg_hdrs);
			/* there is room again: let blocked senders retry */
			ss_wakeup(&msq->q_senders, 0);
			msg_unlock(msq);
			break;
		}

		/* No message waiting. Wait for a message */
		if (msgflg & IPC_NOWAIT) {
			msg = ERR_PTR(-ENOMSG);
			goto out_unlock;
		}

		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		msr_d.r_maxsize = (msgflg & MSG_NOERROR) ? INT_MAX : msgsz;
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		current->state = TASK_INTERRUPTIBLE;
		msg_unlock(msq);

		schedule();

		/* Lockless receive, part 1:
		 * Disable preemption.  We don't hold a reference to the queue
		 * and getting a reference would defeat the idea of a lockless
		 * operation, thus the code relies on rcu to guarantee the
		 * existence of msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, then the queue is not yet
		 * destroyed.
		 * rcu_read_lock() prevents preemption between reading r_msg
		 * and the spin_lock() inside ipc_lock_by_ptr().
		 */
		rcu_read_lock();

		/* Lockless receive, part 2:
		 * Wait until pipelined_send or expunge_all are outside of
		 * wake_up_process(). There is a race with exit(), see
		 * ipc/mqueue.c for the details.
		 */
		while ((msg = (struct msg_msg *)msr_d.r_msg) == NULL)
			cpu_relax();

		/* Lockless receive, part 3:
		 * If there is a message or an error then accept it without
		 * locking.
		 */
		if (msg != ERR_PTR(-EAGAIN)) {
			rcu_read_unlock();
			break;
		}

		/* Lockless receive, part 4:
		 * Acquire the queue spinlock.
		 */
		ipc_lock_by_ptr(&msq->q_perm);
		rcu_read_unlock();

		/* Lockless receive, part 5:
		 * Repeat test after acquiring the spinlock.
		 */
		msg = (struct msg_msg *)msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock;

		/* still nothing for us: leave the list before retrying */
		list_del(&msr_d.r_list);
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
out_unlock:
			msg_unlock(msq);
			break;
		}
	}
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	/* never copy more than the message actually holds */
	if (msgsz > msg->m_ts)
		msgsz = msg->m_ts;
	*pmtype = msg->m_type;
	if (store_msg(mtext, msg, msgsz))
		msgsz = -EFAULT;

	free_msg(msg);

	return msgsz;
}

906 907 908 909 910 911 912 913 914 915 916 917 918 919 920
/*
 * msgrcv(2): receive a message through do_msgrcv() and store its type
 * in the user's msgbuf.  Returns the byte count from do_msgrcv(), its
 * negative errno, or -EFAULT if the type cannot be written back.
 */
asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
			   long msgtyp, int msgflg)
{
	long type;
	long ret;

	ret = do_msgrcv(msqid, &type, msgp->mtext, msgsz, msgtyp, msgflg);
	if (ret >= 0 && put_user(type, &msgp->mtype))
		ret = -EFAULT;

	return ret;
}

L
Linus Torvalds 已提交
921
#ifdef CONFIG_PROC_FS
922
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
L
Linus Torvalds 已提交
923
{
924 925 926
	struct msg_queue *msq = it;

	return seq_printf(s,
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941
			"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
			msq->q_perm.key,
			msq->q_id,
			msq->q_perm.mode,
			msq->q_cbytes,
			msq->q_qnum,
			msq->q_lspid,
			msq->q_lrpid,
			msq->q_perm.uid,
			msq->q_perm.gid,
			msq->q_perm.cuid,
			msq->q_perm.cgid,
			msq->q_stime,
			msq->q_rtime,
			msq->q_ctime);
L
Linus Torvalds 已提交
942 943
}
#endif