/*
 * linux/ipc/msg.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * MSGMAX limit removed, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 */

25
#include <linux/capability.h>
L
Linus Torvalds 已提交
26 27 28 29
#include <linux/slab.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
30
#include <linux/mm.h>
L
Linus Torvalds 已提交
31 32 33 34 35 36
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
37
#include <linux/seq_file.h>
N
Nadia Derbey 已提交
38
#include <linux/rwsem.h>
K
Kirill Korotaev 已提交
39
#include <linux/nsproxy.h>
40
#include <linux/ipc_namespace.h>
I
Ingo Molnar 已提交
41

L
Linus Torvalds 已提交
42 43 44 45
#include <asm/current.h>
#include <asm/uaccess.h>
#include "util.h"

46 47 48
/*
 * one msg_receiver structure for each sleeping receiver:
 */
L
Linus Torvalds 已提交
49
struct msg_receiver {
50 51
	struct list_head	r_list;
	struct task_struct	*r_tsk;
L
Linus Torvalds 已提交
52

53 54 55
	int			r_mode;
	long			r_msgtype;
	long			r_maxsize;
L
Linus Torvalds 已提交
56

57
	struct msg_msg		*volatile r_msg;
L
Linus Torvalds 已提交
58 59 60 61
};

/* one msg_sender for each sleeping sender */
struct msg_sender {
62 63
	struct list_head	list;
	struct task_struct	*tsk;
L
Linus Torvalds 已提交
64 65 66 67 68 69 70
};

#define SEARCH_ANY		1
#define SEARCH_EQUAL		2
#define SEARCH_NOTEQUAL		3
#define SEARCH_LESSEQUAL	4

71
#define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
L
Linus Torvalds 已提交
72

K
Kirill Korotaev 已提交
73 74
#define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)

75
static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
N
Nadia Derbey 已提交
76
static int newque(struct ipc_namespace *, struct ipc_params *);
L
Linus Torvalds 已提交
77
#ifdef CONFIG_PROC_FS
78
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
L
Linus Torvalds 已提交
79 80
#endif

81 82 83
/*
 * Scale msgmni with the available lowmem size: the memory dedicated to msg
 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
84 85
 * Also take into account the number of nsproxies created so far.
 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
86
 */
87
void recompute_msgmni(struct ipc_namespace *ns)
88 89 90
{
	struct sysinfo i;
	unsigned long allowed;
91
	int nb_ns;
92 93 94 95

	si_meminfo(&i);
	allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
		/ MSGMNB;
96 97
	nb_ns = atomic_read(&nr_ipc_ns);
	allowed /= nb_ns;
98 99 100 101 102 103

	if (allowed < MSGMNI) {
		ns->msg_ctlmni = MSGMNI;
		goto out_callback;
	}

104 105
	if (allowed > IPCMNI / nb_ns) {
		ns->msg_ctlmni = IPCMNI / nb_ns;
106 107 108 109 110 111 112 113 114 115 116
		goto out_callback;
	}

	ns->msg_ctlmni = allowed;

out_callback:

	printk(KERN_INFO "msgmni has been set to %d for ipc namespace %p\n",
		ns->msg_ctlmni, ns);
}

117
void msg_init_ns(struct ipc_namespace *ns)
K
Kirill Korotaev 已提交
118 119 120
{
	ns->msg_ctlmax = MSGMAX;
	ns->msg_ctlmnb = MSGMNB;
121 122 123

	recompute_msgmni(ns);

124 125
	atomic_set(&ns->msg_bytes, 0);
	atomic_set(&ns->msg_hdrs, 0);
126
	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
K
Kirill Korotaev 已提交
127 128
}

129
#ifdef CONFIG_IPC_NS
K
Kirill Korotaev 已提交
130 131
void msg_exit_ns(struct ipc_namespace *ns)
{
132
	free_ipcs(ns, &msg_ids(ns), freeque);
K
Kirill Korotaev 已提交
133
}
134
#endif
K
Kirill Korotaev 已提交
135

136
/*
 * msg_init - boot-time initialisation of System V message queues
 *
 * Initialises the message state of the initial ipc namespace and registers
 * the "sysvipc/msg" proc interface; the string below is the column header
 * matching the rows produced by sysvipc_msg_proc_show().
 */
void __init msg_init(void)
{
	msg_init_ns(&init_ipc_ns);

	ipc_init_proc_interface(
		"sysvipc/msg",
		"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
		IPC_MSG_IDS,
		sysvipc_msg_proc_show);
}

N
Nadia Derbey 已提交
144 145 146 147 148 149 150 151 152
/*
 * This routine is called in the paths where the rw_mutex is held to protect
 * access to the idr tree.
 */
static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
						int id)
{
	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);

153 154 155
	if (IS_ERR(ipcp))
		return (struct msg_queue *)ipcp;

N
Nadia Derbey 已提交
156 157 158 159 160 161 162
	return container_of(ipcp, struct msg_queue, q_perm);
}

/*
 * msg_lock_(check_) routines are called in the paths where the rw_mutex
 * is not held.
 */
163 164
static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
{
N
Nadia Derbey 已提交
165 166
	struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);

167 168 169
	if (IS_ERR(ipcp))
		return (struct msg_queue *)ipcp;

N
Nadia Derbey 已提交
170
	return container_of(ipcp, struct msg_queue, q_perm);
171 172 173 174 175
}

static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
						int id)
{
N
Nadia Derbey 已提交
176 177
	struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);

178 179 180
	if (IS_ERR(ipcp))
		return (struct msg_queue *)ipcp;

N
Nadia Derbey 已提交
181
	return container_of(ipcp, struct msg_queue, q_perm);
182 183
}

N
Nadia Derbey 已提交
184 185 186 187 188
static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
{
	ipc_rmid(&msg_ids(ns), &s->q_perm);
}

N
Nadia Derbey 已提交
189 190 191 192 193
/**
 * newque - Create a new msg queue
 * @ns: namespace
 * @params: ptr to the structure that contains the key and msgflg
 *
N
Nadia Derbey 已提交
194
 * Called with msg_ids.rw_mutex held (writer)
N
Nadia Derbey 已提交
195
 */
N
Nadia Derbey 已提交
196
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
L
Linus Torvalds 已提交
197 198
{
	struct msg_queue *msq;
199
	int id, retval;
N
Nadia Derbey 已提交
200 201
	key_t key = params->key;
	int msgflg = params->flg;
L
Linus Torvalds 已提交
202

203 204
	msq = ipc_rcu_alloc(sizeof(*msq));
	if (!msq)
L
Linus Torvalds 已提交
205 206
		return -ENOMEM;

207
	msq->q_perm.mode = msgflg & S_IRWXUGO;
L
Linus Torvalds 已提交
208 209 210 211 212 213 214 215 216
	msq->q_perm.key = key;

	msq->q_perm.security = NULL;
	retval = security_msg_queue_alloc(msq);
	if (retval) {
		ipc_rcu_putref(msq);
		return retval;
	}

N
Nadia Derbey 已提交
217 218 219
	/*
	 * ipc_addid() locks msq
	 */
K
Kirill Korotaev 已提交
220
	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
221
	if (id < 0) {
L
Linus Torvalds 已提交
222 223
		security_msg_queue_free(msq);
		ipc_rcu_putref(msq);
224
		return id;
L
Linus Torvalds 已提交
225 226 227 228 229
	}

	msq->q_stime = msq->q_rtime = 0;
	msq->q_ctime = get_seconds();
	msq->q_cbytes = msq->q_qnum = 0;
K
Kirill Korotaev 已提交
230
	msq->q_qbytes = ns->msg_ctlmnb;
L
Linus Torvalds 已提交
231 232 233 234
	msq->q_lspid = msq->q_lrpid = 0;
	INIT_LIST_HEAD(&msq->q_messages);
	INIT_LIST_HEAD(&msq->q_receivers);
	INIT_LIST_HEAD(&msq->q_senders);
N
Nadia Derbey 已提交
235

L
Linus Torvalds 已提交
236 237
	msg_unlock(msq);

N
Nadia Derbey 已提交
238
	return msq->q_perm.id;
L
Linus Torvalds 已提交
239 240
}

241
static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
L
Linus Torvalds 已提交
242
{
243 244 245
	mss->tsk = current;
	current->state = TASK_INTERRUPTIBLE;
	list_add_tail(&mss->list, &msq->q_senders);
L
Linus Torvalds 已提交
246 247
}

248
static inline void ss_del(struct msg_sender *mss)
L
Linus Torvalds 已提交
249
{
250
	if (mss->list.next != NULL)
L
Linus Torvalds 已提交
251 252 253
		list_del(&mss->list);
}

254
static void ss_wakeup(struct list_head *h, int kill)
L
Linus Torvalds 已提交
255 256 257 258 259
{
	struct list_head *tmp;

	tmp = h->next;
	while (tmp != h) {
260 261 262
		struct msg_sender *mss;

		mss = list_entry(tmp, struct msg_sender, list);
L
Linus Torvalds 已提交
263
		tmp = tmp->next;
264 265
		if (kill)
			mss->list.next = NULL;
L
Linus Torvalds 已提交
266 267 268 269
		wake_up_process(mss->tsk);
	}
}

270
static void expunge_all(struct msg_queue *msq, int res)
L
Linus Torvalds 已提交
271 272 273 274 275
{
	struct list_head *tmp;

	tmp = msq->q_receivers.next;
	while (tmp != &msq->q_receivers) {
276 277 278
		struct msg_receiver *msr;

		msr = list_entry(tmp, struct msg_receiver, r_list);
L
Linus Torvalds 已提交
279 280 281 282 283 284 285
		tmp = tmp->next;
		msr->r_msg = NULL;
		wake_up_process(msr->r_tsk);
		smp_mb();
		msr->r_msg = ERR_PTR(res);
	}
}
286 287 288

/*
 * freeque() wakes up waiters on the sender and receiver waiting queue,
N
Nadia Derbey 已提交
289 290
 * removes the message queue from message queue ID IDR, and cleans up all the
 * messages associated with this queue.
L
Linus Torvalds 已提交
291
 *
N
Nadia Derbey 已提交
292 293
 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
 * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
L
Linus Torvalds 已提交
294
 */
295
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
L
Linus Torvalds 已提交
296 297
{
	struct list_head *tmp;
298
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
L
Linus Torvalds 已提交
299

300 301
	expunge_all(msq, -EIDRM);
	ss_wakeup(&msq->q_senders, 1);
N
Nadia Derbey 已提交
302
	msg_rmid(ns, msq);
L
Linus Torvalds 已提交
303
	msg_unlock(msq);
304

L
Linus Torvalds 已提交
305
	tmp = msq->q_messages.next;
306 307 308
	while (tmp != &msq->q_messages) {
		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);

L
Linus Torvalds 已提交
309
		tmp = tmp->next;
310
		atomic_dec(&ns->msg_hdrs);
L
Linus Torvalds 已提交
311 312
		free_msg(msg);
	}
313
	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
L
Linus Torvalds 已提交
314 315 316 317
	security_msg_queue_free(msq);
	ipc_rcu_putref(msq);
}

N
Nadia Derbey 已提交
318
/*
N
Nadia Derbey 已提交
319
 * Called with msg_ids.rw_mutex and ipcp locked.
N
Nadia Derbey 已提交
320
 */
N
Nadia Derbey 已提交
321
static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
N
Nadia Derbey 已提交
322
{
N
Nadia Derbey 已提交
323 324 325
	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);

	return security_msg_queue_associate(msq, msgflg);
N
Nadia Derbey 已提交
326 327
}

328
/*
 * sys_msgget - msgget() system call
 *
 * Packs the key, the flags and the message-queue callbacks (newque() to
 * create, msg_security() to associate, no extra checks) and lets ipcget()
 * do the work on the caller's ipc namespace.
 */
asmlinkage long sys_msgget(key_t key, int msgflg)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct ipc_ops msg_ops = {
		.getnew		= newque,
		.associate	= msg_security,
		.more_checks	= NULL,
	};
	struct ipc_params msg_params = {
		.key	= key,
		.flg	= msgflg,
	};

	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}

346 347
/*
 * copy_msqid_to_user - export queue status in the layout selected by @version
 * @buf: user-space destination
 * @in: status in msqid64_ds form
 * @version: IPC_64 copies @in as is; IPC_OLD converts to struct msqid_ds
 *
 * For IPC_OLD the cbytes, qnum and qbytes values are clamped to USHRT_MAX,
 * while the unclamped byte counts go into msg_lcbytes / msg_lqbytes.
 *
 * Returns the result of copy_to_user() (non-zero on failure), or -EINVAL
 * converted to unsigned long for an unknown @version.
 */
static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
	struct msqid_ds old;

	if (version == IPC_64)
		return copy_to_user(buf, in, sizeof(*in));
	if (version != IPC_OLD)
		return -EINVAL;

	memset(&old, 0, sizeof(old));

	ipc64_perm_to_ipc_perm(&in->msg_perm, &old.msg_perm);

	old.msg_stime = in->msg_stime;
	old.msg_rtime = in->msg_rtime;
	old.msg_ctime = in->msg_ctime;

	old.msg_cbytes = (in->msg_cbytes > USHRT_MAX) ?
				USHRT_MAX : in->msg_cbytes;
	old.msg_lcbytes = in->msg_cbytes;

	old.msg_qnum = (in->msg_qnum > USHRT_MAX) ?
				USHRT_MAX : in->msg_qnum;

	old.msg_qbytes = (in->msg_qbytes > USHRT_MAX) ?
				USHRT_MAX : in->msg_qbytes;
	old.msg_lqbytes = in->msg_qbytes;

	old.msg_lspid = in->msg_lspid;
	old.msg_lrpid = in->msg_lrpid;

	return copy_to_user(buf, &old, sizeof(old));
}

/*
 * Attributes requested by an IPC_SET call: filled in from user space by
 * copy_msqid_from_user() and applied to the queue by msgctl_down().
 */
struct msq_setbuf {
	unsigned long	qbytes;	/* new value for the queue's q_qbytes */
	uid_t		uid;	/* new owner uid */
	gid_t		gid;	/* new owner gid */
	mode_t		mode;	/* new mode; only the S_IRWXUGO bits are applied */
};

398 399
/*
 * copy_msqid_from_user - read the IPC_SET arguments from user space
 * @out: receives qbytes, uid, gid and mode
 * @buf: user-space source, laid out according to @version
 * @version: IPC_64 (struct msqid64_ds) or IPC_OLD (struct msqid_ds)
 *
 * For IPC_OLD, msg_lqbytes is used as the byte limit when msg_qbytes is 0.
 *
 * Returns 0 on success, otherwise -EFAULT (copy failed) or -EINVAL (unknown
 * @version) converted to unsigned long.
 */
static inline unsigned long
copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
{
	if (version == IPC_64) {
		struct msqid64_ds ds64;

		if (copy_from_user(&ds64, buf, sizeof(ds64)))
			return -EFAULT;

		out->qbytes = ds64.msg_qbytes;
		out->uid = ds64.msg_perm.uid;
		out->gid = ds64.msg_perm.gid;
		out->mode = ds64.msg_perm.mode;

		return 0;
	}

	if (version == IPC_OLD) {
		struct msqid_ds ds_old;

		if (copy_from_user(&ds_old, buf, sizeof(ds_old)))
			return -EFAULT;

		out->uid = ds_old.msg_perm.uid;
		out->gid = ds_old.msg_perm.gid;
		out->mode = ds_old.msg_perm.mode;

		out->qbytes = ds_old.msg_qbytes ?
				ds_old.msg_qbytes : ds_old.msg_lqbytes;

		return 0;
	}

	return -EINVAL;
}

439 440 441 442 443 444 445
/*
 * msgctl_down - handle the msgctl commands that need the rw_mutex held in
 * write mode (IPC_SET and IPC_RMID).
 *
 * For IPC_SET the new attributes are copied from user space before any lock
 * is taken.  The queue is then looked up and locked, audited, and checked
 * for permission: the caller's euid must match the queue's creator or owner
 * uid, or the caller must have CAP_SYS_ADMIN.
 *
 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
 *
 * Returns 0 on success or a negative errno.
 */
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
		       struct msqid_ds __user *buf, int version)
{
	struct msq_setbuf new_attrs;
	struct kern_ipc_perm *perm;
	struct msg_queue *queue;
	int ret;

	if (cmd == IPC_SET &&
	    copy_msqid_from_user(&new_attrs, buf, version))
		return -EFAULT;

	down_write(&msg_ids(ns).rw_mutex);

	queue = msg_lock_check_down(ns, msqid);
	if (IS_ERR(queue)) {
		ret = PTR_ERR(queue);
		goto out_up;
	}
	perm = &queue->q_perm;

	ret = audit_ipc_obj(perm);
	if (ret)
		goto out_unlock;

	if (cmd == IPC_SET) {
		ret = audit_ipc_set_perm(new_attrs.qbytes, new_attrs.uid,
					 new_attrs.gid, new_attrs.mode);
		if (ret)
			goto out_unlock;
	}

	/* We _could_ check for CAP_CHOWN here, but we don't */
	ret = -EPERM;
	if (current->euid != perm->cuid &&
	    current->euid != perm->uid &&
	    !capable(CAP_SYS_ADMIN))
		goto out_unlock;

	ret = security_msg_queue_msgctl(queue, cmd);
	if (ret)
		goto out_unlock;

	if (cmd == IPC_RMID) {
		/* freeque() drops the queue lock itself */
		freeque(ns, perm);
		goto out_up;
	}

	if (cmd != IPC_SET) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* raising the limit above msg_ctlmnb needs CAP_SYS_RESOURCE */
	if (new_attrs.qbytes > ns->msg_ctlmnb &&
	    !capable(CAP_SYS_RESOURCE)) {
		ret = -EPERM;
		goto out_unlock;
	}

	queue->q_qbytes = new_attrs.qbytes;

	perm->uid = new_attrs.uid;
	perm->gid = new_attrs.gid;
	perm->mode = (perm->mode & ~S_IRWXUGO) |
		     (S_IRWXUGO & new_attrs.mode);
	queue->q_ctime = get_seconds();

	/*
	 * sleeping receivers might be excluded by stricter permissions.
	 */
	expunge_all(queue, -EAGAIN);

	/*
	 * sleeping senders might be able to send due to a larger queue size.
	 */
	ss_wakeup(&queue->q_senders, 0);

out_unlock:
	msg_unlock(queue);
out_up:
	up_write(&msg_ids(ns).rw_mutex);
	return ret;
}

/*
 * sys_msgctl - msgctl() system call
 *
 * IPC_INFO / MSG_INFO: copy a struct msginfo to @buf and return the highest
 *	id in use (0 if there is none).
 * IPC_STAT / MSG_STAT: copy the queue status to @buf.  For MSG_STAT @msqid
 *	is an index rather than a queue id and the queue id is returned; for
 *	IPC_STAT 0 is returned.
 * IPC_SET / IPC_RMID: delegated to msgctl_down().
 *
 * Returns a negative errno on failure.
 */
asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
{
	struct ipc_namespace *ns;
	struct msg_queue *msq;
	int version, err;

	if (msqid < 0 || cmd < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case MSG_INFO:
	{
		struct msginfo info;
		int highest_id;

		if (!buf)
			return -EFAULT;

		err = security_msg_queue_msgctl(NULL, cmd);
		if (err)
			return err;

		/*
		 * We must not return kernel stack data.  Due to padding,
		 * it's not enough to set all member fields, so the whole
		 * structure is cleared first.
		 */
		memset(&info, 0, sizeof(info));
		info.msgmni = ns->msg_ctlmni;
		info.msgmax = ns->msg_ctlmax;
		info.msgmnb = ns->msg_ctlmnb;
		info.msgssz = MSGSSZ;
		info.msgseg = MSGSEG;

		down_read(&msg_ids(ns).rw_mutex);
		if (cmd != MSG_INFO) {
			info.msgmap = MSGMAP;
			info.msgpool = MSGPOOL;
			info.msgtql = MSGTQL;
		} else {
			/* MSG_INFO reports current usage in these fields */
			info.msgpool = msg_ids(ns).in_use;
			info.msgmap = atomic_read(&ns->msg_hdrs);
			info.msgtql = atomic_read(&ns->msg_bytes);
		}
		highest_id = ipc_get_maxid(&msg_ids(ns));
		up_read(&msg_ids(ns).rw_mutex);

		if (copy_to_user(buf, &info, sizeof(struct msginfo)))
			return -EFAULT;
		return (highest_id < 0) ? 0 : highest_id;
	}
	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
	case IPC_STAT:
	{
		struct msqid64_ds stat;
		int success_return;

		if (!buf)
			return -EFAULT;

		msq = (cmd == MSG_STAT) ? msg_lock(ns, msqid) :
					  msg_lock_check(ns, msqid);
		if (IS_ERR(msq))
			return PTR_ERR(msq);
		success_return = (cmd == MSG_STAT) ? msq->q_perm.id : 0;

		if (ipcperms(&msq->q_perm, S_IRUGO)) {
			err = -EACCES;
			goto out_unlock;
		}

		err = security_msg_queue_msgctl(msq, cmd);
		if (err)
			goto out_unlock;

		/* snapshot the queue under its lock, copy out after unlock */
		memset(&stat, 0, sizeof(stat));

		kernel_to_ipc64_perm(&msq->q_perm, &stat.msg_perm);
		stat.msg_stime = msq->q_stime;
		stat.msg_rtime = msq->q_rtime;
		stat.msg_ctime = msq->q_ctime;
		stat.msg_cbytes = msq->q_cbytes;
		stat.msg_qnum = msq->q_qnum;
		stat.msg_qbytes = msq->q_qbytes;
		stat.msg_lspid = msq->q_lspid;
		stat.msg_lrpid = msq->q_lrpid;
		msg_unlock(msq);

		if (copy_msqid_to_user(buf, &stat, version))
			return -EFAULT;
		return success_return;
	}
	case IPC_SET:
	case IPC_RMID:
		return msgctl_down(ns, msqid, cmd, buf, version);
	default:
		return -EINVAL;
	}

out_unlock:
	msg_unlock(msq);
	return err;
}

635
static int testmsg(struct msg_msg *msg, long type, int mode)
L
Linus Torvalds 已提交
636 637 638 639 640 641
{
	switch(mode)
	{
		case SEARCH_ANY:
			return 1;
		case SEARCH_LESSEQUAL:
642
			if (msg->m_type <=type)
L
Linus Torvalds 已提交
643 644 645
				return 1;
			break;
		case SEARCH_EQUAL:
646
			if (msg->m_type == type)
L
Linus Torvalds 已提交
647 648 649
				return 1;
			break;
		case SEARCH_NOTEQUAL:
650
			if (msg->m_type != type)
L
Linus Torvalds 已提交
651 652 653 654 655 656
				return 1;
			break;
	}
	return 0;
}

657
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
L
Linus Torvalds 已提交
658
{
659
	struct list_head *tmp;
L
Linus Torvalds 已提交
660 661 662

	tmp = msq->q_receivers.next;
	while (tmp != &msq->q_receivers) {
663 664 665
		struct msg_receiver *msr;

		msr = list_entry(tmp, struct msg_receiver, r_list);
L
Linus Torvalds 已提交
666
		tmp = tmp->next;
667 668 669 670
		if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
		    !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
					       msr->r_msgtype, msr->r_mode)) {

L
Linus Torvalds 已提交
671
			list_del(&msr->r_list);
672
			if (msr->r_maxsize < msg->m_ts) {
L
Linus Torvalds 已提交
673 674 675 676 677 678
				msr->r_msg = NULL;
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = ERR_PTR(-E2BIG);
			} else {
				msr->r_msg = NULL;
679
				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
L
Linus Torvalds 已提交
680 681 682 683
				msq->q_rtime = get_seconds();
				wake_up_process(msr->r_tsk);
				smp_mb();
				msr->r_msg = msg;
684

L
Linus Torvalds 已提交
685 686 687 688 689 690 691
				return 1;
			}
		}
	}
	return 0;
}

692 693
/*
 * do_msgsnd - send a message of type @mtype to queue @msqid
 * @msqid: queue id
 * @mtype: message type, must be at least 1
 * @mtext: user-space message body
 * @msgsz: body length, at most the namespace's msg_ctlmax
 * @msgflg: IPC_NOWAIT makes a full queue fail with -EAGAIN instead of
 *          sleeping
 *
 * The message is copied in with load_msg() before the queue is locked.
 * While the queue has no room the caller sleeps as a msg_sender; permission
 * and security checks are repeated after every wakeup.  The message is then
 * either handed to a sleeping receiver by pipelined_send() or appended to
 * q_messages.
 *
 * Returns 0 on success or a negative errno (-EINVAL, -EACCES, -EAGAIN,
 * -EIDRM, -ERESTARTNOHAND, or an error from load_msg(), the queue lookup or
 * the security hook).
 */
long do_msgsnd(int msqid, long mtype, void __user *mtext,
		size_t msgsz, int msgflg)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct msg_queue *msq;
	struct msg_msg *msg;
	int err;

	if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0 ||
	    mtype < 1)
		return -EINVAL;

	msg = load_msg(mtext, msgsz);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	msg->m_type = mtype;
	msg->m_ts = msgsz;

	msq = msg_lock_check(ns, msqid);
	if (IS_ERR(msq)) {
		err = PTR_ERR(msq);
		goto out_free;
	}

	for (;;) {
		struct msg_sender waiter;

		if (ipcperms(&msq->q_perm, S_IWUGO)) {
			err = -EACCES;
			goto out_unlock_free;
		}

		err = security_msg_queue_msgsnd(msq, msg, msgflg);
		if (err)
			goto out_unlock_free;

		/* room for both the bytes and one more message header? */
		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
		    1 + msq->q_qnum <= msq->q_qbytes)
			break;

		/* queue full, wait: */
		if (msgflg & IPC_NOWAIT) {
			err = -EAGAIN;
			goto out_unlock_free;
		}

		/* a reference is held on the queue while sleeping unlocked */
		ss_add(msq, &waiter);
		ipc_rcu_getref(msq);
		msg_unlock(msq);
		schedule();

		ipc_lock_by_ptr(&msq->q_perm);
		ipc_rcu_putref(msq);
		if (msq->q_perm.deleted) {
			err = -EIDRM;
			goto out_unlock_free;
		}
		ss_del(&waiter);

		if (signal_pending(current)) {
			err = -ERESTARTNOHAND;
			goto out_unlock_free;
		}
	}

	msq->q_lspid = task_tgid_vnr(current);
	msq->q_stime = get_seconds();

	if (!pipelined_send(msq, msg)) {
		/* no one is waiting for this message, enqueue it */
		list_add_tail(&msg->m_list, &msq->q_messages);
		msq->q_cbytes += msgsz;
		msq->q_qnum++;
		atomic_add(msgsz, &ns->msg_bytes);
		atomic_inc(&ns->msg_hdrs);
	}

	/* the message now belongs to the queue or to a receiver */
	msg_unlock(msq);
	return 0;

out_unlock_free:
	msg_unlock(msq);
out_free:
	free_msg(msg);
	return err;
}

783 784 785 786 787 788 789 790 791 792
/*
 * sys_msgsnd - msgsnd() system call
 *
 * Fetches the message type from the user's struct msgbuf and passes it,
 * together with the address of the message text, to do_msgsnd().
 * Returns -EFAULT if the type cannot be read, else do_msgsnd()'s result.
 */
asmlinkage long
sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
{
	long type;

	if (get_user(type, &msgp->mtype) != 0)
		return -EFAULT;

	return do_msgsnd(msqid, type, msgp->mtext, msgsz, msgflg);
}

793
static inline int convert_mode(long *msgtyp, int msgflg)
L
Linus Torvalds 已提交
794
{
795
	/*
L
Linus Torvalds 已提交
796 797 798
	 *  find message of correct type.
	 *  msgtyp = 0 => get first.
	 *  msgtyp > 0 => get first message of matching type.
799
	 *  msgtyp < 0 => get message with least type must be < abs(msgtype).
L
Linus Torvalds 已提交
800
	 */
801
	if (*msgtyp == 0)
L
Linus Torvalds 已提交
802
		return SEARCH_ANY;
803 804
	if (*msgtyp < 0) {
		*msgtyp = -*msgtyp;
L
Linus Torvalds 已提交
805 806
		return SEARCH_LESSEQUAL;
	}
807
	if (msgflg & MSG_EXCEPT)
L
Linus Torvalds 已提交
808 809 810 811
		return SEARCH_NOTEQUAL;
	return SEARCH_EQUAL;
}

812 813
/*
 * do_msgrcv - receive a message from queue @msqid
 * @msqid: queue id
 * @pmtype: receives the type of the message that was taken
 * @mtext: user-space buffer for the message body
 * @msgsz: size of @mtext
 * @msgtyp: type selector, see convert_mode()
 * @msgflg: IPC_NOWAIT, MSG_NOERROR and MSG_EXCEPT are honoured
 *
 * Scans q_messages for a suitable message.  If none is queued the caller
 * sleeps as a msg_receiver until pipelined_send() hands it a message,
 * expunge_all() posts an error, or a signal arrives.
 *
 * Returns the number of bytes stored, or a negative errno (-EINVAL, -EACCES,
 * -E2BIG, -ENOMSG, -ERESTARTNOHAND, -EFAULT, or whatever error was posted
 * to the sleeping receiver).
 */
long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
		size_t msgsz, long msgtyp, int msgflg)
{
	struct ipc_namespace *ns;
	struct msg_queue *msq;
	struct msg_msg *msg;
	int mode;

	if (msqid < 0 || (long) msgsz < 0)
		return -EINVAL;
	mode = convert_mode(&msgtyp, msgflg);
	ns = current->nsproxy->ipc_ns;

	msq = msg_lock_check(ns, msqid);
	if (IS_ERR(msq))
		return PTR_ERR(msq);

	for (;;) {
		struct msg_receiver msr_d;
		struct msg_msg *candidate;

		msg = ERR_PTR(-EACCES);
		if (ipcperms(&msq->q_perm, S_IRUGO))
			goto out_unlock;

		msg = ERR_PTR(-EAGAIN);
		list_for_each_entry(candidate, &msq->q_messages, m_list) {
			if (!testmsg(candidate, msgtyp, mode) ||
			    security_msg_queue_msgrcv(msq, candidate, current,
						      msgtyp, mode))
				continue;

			msg = candidate;
			if (mode != SEARCH_LESSEQUAL || candidate->m_type == 1)
				break;
			/* keep looking, but only for a strictly lower type */
			msgtyp = candidate->m_type - 1;
		}

		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock;
			}
			list_del(&msg->m_list);
			msq->q_qnum--;
			msq->q_rtime = get_seconds();
			msq->q_lrpid = task_tgid_vnr(current);
			msq->q_cbytes -= msg->m_ts;
			atomic_sub(msg->m_ts, &ns->msg_bytes);
			atomic_dec(&ns->msg_hdrs);
			/* space was freed: let sleeping senders retry */
			ss_wakeup(&msq->q_senders, 0);
			msg_unlock(msq);
			break;
		}

		/* No message waiting. Wait for a message */
		if (msgflg & IPC_NOWAIT) {
			msg = ERR_PTR(-ENOMSG);
			goto out_unlock;
		}

		list_add_tail(&msr_d.r_list, &msq->q_receivers);
		msr_d.r_tsk = current;
		msr_d.r_msgtype = msgtyp;
		msr_d.r_mode = mode;
		msr_d.r_maxsize = (msgflg & MSG_NOERROR) ? INT_MAX : msgsz;
		msr_d.r_msg = ERR_PTR(-EAGAIN);
		current->state = TASK_INTERRUPTIBLE;
		msg_unlock(msq);

		schedule();

		/* Lockless receive, part 1:
		 * Disable preemption.  We don't hold a reference to the queue
		 * and getting a reference would defeat the idea of a lockless
		 * operation, thus the code relies on rcu to guarantee the
		 * existence of msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, then the queue is not yet
		 * destroyed.
		 * rcu_read_lock() prevents preemption between reading r_msg
		 * and the spin_lock() inside ipc_lock_by_ptr().
		 */
		rcu_read_lock();

		/* Lockless receive, part 2:
		 * Wait until pipelined_send or expunge_all are outside of
		 * wake_up_process(). There is a race with exit(), see
		 * ipc/mqueue.c for the details.
		 */
		while ((msg = (struct msg_msg *)msr_d.r_msg) == NULL)
			cpu_relax();

		/* Lockless receive, part 3:
		 * If there is a message or an error then accept it without
		 * locking.
		 */
		if (msg != ERR_PTR(-EAGAIN)) {
			rcu_read_unlock();
			break;
		}

		/* Lockless receive, part 4:
		 * Acquire the queue spinlock.
		 */
		ipc_lock_by_ptr(&msq->q_perm);
		rcu_read_unlock();

		/* Lockless receive, part 5:
		 * Repeat test after acquiring the spinlock.
		 */
		msg = (struct msg_msg *)msr_d.r_msg;
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock;

		list_del(&msr_d.r_list);
		if (!signal_pending(current))
			continue;

		msg = ERR_PTR(-ERESTARTNOHAND);
out_unlock:
		msg_unlock(msq);
		break;
	}
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	/* never copy more than the message actually holds */
	if (msgsz > msg->m_ts)
		msgsz = msg->m_ts;
	*pmtype = msg->m_type;
	if (store_msg(mtext, msg, msgsz))
		msgsz = -EFAULT;

	free_msg(msg);

	return msgsz;
}

964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
/*
 * sys_msgrcv - msgrcv() system call
 *
 * Lets do_msgrcv() store the message text into the user's struct msgbuf and
 * then writes the message type into its mtype field.  Returns do_msgrcv()'s
 * result, or -EFAULT if the type cannot be written.
 */
asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
			   long msgtyp, int msgflg)
{
	long mtype;
	long ret;

	ret = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
	if (ret >= 0 && put_user(mtype, &msgp->mtype))
		ret = -EFAULT;

	return ret;
}

L
Linus Torvalds 已提交
979
#ifdef CONFIG_PROC_FS
980
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
L
Linus Torvalds 已提交
981
{
982 983 984
	struct msg_queue *msq = it;

	return seq_printf(s,
985 986
			"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
			msq->q_perm.key,
N
Nadia Derbey 已提交
987
			msq->q_perm.id,
988 989 990 991 992 993 994 995 996 997 998 999
			msq->q_perm.mode,
			msq->q_cbytes,
			msq->q_qnum,
			msq->q_lspid,
			msq->q_lrpid,
			msq->q_perm.uid,
			msq->q_perm.gid,
			msq->q_perm.cuid,
			msq->q_perm.cgid,
			msq->q_stime,
			msq->q_rtime,
			msq->q_ctime);
L
Linus Torvalds 已提交
1000 1001
}
#endif