sem.c 58.3 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
10
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
L
Linus Torvalds 已提交
11
 * Enforced range limit on SEM_UNDO
A
Alan Cox 已提交
12
 * (c) 2001 Red Hat Inc
L
Linus Torvalds 已提交
13 14
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
D
Davidlohr Bueso 已提交
15
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
16 17
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
S
Steve Grubb 已提交
18 19 20
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
K
Kirill Korotaev 已提交
21 22 23 24
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         trashing on the semaphore array spinlock will limit the scaling.
52
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
53 54 55 56 57
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
D
Davidlohr Bueso 已提交
58
 *   dropping all locks. (see wake_up_sem_queue_prepare())
59 60 61 62 63 64 65 66 67 68 69 70
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
L
Linus Torvalds 已提交
71 72 73 74 75 76 77 78 79 80
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
81
#include <linux/capability.h>
82
#include <linux/seq_file.h>
N
Nadia Derbey 已提交
83
#include <linux/rwsem.h>
K
Kirill Korotaev 已提交
84
#include <linux/nsproxy.h>
85
#include <linux/ipc_namespace.h>
86
#include <linux/sched/wake_q.h>
I
Ingo Molnar 已提交
87

P
Paul McQuade 已提交
88
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
89 90
#include "util.h"

91 92 93 94 95 96 97 98 99

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	int			pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
100
	struct sembuf		*blocking; /* the operation that blocked */
101
	int			nsops;	 /* number of operations */
102 103
	bool			alter;	 /* does *sops alter the array? */
	bool                    dupsop;	 /* sops on more than one sem_num */
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 *
 * One sem_undo holds the adjustments of one undo list for one semaphore
 * array; it is linked into two lists at once (see list_proc and list_id).
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list:
						 * all undos from one process,
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments, */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
126
	refcount_t		refcnt;
127 128 129 130 131
	spinlock_t		lock;
	struct list_head	list_proc;
};


132
/* Shorthand for the IPC_SEM_IDS entry of the ids[] table of namespace @ns. */
#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])
K
Kirill Korotaev 已提交
133

N
Nadia Derbey 已提交
134
static int newary(struct ipc_namespace *, struct ipc_params *);
135
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
L
Linus Torvalds 已提交
136
#ifdef CONFIG_PROC_FS
137
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
L
Linus Torvalds 已提交
138 139 140 141 142
#endif

/*
 * Element counts for on-stack buffers (see the byte sizes noted per line).
 * NOTE(review): the users of these two limits are outside this part of the
 * file - confirm how they are applied before changing the values.
 */
#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

M
Manfred Spraul 已提交
143 144 145 146 147 148 149
/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis.
 *
 * complexmode_enter() sets sem_array.use_global_lock to this value, and
 * complexmode_tryleave() lowers it by one per call while no complex
 * operation is pending; the per-semaphore locks are used again only once
 * the counter has reached 0.
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

L
Linus Torvalds 已提交
150
/*
151
 * Locking:
152
 * a) global sem_lock() for read/write
L
Linus Torvalds 已提交
153
 *	sem_undo.list_id,
154
 *	sem_array.complex_count,
155 156
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
P
Paul McQuade 已提交
157
 *
158
 * b) global or semaphore sem_lock() for read/write:
159
 *	sem_array.sems[i].pending_{const,alter}:
160 161 162 163 164
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
M
Manfred Spraul 已提交
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
L
Linus Torvalds 已提交
180 181
 */

K
Kirill Korotaev 已提交
182 183 184 185 186
/*
 * Names for the four slots of the per-namespace sem_ctls[] array, used as
 * ns->sc_xxx. sem_init_ns() loads them with the defaults noted below.
 */
#define sc_semmsl	sem_ctls[0]	/* default SEMMSL; upper bound for nsems in semget() */
#define sc_semmns	sem_ctls[1]	/* default SEMMNS; upper bound for used_sems in newary() */
#define sc_semopm	sem_ctls[2]	/* default SEMOPM */
#define sc_semmni	sem_ctls[3]	/* default SEMMNI; limit handed to ipc_addid() */

187
int sem_init_ns(struct ipc_namespace *ns)
K
Kirill Korotaev 已提交
188 189 190 191 192 193
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
194
	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
K
Kirill Korotaev 已提交
195 196
}

197
#ifdef CONFIG_IPC_NS
K
Kirill Korotaev 已提交
198 199
void sem_exit_ns(struct ipc_namespace *ns)
{
200
	free_ipcs(ns, &sem_ids(ns), freeary);
S
Serge E. Hallyn 已提交
201
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
202
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
K
Kirill Korotaev 已提交
203
}
204
#endif
L
Linus Torvalds 已提交
205

206
/*
 * sem_init - boot-time initialisation of the semaphore code
 *
 * Initialises the semaphore state of init_ipc_ns and registers the
 * "sysvipc/sem" proc interface together with its header line.
 * Returns the result of sem_init_ns(); the proc registration is done
 * regardless of that result.
 */
int __init sem_init(void)
{
	const int rc = sem_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/sem",
		"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
		IPC_SEM_IDS, sysvipc_sem_proc_show);

	return rc;
}

216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * If complex_count is 0, every entry of the array-wide pending_alter list
 * is handed to the pending_alter list of the semaphore named by the entry's
 * first operation, and the array-wide list is reset to empty. Otherwise
 * nothing is done.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *entry, *next;

	/* Complex operations are still around: the queues stay merged. */
	if (sma->complex_count != 0)
		return;

	/*
	 * We will switch back to simple mode. The entries are re-linked
	 * without being unlinked first; the array-wide list head is
	 * re-initialised right after the walk.
	 */
	list_for_each_entry_safe(entry, next, &sma->pending_alter, list) {
		int semnum = entry->sops[0].sem_num;

		list_add_tail(&entry->list, &sma->sems[semnum].pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * Splices the pending_alter list of every semaphore of @sma into the
 * array-wide pending_alter list, leaving the per-semaphore lists empty.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int idx;

	for (idx = 0; idx < sma->sem_nsems; idx++)
		list_splice_init(&sma->sems[idx].pending_alter,
				 &sma->pending_alter);
}

D
Davidlohr Bueso 已提交
263 264
static void sem_rcu_free(struct rcu_head *head)
{
265 266
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
D
Davidlohr Bueso 已提交
267

268
	security_sem_free(&sma->sem_perm);
K
Kees Cook 已提交
269
	kvfree(sma);
D
Davidlohr Bueso 已提交
270 271
}

272
/*
273
 * Enter the mode suitable for non-simple operations:
274 275
 * Caller must own sem_perm.lock.
 */
276
static void complexmode_enter(struct sem_array *sma)
277 278 279 280
{
	int i;
	struct sem *sem;

M
Manfred Spraul 已提交
281 282 283 284 285 286 287
	if (sma->use_global_lock > 0)  {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
M
Manfred Spraul 已提交
288 289
		return;
	}
M
Manfred Spraul 已提交
290
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
291

292
	for (i = 0; i < sma->sem_nsems; i++) {
293
		sem = &sma->sems[i];
294 295
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
296
	}
297 298 299 300 301 302 303 304 305 306 307 308 309 310
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 *
 * Nothing happens while complex operations are sleeping. Otherwise the
 * hysteresis counter use_global_lock is lowered by one step; the final
 * step from 1 to 0 is done as a RELEASE.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	/* Complex ops are sleeping: we must stay in complex mode. */
	if (sma->complex_count)
		return;

	if (sma->use_global_lock != 1) {
		sma->use_global_lock--;
		return;
	}

	/*
	 * Immediately after setting use_global_lock to 0,
	 * a simple op can start. Thus: all memory writes
	 * performed by the current operation must be visible
	 * before we set use_global_lock to 0.
	 */
	smp_store_release(&sma->use_global_lock, 0);
}

324
/*
 * Lock number returned by sem_lock() when the whole array is locked;
 * sem_unlock() treats any other value as the index of a locked semaphore.
 */
#define SEM_GLOBAL_LOCK	(-1)
325 326 327 328 329 330 331 332 333 334
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			      int nsops)
{
335
	struct sem *sem;
336

337 338 339
	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);
340

341 342 343
		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
344 345 346 347
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
348 349 350
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
M
Manfred Spraul 已提交
351
	 * Both facts are tracked by use_global_mode.
352
	 */
353
	sem = &sma->sems[sops->sem_num];
354

355
	/*
M
Manfred Spraul 已提交
356
	 * Initial check for use_global_lock. Just an optimization,
357 358
	 * no locking, no memory barrier.
	 */
M
Manfred Spraul 已提交
359
	if (!sma->use_global_lock) {
360
		/*
361 362
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
363
		 */
364 365
		spin_lock(&sem->lock);

M
Manfred Spraul 已提交
366 367
		/* pairs with smp_store_release() */
		if (!smp_load_acquire(&sma->use_global_lock)) {
368 369
			/* fast path successful! */
			return sops->sem_num;
370
		}
371 372 373 374 375
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);
376

M
Manfred Spraul 已提交
377 378 379 380 381 382 383 384 385
	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
386 387
		 */
		spin_lock(&sem->lock);
M
Manfred Spraul 已提交
388

389 390
		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
391
	} else {
M
Manfred Spraul 已提交
392 393 394 395
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_mode to non-zero.
396
		 */
397
		return SEM_GLOBAL_LOCK;
398 399 400 401 402
	}
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
403
	if (locknum == SEM_GLOBAL_LOCK) {
404
		unmerge_queues(sma);
405
		complexmode_tryleave(sma);
406
		ipc_unlock_object(&sma->sem_perm);
407
	} else {
408
		struct sem *sem = &sma->sems[locknum];
409 410 411 412
		spin_unlock(&sem->lock);
	}
}

N
Nadia Derbey 已提交
413
/*
D
Davidlohr Bueso 已提交
414
 * sem_lock_(check_) routines are called in the paths where the rwsem
N
Nadia Derbey 已提交
415
 * is not held.
416 417
 *
 * The caller holds the RCU read lock.
N
Nadia Derbey 已提交
418
 */
419 420
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
D
Davidlohr Bueso 已提交
421
	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
422 423 424 425 426 427 428 429 430 431 432 433 434 435

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);
436

N
Nadia Derbey 已提交
437
	return container_of(ipcp, struct sem_array, sem_perm);
438 439
}

440 441
static inline void sem_lock_and_putref(struct sem_array *sma)
{
442
	sem_lock(sma, NULL, -1);
443
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
444 445
}

N
Nadia Derbey 已提交
446 447 448 449 450
/*
 * sem_rmid - hand array @s to ipc_rmid() for the semaphore id table of @ns
 * @ns: namespace
 * @s: semaphore array
 */
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

K
Kees Cook 已提交
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
/*
 * sem_alloc - allocate a zero-filled semaphore array
 * @nsems: number of semaphores the array must hold
 *
 * Returns a zeroed sem_array followed by room for @nsems struct sem
 * entries, or NULL if the total size would exceed INT_MAX or the
 * allocation fails. The memory comes from the kvmalloc() family, so it
 * must be released with kvfree().
 */
static struct sem_array *sem_alloc(size_t nsems)
{
	struct sem_array *sma;

	/* Reject counts for which the size computed below would pass INT_MAX. */
	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
		return NULL;

	/* kvzalloc() returns zeroed memory; no separate memset() is needed. */
	return kvzalloc(sizeof(*sma) + nsems * sizeof(sma->sems[0]),
			GFP_KERNEL);
}

N
Nadia Derbey 已提交
469 470 471 472 473
/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
D
Davidlohr Bueso 已提交
474
 * Called with sem_ids.rwsem held (as a writer)
N
Nadia Derbey 已提交
475
 */
N
Nadia Derbey 已提交
476
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
L
Linus Torvalds 已提交
477 478 479
{
	int retval;
	struct sem_array *sma;
N
Nadia Derbey 已提交
480 481 482
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
483
	int i;
L
Linus Torvalds 已提交
484 485 486

	if (!nsems)
		return -EINVAL;
K
Kirill Korotaev 已提交
487
	if (ns->used_sems + nsems > ns->sc_semmns)
L
Linus Torvalds 已提交
488 489
		return -ENOSPC;

K
Kees Cook 已提交
490
	sma = sem_alloc(nsems);
491
	if (!sma)
L
Linus Torvalds 已提交
492
		return -ENOMEM;
493

L
Linus Torvalds 已提交
494 495 496 497
	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
498
	retval = security_sem_alloc(&sma->sem_perm);
L
Linus Torvalds 已提交
499
	if (retval) {
K
Kees Cook 已提交
500
		kvfree(sma);
L
Linus Torvalds 已提交
501 502 503
		return retval;
	}

504
	for (i = 0; i < nsems; i++) {
505 506 507
		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
		INIT_LIST_HEAD(&sma->sems[i].pending_const);
		spin_lock_init(&sma->sems[i].lock);
508
	}
509 510

	sma->complex_count = 0;
M
Manfred Spraul 已提交
511
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
512 513
	INIT_LIST_HEAD(&sma->pending_alter);
	INIT_LIST_HEAD(&sma->pending_const);
514
	INIT_LIST_HEAD(&sma->list_id);
L
Linus Torvalds 已提交
515
	sma->sem_nsems = nsems;
516
	sma->sem_ctime = ktime_get_real_seconds();
517

518
	/* ipc_addid() locks sma upon success. */
519 520 521 522
	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (retval < 0) {
		call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
		return retval;
523 524 525
	}
	ns->used_sems += nsems;

526
	sem_unlock(sma, -1);
527
	rcu_read_unlock();
L
Linus Torvalds 已提交
528

N
Nadia Derbey 已提交
529
	return sma->sem_perm.id;
L
Linus Torvalds 已提交
530 531
}

N
Nadia Derbey 已提交
532

N
Nadia Derbey 已提交
533
/*
 * sem_security - the "associate" operation of semget()
 * @ipcp: permission structure of the semaphore array
 * @semflg: flags passed by the caller
 *
 * Called with sem_ids.rwsem and ipcp locked.
 * Returns the result of security_sem_associate().
 */
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
{
	const int rc = security_sem_associate(ipcp, semflg);

	return rc;
}

N
Nadia Derbey 已提交
541
/*
D
Davidlohr Bueso 已提交
542
 * Called with sem_ids.rwsem and ipcp locked.
N
Nadia Derbey 已提交
543
 */
N
Nadia Derbey 已提交
544 545
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
N
Nadia Derbey 已提交
546
{
N
Nadia Derbey 已提交
547 548 549 550
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
N
Nadia Derbey 已提交
551 552 553 554 555
		return -EINVAL;

	return 0;
}

556
/*
 * semget() system call.
 *
 * Rejects an nsems that is negative or above the sc_semmsl limit of the
 * caller's ipc namespace with -EINVAL; otherwise the request is passed to
 * ipcget() with newary(), sem_security() and sem_more_checks() as the
 * semaphore specific operations, and its result is returned.
 */
SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	static const struct ipc_ops sem_ops = {
		.getnew = newary,
		.associate = sem_security,
		.more_checks = sem_more_checks,
	};
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct ipc_params sem_params;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

578
/**
579 580
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
581
 * @sma: semaphore array
582
 * @q: struct sem_queue that describes the operation
583
 *
584 585 586 587 588 589 590
 * Caller blocking are as follows, based the value
 * indicated by the semaphore operation (sem_op):
 *
 *  (1) >0 never blocks.
 *  (2)  0 (wait-for-zero operation): semval is non-zero.
 *  (3) <0 attempting to decrement semval to a value smaller than zero.
 *
591 592
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
593
 * Returns <0 for error codes.
L
Linus Torvalds 已提交
594
 */
595
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
L
Linus Torvalds 已提交
596
{
597
	int result, sem_op, nsops, pid;
L
Linus Torvalds 已提交
598
	struct sembuf *sop;
M
Manfred Spraul 已提交
599
	struct sem *curr;
600 601 602 603 604 605
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;
L
Linus Torvalds 已提交
606 607

	for (sop = sops; sop < sops + nsops; sop++) {
608
		curr = &sma->sems[sop->sem_num];
L
Linus Torvalds 已提交
609 610
		sem_op = sop->sem_op;
		result = curr->semval;
611

L
Linus Torvalds 已提交
612 613 614 615 616 617 618 619
		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;
620

L
Linus Torvalds 已提交
621 622
		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
623
			/* Exceeding the undo range is an error. */
L
Linus Torvalds 已提交
624 625
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				goto out_of_range;
626
			un->semadj[sop->sem_num] = undo;
L
Linus Torvalds 已提交
627
		}
628

L
Linus Torvalds 已提交
629 630 631 632
		curr->semval = result;
	}

	sop--;
633
	pid = q->pid;
L
Linus Torvalds 已提交
634
	while (sop >= sops) {
635
		sma->sems[sop->sem_num].sempid = pid;
L
Linus Torvalds 已提交
636 637
		sop--;
	}
638

L
Linus Torvalds 已提交
639 640 641 642 643 644 645
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
646 647
	q->blocking = sop;

L
Linus Torvalds 已提交
648 649 650 651 652 653 654 655
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	sop--;
	while (sop >= sops) {
656
		sem_op = sop->sem_op;
657
		sma->sems[sop->sem_num].semval -= sem_op;
658 659
		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] += sem_op;
L
Linus Torvalds 已提交
660 661 662 663 664 665
		sop--;
	}

	return result;
}

666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687
static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	if (unlikely(q->dupsop))
		return perform_atomic_semop_slow(sma, q);

	/*
	 * We scan the semaphore set twice, first to ensure that the entire
	 * operation can succeed, therefore avoiding any pointless writes
	 * to shared memory and having to undo such changes in order to block
	 * until the operations can go through.
	 */
	for (sop = sops; sop < sops + nsops; sop++) {
688
		curr = &sma->sems[sop->sem_num];
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block; /* wait-for-zero */

		result += sem_op;
		if (result < 0)
			goto would_block;

		if (result > SEMVMX)
			return -ERANGE;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			/* Exceeding the undo range is an error. */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
				return -ERANGE;
		}
	}

	for (sop = sops; sop < sops + nsops; sop++) {
712
		curr = &sma->sems[sop->sem_num];
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
		sem_op = sop->sem_op;
		result = curr->semval;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;

			un->semadj[sop->sem_num] = undo;
		}
		curr->semval += sem_op;
		curr->sempid = q->pid;
	}

	return 0;

would_block:
	q->blocking = sop;
	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
}

D
Davidlohr Bueso 已提交
732 733
static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
					     struct wake_q_head *wake_q)
734
{
D
Davidlohr Bueso 已提交
735 736 737 738 739 740 741 742 743
	wake_q_add(wake_q, q->sleeper);
	/*
	 * Rely on the above implicit barrier, such that we can
	 * ensure that we hold reference to the task before setting
	 * q->status. Otherwise we could race with do_exit if the
	 * task is awoken by an external event before calling
	 * wake_up_process().
	 */
	WRITE_ONCE(q->status, error);
N
Nick Piggin 已提交
744 745
}

746 747 748
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
749
	if (q->nsops > 1)
750 751 752
		sma->complex_count--;
}

753 754 755 756 757 758 759
/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
760 761
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
762
 */
763
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
764
{
765 766
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
767 768 769 770 771 772
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

773 774 775 776 777 778 779 780 781 782 783 784 785
	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled seperately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented to value - thus they won't proceed either.
	 */
	return 0;
}
786

787
/**
D
Davidlohr Bueso 已提交
788
 * wake_const_ops - wake up non-alter tasks
789 790
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
D
Davidlohr Bueso 已提交
791
 * @wake_q: lockless wake-queue head.
792 793 794 795 796
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
D
Davidlohr Bueso 已提交
797
 * The tasks that must be woken up are added to @wake_q. The return code
798 799 800 801
 * is stored in q->pid.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
D
Davidlohr Bueso 已提交
802
			  struct wake_q_head *wake_q)
803
{
804
	struct sem_queue *q, *tmp;
805 806 807 808 809 810
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
811
		pending_list = &sma->sems[semnum].pending_const;
812

813 814
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);
815

816 817 818 819
		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);
820

821 822 823
		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
824
	}
825

826 827 828 829
	return semop_completed;
}

/**
D
Davidlohr Bueso 已提交
830
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
831 832 833
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
D
Davidlohr Bueso 已提交
834
 * @wake_q: lockless wake-queue head
835
 *
D
Davidlohr Bueso 已提交
836 837
 * Checks all required queue for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
838 839 840
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
D
Davidlohr Bueso 已提交
841
				int nsops, struct wake_q_head *wake_q)
842 843 844 845 846 847 848 849 850 851
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

852
			if (sma->sems[num].semval == 0) {
853
				got_zero = 1;
D
Davidlohr Bueso 已提交
854
				semop_completed |= wake_const_ops(sma, num, wake_q);
855 856 857 858 859 860
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
861
		 */
862
		for (i = 0; i < sma->sem_nsems; i++) {
863
			if (sma->sems[i].semval == 0) {
864
				got_zero = 1;
D
Davidlohr Bueso 已提交
865
				semop_completed |= wake_const_ops(sma, i, wake_q);
866 867
			}
		}
868 869
	}
	/*
870 871
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
872
	 */
873
	if (got_zero)
D
Davidlohr Bueso 已提交
874
		semop_completed |= wake_const_ops(sma, -1, wake_q);
875

876
	return semop_completed;
877 878
}

879 880

/**
D
Davidlohr Bueso 已提交
881
 * update_queue - look for tasks that can be completed.
882 883
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
D
Davidlohr Bueso 已提交
884
 * @wake_q: lockless wake-queue head.
885 886
 *
 * update_queue must be called after a semaphore in a semaphore array
887 888 889
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
D
Davidlohr Bueso 已提交
890
 * The tasks that must be woken up are added to @wake_q. The return code
891
 * is stored in q->pid.
892 893
 * The function internally checks if const operations can now succeed.
 *
894
 * The function return 1 if at least one semop was completed successfully.
L
Linus Torvalds 已提交
895
 */
D
Davidlohr Bueso 已提交
896
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
L
Linus Torvalds 已提交
897
{
898
	struct sem_queue *q, *tmp;
899
	struct list_head *pending_list;
900
	int semop_completed = 0;
901

902
	if (semnum == -1)
903
		pending_list = &sma->pending_alter;
904
	else
905
		pending_list = &sma->sems[semnum].pending_alter;
N
Nick Piggin 已提交
906 907

again:
908
	list_for_each_entry_safe(q, tmp, pending_list, list) {
909
		int error, restart;
910

911 912
		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
913
		 * necessary to scan further: simple increments
914 915 916 917
		 * that affect only one entry succeed immediately and cannot
		 * be in the  per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
918
		if (semnum != -1 && sma->sems[semnum].semval == 0)
919 920
			break;

921
		error = perform_atomic_semop(sma, q);
L
Linus Torvalds 已提交
922 923

		/* Does q->sleeper still need to sleep? */
N
Nick Piggin 已提交
924 925 926
		if (error > 0)
			continue;

927
		unlink_queue(sma, q);
N
Nick Piggin 已提交
928

929
		if (error) {
930
			restart = 0;
931 932
		} else {
			semop_completed = 1;
D
Davidlohr Bueso 已提交
933
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
934
			restart = check_restart(sma, q);
935
		}
936

D
Davidlohr Bueso 已提交
937
		wake_up_sem_queue_prepare(q, error, wake_q);
938
		if (restart)
N
Nick Piggin 已提交
939
			goto again;
L
Linus Torvalds 已提交
940
	}
941
	return semop_completed;
L
Linus Torvalds 已提交
942 943
}

944
/**
D
Davidlohr Bueso 已提交
945
 * set_semotime - set sem_otime
946 947 948 949 950 951 952 953 954
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line trashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
955
		sma->sems[0].sem_otime = get_seconds();
956
	} else {
957
		sma->sems[sops[0].sem_num].sem_otime =
958 959 960 961
							get_seconds();
	}
}

962
/**
D
Davidlohr Bueso 已提交
963
 * do_smart_update - optimized update_queue
964 965 966
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
967
 * @otime: force setting otime
D
Davidlohr Bueso 已提交
968
 * @wake_q: lockless wake-queue head
969
 *
970 971
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
972
 * Note that the function does not do the actual wake-up: the caller is
D
Davidlohr Bueso 已提交
973
 * responsible for calling wake_up_q().
974
 * It is safe to perform this call after dropping all locks.
975
 */
976
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
D
Davidlohr Bueso 已提交
977
			    int otime, struct wake_q_head *wake_q)
978 979 980
{
	int i;

D
Davidlohr Bueso 已提交
981
	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);
982

983 984
	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
D
Davidlohr Bueso 已提交
985
		otime |= update_queue(sma, -1, wake_q);
986 987 988 989 990 991 992
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
D
Davidlohr Bueso 已提交
993
				otime |= update_queue(sma, i, wake_q);
994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrease.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops wont be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
D
Davidlohr Bueso 已提交
1007
							      sops[i].sem_num, wake_q);
1008
				}
1009
			}
1010
		}
1011
	}
1012 1013
	if (otime)
		set_semotime(sma, sops);
1014 1015
}

1016
/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 *
 * Returns 1 when the blocking sop of @q refers to @semnum and is of the
 * requested kind (wait-for-zero if @count_zero, decrement otherwise),
 * 0 in all other cases.
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	struct sembuf *blocked = q->blocking;

	/*
	 * Since 0.99.10 Linux reported a task as sleeping on all
	 * semaphores of its operation. That is not what SUS specifies,
	 * so the behavior was changed to the standard compliant one.
	 * Log once, so that administrators can spot applications that
	 * may depend on the former Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (blocked->sem_num != semnum)
		return 0;

	if (count_zero)
		return blocked->sem_op == 0;

	return blocked->sem_op < 0;
}

L
Linus Torvalds 已提交
1046 1047 1048
/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
1049 1050 1051
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operation would block, too.
L
Linus Torvalds 已提交
1052
 */
1053 1054
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
L
Linus Torvalds 已提交
1055
{
1056
	struct list_head *l;
M
Manfred Spraul 已提交
1057
	struct sem_queue *q;
1058
	int semcnt;
L
Linus Torvalds 已提交
1059

1060 1061 1062
	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
1063
		l = &sma->sems[semnum].pending_const;
1064
	else
1065
		l = &sma->sems[semnum].pending_alter;
L
Linus Torvalds 已提交
1066

1067 1068 1069 1070 1071
	list_for_each_entry(q, l, list) {
		/* all task on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
R
Rik van Riel 已提交
1072 1073
	}

1074
	/* Then: check the complex operations. */
1075
	list_for_each_entry(q, &sma->pending_alter, list) {
1076 1077 1078 1079 1080 1081
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
1082
	}
1083
	return semcnt;
L
Linus Torvalds 已提交
1084 1085
}

D
Davidlohr Bueso 已提交
1086 1087
/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
N
Nadia Derbey 已提交
1088
 * remains locked on exit.
L
Linus Torvalds 已提交
1089
 */
1090
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
L
Linus Torvalds 已提交
1091
{
1092 1093
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
1094
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
1095
	int i;
D
Davidlohr Bueso 已提交
1096
	DEFINE_WAKE_Q(wake_q);
L
Linus Torvalds 已提交
1097

1098
	/* Free the existing undo structures for this semaphore set.  */
1099
	ipc_assert_locked_object(&sma->sem_perm);
1100 1101 1102
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
L
Linus Torvalds 已提交
1103
		un->semid = -1;
1104 1105
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
1106
		kfree_rcu(un, rcu);
1107
	}
L
Linus Torvalds 已提交
1108 1109

	/* Wake up all pending processes and let them fail with EIDRM. */
1110 1111
	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
		unlink_queue(sma, q);
D
Davidlohr Bueso 已提交
1112
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1113 1114 1115
	}

	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
1116
		unlink_queue(sma, q);
D
Davidlohr Bueso 已提交
1117
		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
L
Linus Torvalds 已提交
1118
	}
1119
	for (i = 0; i < sma->sem_nsems; i++) {
1120
		struct sem *sem = &sma->sems[i];
1121 1122
		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
			unlink_queue(sma, q);
D
Davidlohr Bueso 已提交
1123
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1124 1125
		}
		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
1126
			unlink_queue(sma, q);
D
Davidlohr Bueso 已提交
1127
			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1128 1129
		}
	}
L
Linus Torvalds 已提交
1130

N
Nadia Derbey 已提交
1131 1132
	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
1133
	sem_unlock(sma, -1);
1134
	rcu_read_unlock();
L
Linus Torvalds 已提交
1135

D
Davidlohr Bueso 已提交
1136
	wake_up_q(&wake_q);
K
Kirill Korotaev 已提交
1137
	ns->used_sems -= sma->sem_nsems;
1138
	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
L
Linus Torvalds 已提交
1139 1140 1141 1142
}

/*
 * Copy a semid64_ds to user space in the layout selected by @version.
 *
 * IPC_64 copies @in unchanged; IPC_OLD converts it to a zero-initialised
 * struct semid_ds first. The result is whatever copy_to_user() returns,
 * or -EINVAL (converted to unsigned long) for an unknown @version.
 */
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	struct semid_ds legacy;

	if (version == IPC_64)
		return copy_to_user(buf, in, sizeof(*in));

	if (version != IPC_OLD)
		return -EINVAL;

	/* Clear the whole structure so that no stack data is copied out. */
	memset(&legacy, 0, sizeof(legacy));

	ipc64_perm_to_ipc_perm(&in->sem_perm, &legacy.sem_perm);

	legacy.sem_otime	= in->sem_otime;
	legacy.sem_ctime	= in->sem_ctime;
	legacy.sem_nsems	= in->sem_nsems;

	return copy_to_user(buf, &legacy, sizeof(legacy));
}

1165
static time64_t get_semotime(struct sem_array *sma)
1166 1167
{
	int i;
1168
	time64_t res;
1169

1170
	res = sma->sems[0].sem_otime;
1171
	for (i = 1; i < sma->sem_nsems; i++) {
1172
		time64_t to = sma->sems[i].sem_otime;
1173 1174 1175 1176 1177 1178 1179

		if (to > res)
			res = to;
	}
	return res;
}

1180 1181
static int semctl_stat(struct ipc_namespace *ns, int semid,
			 int cmd, struct semid64_ds *semid64)
L
Linus Torvalds 已提交
1182 1183
{
	struct sem_array *sma;
1184 1185
	int id = 0;
	int err;
L
Linus Torvalds 已提交
1186

1187
	memset(semid64, 0, sizeof(*semid64));
P
Paul McQuade 已提交
1188

1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
	rcu_read_lock();
	if (cmd == SEM_STAT) {
		sma = sem_obtain_object(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
		}
		id = sma->sem_perm.id;
	} else {
		sma = sem_obtain_object_check(ns, semid);
		if (IS_ERR(sma)) {
			err = PTR_ERR(sma);
			goto out_unlock;
L
Linus Torvalds 已提交
1202 1203 1204
		}
	}

1205 1206 1207
	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
		goto out_unlock;
L
Linus Torvalds 已提交
1208

1209
	err = security_sem_semctl(&sma->sem_perm, cmd);
1210 1211
	if (err)
		goto out_unlock;
L
Linus Torvalds 已提交
1212

1213 1214 1215 1216 1217 1218 1219 1220
	ipc_lock_object(&sma->sem_perm);

	if (!ipc_valid_object(&sma->sem_perm)) {
		ipc_unlock_object(&sma->sem_perm);
		err = -EIDRM;
		goto out_unlock;
	}

1221 1222 1223 1224
	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
	semid64->sem_otime = get_semotime(sma);
	semid64->sem_ctime = sma->sem_ctime;
	semid64->sem_nsems = sma->sem_nsems;
1225 1226

	ipc_unlock_object(&sma->sem_perm);
1227 1228
	rcu_read_unlock();
	return id;
L
Linus Torvalds 已提交
1229 1230

out_unlock:
1231
	rcu_read_unlock();
L
Linus Torvalds 已提交
1232 1233 1234
	return err;
}

1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
/*
 * Handle IPC_INFO and SEM_INFO: copy a struct seminfo to @p.
 *
 * The limits come from the namespace and from compile-time constants.
 * For SEM_INFO, semusz and semaem carry the number of ids in use and the
 * number of semaphores in use; otherwise they carry SEMUSZ and SEMAEM.
 * Returns the value of ipc_get_maxid() (0 if that is negative), or a
 * negative error code.
 */
static int semctl_info(struct ipc_namespace *ns, int semid,
			 int cmd, void __user *p)
{
	struct seminfo info;
	int highest_id;
	int err;

	err = security_sem_semctl(NULL, cmd);
	if (err)
		return err;

	/* Zero everything first so that no stack data reaches user space. */
	memset(&info, 0, sizeof(info));
	info.semmni = ns->sc_semmni;
	info.semmns = ns->sc_semmns;
	info.semmsl = ns->sc_semmsl;
	info.semopm = ns->sc_semopm;
	info.semvmx = SEMVMX;
	info.semmnu = SEMMNU;
	info.semmap = SEMMAP;
	info.semume = SEMUME;

	down_read(&sem_ids(ns).rwsem);
	if (cmd != SEM_INFO) {
		info.semusz = SEMUSZ;
		info.semaem = SEMAEM;
	} else {
		info.semusz = sem_ids(ns).in_use;
		info.semaem = ns->used_sems;
	}
	highest_id = ipc_get_maxid(&sem_ids(ns));
	up_read(&sem_ids(ns).rwsem);

	if (copy_to_user(p, &info, sizeof(struct seminfo)))
		return -EFAULT;

	if (highest_id < 0)
		return 0;
	return highest_id;
}

1270
static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
1271
		int val)
1272 1273 1274
{
	struct sem_undo *un;
	struct sem_array *sma;
M
Manfred Spraul 已提交
1275
	struct sem *curr;
1276
	int err;
D
Davidlohr Bueso 已提交
1277 1278
	DEFINE_WAKE_Q(wake_q);

1279 1280
	if (val > SEMVMX || val < 0)
		return -ERANGE;
1281

1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298
	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}
1299

1300
	err = security_sem_semctl(&sma->sem_perm, SETVAL);
1301 1302 1303 1304
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}
1305

1306
	sem_lock(sma, NULL, -1);
1307

1308
	if (!ipc_valid_object(&sma->sem_perm)) {
1309 1310 1311 1312 1313
		sem_unlock(sma, -1);
		rcu_read_unlock();
		return -EIDRM;
	}

1314
	curr = &sma->sems[semnum];
1315

1316
	ipc_assert_locked_object(&sma->sem_perm);
1317 1318 1319 1320 1321
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	curr->sempid = task_tgid_vnr(current);
1322
	sma->sem_ctime = ktime_get_real_seconds();
1323
	/* maybe some queued-up processes were waiting for this */
D
Davidlohr Bueso 已提交
1324
	do_smart_update(sma, NULL, 0, 0, &wake_q);
1325
	sem_unlock(sma, -1);
1326
	rcu_read_unlock();
D
Davidlohr Bueso 已提交
1327
	wake_up_q(&wake_q);
1328
	return 0;
1329 1330
}

K
Kirill Korotaev 已提交
1331
/*
 * Handle GETALL, SETALL, GETVAL, GETPID, GETNCNT and GETZCNT.
 *
 * GETALL/SETALL transfer all semaphore values through a temporary buffer:
 * the on-stack fast_sem_io for up to SEMMSL_FAST semaphores, otherwise a
 * kvmalloc'ed array. The allocation and the user copies are done without
 * the array lock; a reference taken with ipc_rcu_getref() keeps the array
 * alive meanwhile and the array is re-validated after relocking.
 *
 * Returns the requested value for the GET* commands, 0 for
 * GETALL/SETALL, or a negative error code.
 *
 * Exit labels:
 *   out_unlock     - array locked, inside RCU read section
 *   out_rcu_wakeup - inside RCU read section only
 *   out_free       - no locks held; only frees the temporary buffer
 */
static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, void __user *p)
{
	struct sem_array *sma;
	struct sem *curr;
	int err, nsems;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort *sem_io = fast_sem_io;
	DEFINE_WAKE_Q(wake_q);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	nsems = sma->sem_nsems;

	err = -EACCES;
	/* Only SETALL modifies the array; everything else just reads. */
	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
		goto out_rcu_wakeup;

	err = security_sem_semctl(&sma->sem_perm, cmd);
	if (err)
		goto out_rcu_wakeup;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = p;
		int i;

		sem_lock(sma, NULL, -1);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}
		if (nsems > SEMMSL_FAST) {
			/*
			 * The stack buffer is too small: drop the locks to
			 * allocate, holding a reference on the array.
			 */
			if (!ipc_rcu_getref(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
			sem_unlock(sma, -1);
			rcu_read_unlock();
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}

			rcu_read_lock();
			sem_lock_and_putref(sma);
			if (!ipc_valid_object(&sma->sem_perm)) {
				err = -EIDRM;
				goto out_unlock;
			}
		}
		/* Bounded by nsems, the count the buffer was sized with. */
		for (i = 0; i < nsems; i++)
			sem_io[i] = sma->sems[i].semval;
		sem_unlock(sma, -1);
		rcu_read_unlock();
		err = 0;
		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		/* Pin the array, then leave RCU for allocation and copy. */
		if (!ipc_rcu_getref(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_rcu_wakeup;
		}
		rcu_read_unlock();

		if (nsems > SEMMSL_FAST) {
			sem_io = kvmalloc_array(nsems, sizeof(ushort),
						GFP_KERNEL);
			if (sem_io == NULL) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				return -ENOMEM;
			}
		}

		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
			err = -EFAULT;
			goto out_free;
		}

		/* Validate every value before any semaphore is changed. */
		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
				err = -ERANGE;
				goto out_free;
			}
		}
		rcu_read_lock();
		sem_lock_and_putref(sma);
		if (!ipc_valid_object(&sma->sem_perm)) {
			err = -EIDRM;
			goto out_unlock;
		}

		for (i = 0; i < nsems; i++) {
			sma->sems[i].semval = sem_io[i];
			sma->sems[i].sempid = task_tgid_vnr(current);
		}

		/* SETALL discards all pending undo adjustments. */
		ipc_assert_locked_object(&sma->sem_perm);
		list_for_each_entry(un, &sma->list_id, list_id) {
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		}
		sma->sem_ctime = ktime_get_real_seconds();
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 0, &wake_q);
		err = 0;
		goto out_unlock;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_rcu_wakeup;

	sem_lock(sma, NULL, -1);
	if (!ipc_valid_object(&sma->sem_perm)) {
		err = -EIDRM;
		goto out_unlock;
	}
	curr = &sma->sems[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semcnt(sma, semnum, false);
		goto out_unlock;
	case GETZCNT:
		err = count_semcnt(sma, semnum, true);
		goto out_unlock;
	}

out_unlock:
	sem_unlock(sma, -1);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_q(&wake_q);
out_free:
	if (sem_io != fast_sem_io)
		kvfree(sem_io);
	return err;
}

1495 1496
/*
 * Read a semid descriptor from user space in the layout selected by
 * @version.
 *
 * IPC_64 copies a complete semid64_ds into @out. IPC_OLD reads a struct
 * semid_ds and transfers only the uid, gid and mode of its sem_perm.
 * Returns 0 on success, -EFAULT if the user copy fails and -EINVAL for
 * an unknown @version (error codes converted to unsigned long).
 */
static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
	struct semid_ds legacy;

	if (version == IPC_64)
		return copy_from_user(out, buf, sizeof(*out)) ? -EFAULT : 0;

	if (version != IPC_OLD)
		return -EINVAL;

	if (copy_from_user(&legacy, buf, sizeof(legacy)))
		return -EFAULT;

	out->sem_perm.uid	= legacy.sem_perm.uid;
	out->sem_perm.gid	= legacy.sem_perm.gid;
	out->sem_perm.mode	= legacy.sem_perm.mode;

	return 0;
}

1521
/*
D
Davidlohr Bueso 已提交
1522
 * This function handles some semctl commands which require the rwsem
1523
 * to be held in write mode.
D
Davidlohr Bueso 已提交
1524
 * NOTE: no locks must be held, the rwsem is taken inside this function.
1525
 */
1526
static int semctl_down(struct ipc_namespace *ns, int semid,
1527
		       int cmd, struct semid64_ds *semid64)
L
Linus Torvalds 已提交
1528 1529 1530 1531 1532
{
	struct sem_array *sma;
	int err;
	struct kern_ipc_perm *ipcp;

D
Davidlohr Bueso 已提交
1533
	down_write(&sem_ids(ns).rwsem);
1534 1535
	rcu_read_lock();

1536
	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
1537
				      &semid64->sem_perm, 0);
1538 1539 1540 1541
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}
S
Steve Grubb 已提交
1542

1543
	sma = container_of(ipcp, struct sem_array, sem_perm);
L
Linus Torvalds 已提交
1544

1545
	err = security_sem_semctl(&sma->sem_perm, cmd);
1546 1547
	if (err)
		goto out_unlock1;
L
Linus Torvalds 已提交
1548

1549
	switch (cmd) {
L
Linus Torvalds 已提交
1550
	case IPC_RMID:
1551
		sem_lock(sma, NULL, -1);
1552
		/* freeary unlocks the ipc object and rcu */
1553
		freeary(ns, ipcp);
1554
		goto out_up;
L
Linus Torvalds 已提交
1555
	case IPC_SET:
1556
		sem_lock(sma, NULL, -1);
1557
		err = ipc_update_perm(&semid64->sem_perm, ipcp);
1558
		if (err)
1559
			goto out_unlock0;
1560
		sma->sem_ctime = ktime_get_real_seconds();
L
Linus Torvalds 已提交
1561 1562 1563
		break;
	default:
		err = -EINVAL;
1564
		goto out_unlock1;
L
Linus Torvalds 已提交
1565 1566
	}

1567
out_unlock0:
1568
	sem_unlock(sma, -1);
1569
out_unlock1:
1570
	rcu_read_unlock();
1571
out_up:
D
Davidlohr Bueso 已提交
1572
	up_write(&sem_ids(ns).rwsem);
L
Linus Torvalds 已提交
1573 1574 1575
	return err;
}

1576
/*
 * semctl() system call: strip the version flag from @cmd with
 * ipc_parse_version() and dispatch to the handler for the command.
 * @arg is either a user pointer or, for SETVAL, the new value itself.
 */
SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	int version;
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;
	struct semid64_ds semid64;
	int err;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		/* On success err holds the value to return (see semctl_stat). */
		if (copy_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL: {
		int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
		/* big-endian 64bit */
		val = arg >> 32;
#else
		/* 32bit or little-endian 64bit */
		val = arg;
#endif
		return semctl_setval(ns, semid, semnum, val);
	}
	case IPC_SET:
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fall through */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}

A
Al Viro 已提交
1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647
#ifdef CONFIG_COMPAT

/*
 * Old-style (non-IPC_64) semid descriptor as exchanged with compat tasks.
 * In this file only sem_perm, sem_otime, sem_ctime and sem_nsems are ever
 * filled in when copying to user space; the compat_uptr_t members are
 * left zeroed by the preceding memset().
 */
struct compat_semid_ds {
	struct compat_ipc_perm sem_perm;	/* permissions */
	compat_time_t sem_otime;		/* copied from semid64_ds.sem_otime */
	compat_time_t sem_ctime;		/* copied from semid64_ds.sem_ctime */
	compat_uptr_t sem_base;			/* never written by this file */
	compat_uptr_t sem_pending;		/* never written by this file */
	compat_uptr_t sem_pending_last;		/* never written by this file */
	compat_uptr_t undo;			/* never written by this file */
	unsigned short sem_nsems;		/* copied from semid64_ds.sem_nsems */
};

/*
 * Read the sem_perm part of a compat semid descriptor from user space.
 *
 * @out is cleared first; only out->sem_perm is then filled in, using the
 * compat_semid64_ds layout for IPC_64 and compat_semid_ds otherwise.
 * Returns the result of the respective get_compat_ipc*_perm() helper.
 */
static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
					int version)
{
	struct compat_semid64_ds __user *desc64;
	struct compat_semid_ds __user *desc_old;

	memset(out, 0, sizeof(*out));

	if (version != IPC_64) {
		desc_old = buf;
		return get_compat_ipc_perm(&out->sem_perm, &desc_old->sem_perm);
	}

	desc64 = buf;
	return get_compat_ipc64_perm(&out->sem_perm, &desc64->sem_perm);
}

/*
 * Convert @in to the compat layout selected by @version (compat_semid64_ds
 * for IPC_64, compat_semid_ds otherwise) and copy it to @buf.
 * The temporary is zeroed before it is filled, so members that are not
 * set explicitly reach user space as 0.
 * Returns the result of copy_to_user().
 */
static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
					int version)
{
	if (version != IPC_64) {
		struct compat_semid_ds legacy;

		memset(&legacy, 0, sizeof(legacy));
		to_compat_ipc_perm(&legacy.sem_perm, &in->sem_perm);
		legacy.sem_otime = in->sem_otime;
		legacy.sem_ctime = in->sem_ctime;
		legacy.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &legacy, sizeof(legacy));
	} else {
		struct compat_semid64_ds desc64;

		memset(&desc64, 0, sizeof(desc64));
		to_compat_ipc64_perm(&desc64.sem_perm, &in->sem_perm);
		desc64.sem_otime = in->sem_otime;
		desc64.sem_ctime = in->sem_ctime;
		desc64.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &desc64, sizeof(desc64));
	}
}

/*
 * Compat semctl() system call: same dispatch as the native entry point,
 * but @arg is a compat pointer (or the SETVAL value) and semid
 * descriptors are converted with the copy_compat_semid_*() helpers.
 */
COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		/* On success err holds the value to return (see semctl_stat). */
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_SET:
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}
A
Al Viro 已提交
1722
#endif
L
Linus Torvalds 已提交
1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740

/* If the task doesn't already have a undo_list, then allocate one
 * here.  We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
1741
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
L
Linus Torvalds 已提交
1742 1743
		if (undo_list == NULL)
			return -ENOMEM;
I
Ingo Molnar 已提交
1744
		spin_lock_init(&undo_list->lock);
1745
		refcount_set(&undo_list->refcnt, 1);
1746 1747
		INIT_LIST_HEAD(&undo_list->list_proc);

L
Linus Torvalds 已提交
1748 1749 1750 1751 1752 1753
		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

1754
static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
L
Linus Torvalds 已提交
1755
{
1756
	struct sem_undo *un;
1757

1758 1759 1760
	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
		if (un->semid == semid)
			return un;
L
Linus Torvalds 已提交
1761
	}
1762
	return NULL;
L
Linus Torvalds 已提交
1763 1764
}

1765 1766 1767 1768
static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

M
Manfred Spraul 已提交
1769
	assert_spin_locked(&ulp->lock);
1770 1771 1772 1773 1774 1775 1776 1777 1778

	un = __lookup_undo(ulp, semid);
	if (un) {
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}

1779
/**
D
Davidlohr Bueso 已提交
1780
 * find_alloc_undo - lookup (and if not present create) undo array
1781 1782 1783 1784 1785 1786
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
1787 1788
 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
 * performs a rcu_read_lock().
1789 1790
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
L
Linus Torvalds 已提交
1791 1792 1793 1794
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
1795
	int nsems, error;
L
Linus Torvalds 已提交
1796 1797 1798 1799 1800

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

1801
	rcu_read_lock();
1802
	spin_lock(&ulp->lock);
L
Linus Torvalds 已提交
1803
	un = lookup_undo(ulp, semid);
1804
	spin_unlock(&ulp->lock);
M
Manfred Spraul 已提交
1805
	if (likely(un != NULL))
L
Linus Torvalds 已提交
1806 1807 1808
		goto out;

	/* no undo structure around - allocate one. */
1809
	/* step 1: figure out the size of the semaphore array */
1810 1811 1812
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
J
Julia Lawall 已提交
1813
		return ERR_CAST(sma);
1814
	}
1815

L
Linus Torvalds 已提交
1816
	nsems = sma->sem_nsems;
1817
	if (!ipc_rcu_getref(&sma->sem_perm)) {
1818 1819 1820 1821
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
1822
	rcu_read_unlock();
L
Linus Torvalds 已提交
1823

1824
	/* step 2: allocate new undo structure */
1825
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
L
Linus Torvalds 已提交
1826
	if (!new) {
1827
		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
L
Linus Torvalds 已提交
1828 1829 1830
		return ERR_PTR(-ENOMEM);
	}

1831
	/* step 3: Acquire the lock on semaphore array */
1832
	rcu_read_lock();
1833
	sem_lock_and_putref(sma);
1834
	if (!ipc_valid_object(&sma->sem_perm)) {
1835
		sem_unlock(sma, -1);
1836
		rcu_read_unlock();
L
Linus Torvalds 已提交
1837 1838 1839 1840
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
1841 1842 1843 1844 1845 1846 1847 1848 1849 1850
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		kfree(new);
		goto success;
	}
1851 1852
	/* step 5: initialize & link new undo structure */
	new->semadj = (short *) &new[1];
1853
	new->ulp = ulp;
1854 1855
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
1856
	list_add_rcu(&new->list_proc, &ulp->list_proc);
1857
	ipc_assert_locked_object(&sma->sem_perm);
1858
	list_add(&new->list_id, &sma->list_id);
1859
	un = new;
1860

1861
success:
1862
	spin_unlock(&ulp->lock);
1863
	sem_unlock(sma, -1);
L
Linus Torvalds 已提交
1864 1865 1866 1867
out:
	return un;
}

A
Al Viro 已提交
1868
/*
 * do_semtimedop - perform a set of semaphore operations, optionally bounded
 * by a timeout.
 * @semid:   identifier of the semaphore array
 * @tsops:   user-space array of operations to perform
 * @nsops:   number of entries in @tsops
 * @timeout: maximum time to sleep, or NULL to sleep without a time limit
 *
 * The operations are copied from user space, validated, and then attempted
 * atomically under sem_lock().  If they cannot complete immediately, the
 * caller is queued on the array (or on the single affected semaphore) and
 * sleeps interruptibly until it is woken, the timeout expires or a signal
 * is pending.
 *
 * Returns 0 on success or a negative errno: -EINVAL (bad @nsops, @semid or
 * @timeout), -E2BIG (@nsops above the namespace limit), -ENOMEM, -EFAULT,
 * -EFBIG (semaphore index out of range), -EACCES, -EIDRM (array removed),
 * -EAGAIN (timeout expired), -EINTR (signal), or whatever the security hook,
 * find_alloc_undo() or perform_atomic_semop() report.
 */
static long do_semtimedop(int semid, struct sembuf __user *tsops,
		unsigned nsops, const struct timespec64 *timeout)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf *sops = fast_sops, *sop;
	struct sem_undo *un;
	int max, locknum;
	bool undos = false, alter = false, dupsop = false;
	struct sem_queue queue;
	unsigned long dup = 0, jiffies_left = 0;
	struct ipc_namespace *ns;

	ns = current->nsproxy->ipc_ns;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > ns->sc_semopm)
		return -E2BIG;
	if (nsops > SEMOPM_FAST) {
		/*
		 * kvmalloc_array() checks nsops * sizeof(*sops) for overflow
		 * instead of silently allocating a truncated buffer.
		 */
		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
		if (sops == NULL)
			return -ENOMEM;
	}

	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
		error =  -EFAULT;
		goto out_free;
	}

	if (timeout) {
		if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
			timeout->tv_nsec >= 1000000000L) {
			error = -EINVAL;
			goto out_free;
		}
		jiffies_left = timespec64_to_jiffies(timeout);
	}

	/*
	 * Scan the operations once: find the highest semaphore index, and
	 * note whether any operation needs undo tracking, alters a semaphore
	 * or (possibly) touches a semaphore that an earlier operation altered.
	 */
	max = 0;
	for (sop = sops; sop < sops + nsops; sop++) {
		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);

		if (sop->sem_num >= max)
			max = sop->sem_num;
		if (sop->sem_flg & SEM_UNDO)
			undos = true;
		if (dup & mask) {
			/*
			 * There was a previous alter access that appears
			 * to have accessed the same semaphore, thus use
			 * the dupsop logic. "appears", because the detection
			 * can only check % BITS_PER_LONG.
			 */
			dupsop = true;
		}
		if (sop->sem_op != 0) {
			alter = true;
			dup |= mask;
		}
	}

	if (undos) {
		/* On success, find_alloc_undo takes the rcu_read_lock */
		un = find_alloc_undo(ns, semid);
		if (IS_ERR(un)) {
			error = PTR_ERR(un);
			goto out_free;
		}
	} else {
		un = NULL;
		rcu_read_lock();
	}

	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		error = PTR_ERR(sma);
		goto out_free;
	}

	error = -EFBIG;
	if (max >= sma->sem_nsems) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
		rcu_read_unlock();
		goto out_free;
	}

	error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
	if (error) {
		rcu_read_unlock();
		goto out_free;
	}

	error = -EIDRM;
	locknum = sem_lock(sma, sops, nsops);
	/*
	 * We eventually might perform the following check in a lockless
	 * fashion, considering ipc_valid_object() locking constraints.
	 * If nsops == 1 and there is no contention for sem_perm.lock, then
	 * only a per-semaphore lock is held and it's OK to proceed with the
	 * check below. More details on the fine grained locking scheme
	 * entangled here and why it's RMID race safe on comments at sem_lock()
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array received the same id. Check and fail.
	 * This case can be detected checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock_free;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid_vnr(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out_free;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock_free;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		curr = &sma->sems[sops->sem_num];

		if (alter) {
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {

				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		queue.status = -EINTR;
		queue.sleeper = current;

		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		if (timeout)
			jiffies_left = schedule_timeout(jiffies_left);
		else
			schedule();

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not, from the syscall pov, is quite irrelevant to us at this
		 * point; we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously.  The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 *
		 * The RCU read-side section must start *before* the lockless
		 * status check.  While queue.status is still -EINTR the queue
		 * entry is still linked to the array, and only an RCU section
		 * entered before observing that keeps "sma" from being freed
		 * by a concurrent IPC_RMID before sem_lock() below touches it.
		 */
		rcu_read_lock();
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/*
			 * User space could assume that semop() is a memory
			 * barrier: Without the mb(), the cpu could
			 * speculatively read in userspace stale data that was
			 * overwritten by the previous owner of the semaphore.
			 */
			smp_mb();
			rcu_read_unlock();
			goto out_free;
		}

		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock_free;

		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR we are woken up by another process.
		 * Leave without unlink_queue(), but with sem_unlock().
		 */
		if (error != -EINTR)
			goto out_unlock_free;

		/*
		 * If an interrupt occurred we have to clean up the queue.
		 */
		if (timeout && jiffies_left == 0)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out_free:
	if (sops != fast_sops)
		kvfree(sops);
	return error;
}

A
Al Viro 已提交
2120 2121 2122 2123
/*
 * semtimedop(2) entry point.  A NULL @timeout is forwarded unchanged;
 * otherwise the user-space timespec is fetched with get_timespec64() first
 * and -EFAULT is returned if that fails.
 */
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops, const struct timespec __user *, timeout)
{
	struct timespec64 ts;

	if (!timeout)
		return do_semtimedop(semid, tsops, nsops, NULL);

	if (get_timespec64(&ts, timeout))
		return -EFAULT;

	return do_semtimedop(semid, tsops, nsops, &ts);
}

#ifdef CONFIG_COMPAT
/*
 * Compat semtimedop(2) entry point.  Same as the native one, except that
 * the timeout is a struct compat_timespec read with
 * compat_get_timespec64(); -EFAULT is returned if that read fails.
 */
COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		       unsigned, nsops,
		       const struct compat_timespec __user *, timeout)
{
	struct timespec64 ts;

	if (!timeout)
		return do_semtimedop(semid, tsems, nsops, NULL);

	if (compat_get_timespec64(&ts, timeout))
		return -EFAULT;

	return do_semtimedop(semid, tsems, nsops, &ts);
}
#endif

2147 2148
/*
 * semop(2) entry point: do_semtimedop() with no timeout, i.e. the caller
 * may sleep without a time limit.
 */
SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	const struct timespec64 *no_timeout = NULL;

	return do_semtimedop(semid, tsops, nsops, no_timeout);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
2166
		refcount_inc(&undo_list->refcnt);
L
Linus Torvalds 已提交
2167
		tsk->sysvsem.undo_list = undo_list;
P
Paul McQuade 已提交
2168
	} else
L
Linus Torvalds 已提交
2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
2188
	struct sem_undo_list *ulp;
L
Linus Torvalds 已提交
2189

2190 2191
	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
L
Linus Torvalds 已提交
2192
		return;
2193
	tsk->sysvsem.undo_list = NULL;
L
Linus Torvalds 已提交
2194

2195
	if (!refcount_dec_and_test(&ulp->refcnt))
L
Linus Torvalds 已提交
2196 2197
		return;

2198
	for (;;) {
L
Linus Torvalds 已提交
2199
		struct sem_array *sma;
2200
		struct sem_undo *un;
2201
		int semid, i;
D
Davidlohr Bueso 已提交
2202
		DEFINE_WAKE_Q(wake_q);
2203

2204 2205
		cond_resched();

2206
		rcu_read_lock();
2207 2208
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
2209 2210 2211 2212 2213 2214 2215
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with last sem_undo. There is a small
			 * possibility where we exit while freeary() didn't
			 * finish unlocking sem_undo_list.
			 */
2216 2217
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
2218 2219 2220 2221 2222 2223
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);
2224

2225
		/* exit_sem raced with IPC_RMID, nothing to do */
2226 2227
		if (semid == -1) {
			rcu_read_unlock();
2228
			continue;
2229
		}
L
Linus Torvalds 已提交
2230

2231
		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
2232
		/* exit_sem raced with IPC_RMID, nothing to do */
2233 2234
		if (IS_ERR(sma)) {
			rcu_read_unlock();
2235
			continue;
2236
		}
L
Linus Torvalds 已提交
2237

2238
		sem_lock(sma, NULL, -1);
2239
		/* exit_sem raced with IPC_RMID, nothing to do */
2240
		if (!ipc_valid_object(&sma->sem_perm)) {
2241 2242 2243 2244
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
2245
		un = __lookup_undo(ulp, semid);
2246 2247 2248 2249
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
2250
			sem_unlock(sma, -1);
2251
			rcu_read_unlock();
2252 2253 2254 2255
			continue;
		}

		/* remove un from the linked lists */
2256
		ipc_assert_locked_object(&sma->sem_perm);
2257 2258
		list_del(&un->list_id);

2259 2260 2261 2262
		/* we are the last process using this ulp, acquiring ulp->lock
		 * isn't required. Besides that, we are also protected against
		 * IPC_RMID as we hold sma->sem_perm lock now
		 */
2263 2264
		list_del_rcu(&un->list_proc);

2265 2266
		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
2267
			struct sem *semaphore = &sma->sems[i];
2268 2269
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
L
Linus Torvalds 已提交
2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280
				/*
				 * Range checks of the new semaphore value,
				 * not defined by sus:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
M
Manfred Spraul 已提交
2281
				 *	Manfred <manfred@colorfullife.com>
L
Linus Torvalds 已提交
2282
				 */
I
Ingo Molnar 已提交
2283 2284 2285 2286
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
2287
				semaphore->sempid = task_tgid_vnr(current);
L
Linus Torvalds 已提交
2288 2289 2290
			}
		}
		/* maybe some queued-up processes were waiting for this */
D
Davidlohr Bueso 已提交
2291
		do_smart_update(sma, NULL, 0, 1, &wake_q);
2292
		sem_unlock(sma, -1);
2293
		rcu_read_unlock();
D
Davidlohr Bueso 已提交
2294
		wake_up_q(&wake_q);
2295

2296
		kfree_rcu(un, rcu);
L
Linus Torvalds 已提交
2297
	}
2298
	kfree(ulp);
L
Linus Torvalds 已提交
2299 2300 2301
}

#ifdef CONFIG_PROC_FS
2302
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
L
Linus Torvalds 已提交
2303
{
2304
	struct user_namespace *user_ns = seq_user_ns(s);
2305 2306
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
2307
	time64_t sem_otime;
2308

2309 2310 2311
	/*
	 * The proc interface isn't aware of sem_lock(), it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
2312 2313
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
2314
	 */
2315
	complexmode_enter(sma);
2316

2317
	sem_otime = get_semotime(sma);
2318

2319
	seq_printf(s,
2320
		   "%10d %10d  %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

2332 2333
	complexmode_tryleave(sma);

2334
	return 0;
L
Linus Torvalds 已提交
2335 2336
}
#endif