fcntl.c 21.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
10
#include <linux/sched/task.h>
L
Linus Torvalds 已提交
11 12
#include <linux/fs.h>
#include <linux/file.h>
A
Al Viro 已提交
13
#include <linux/fdtable.h>
14
#include <linux/capability.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
18
#include <linux/pipe_fs_i.h>
L
Linus Torvalds 已提交
19 20
#include <linux/security.h>
#include <linux/ptrace.h>
21
#include <linux/signal.h>
22
#include <linux/rcupdate.h>
23
#include <linux/pid_namespace.h>
24
#include <linux/user_namespace.h>
D
David Herrmann 已提交
25
#include <linux/shmem_fs.h>
26
#include <linux/compat.h>
L
Linus Torvalds 已提交
27 28 29

#include <asm/poll.h>
#include <asm/siginfo.h>
30
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
31

32
/* File status flags that setfl() copies from the F_SETFL argument into f_flags. */
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
L
Linus Torvalds 已提交
33 34 35

static int setfl(int fd, struct file * filp, unsigned long arg)
{
A
Al Viro 已提交
36
	struct inode * inode = file_inode(filp);
L
Linus Torvalds 已提交
37 38
	int error = 0;

39 40 41 42 43
	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
L
Linus Torvalds 已提交
44 45 46 47
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
48
		if (!inode_owner_or_capable(inode))
L
Linus Torvalds 已提交
49 50 51 52 53 54 55
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
	       if (arg & O_NDELAY)
		   arg |= O_NONBLOCK;

56
	/* Pipe packetized mode is controlled by O_DIRECT flag */
A
Al Viro 已提交
57
	if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
L
Linus Torvalds 已提交
58 59 60 61 62
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

A
Al Viro 已提交
63
	if (filp->f_op->check_flags)
L
Linus Torvalds 已提交
64 65 66 67
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

68
	/*
69
	 * ->fasync() is responsible for setting the FASYNC bit.
70
	 */
A
Al Viro 已提交
71
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
72 73 74
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
75 76
		if (error > 0)
			error = 0;
L
Linus Torvalds 已提交
77
	}
J
Jonathan Corbet 已提交
78
	spin_lock(&filp->f_lock);
L
Linus Torvalds 已提交
79
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
J
Jonathan Corbet 已提交
80
	spin_unlock(&filp->f_lock);
81

L
Linus Torvalds 已提交
82 83 84 85
 out:
	return error;
}

86
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
87
                     int force)
L
Linus Torvalds 已提交
88
{
89
	write_lock_irq(&filp->f_owner.lock);
L
Linus Torvalds 已提交
90
	if (force || !filp->f_owner.pid) {
91 92 93
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;
94 95 96 97 98 99

		if (pid) {
			const struct cred *cred = current_cred();
			filp->f_owner.uid = cred->uid;
			filp->f_owner.euid = cred->euid;
		}
L
Linus Torvalds 已提交
100
	}
101
	write_unlock_irq(&filp->f_owner.lock);
L
Linus Torvalds 已提交
102 103
}

104
void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
105
		int force)
L
Linus Torvalds 已提交
106
{
107
	security_file_set_fowner(filp);
108
	f_modown(filp, pid, type, force);
L
Linus Torvalds 已提交
109
}
110
EXPORT_SYMBOL(__f_setown);
L
Linus Torvalds 已提交
111

112
void f_setown(struct file *filp, unsigned long arg, int force)
113 114 115 116 117 118 119 120 121 122
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
123
	pid = find_vpid(who);
124
	__f_setown(filp, pid, type, force);
125 126
	rcu_read_unlock();
}
L
Linus Torvalds 已提交
127 128 129 130
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
131
	f_modown(filp, NULL, PIDTYPE_PID, 1);
132 133 134 135 136
}

pid_t f_getown(struct file *filp)
{
	pid_t pid;
137
	read_lock(&filp->f_owner.lock);
138
	pid = pid_vnr(filp->f_owner.pid);
139 140
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
141
	read_unlock(&filp->f_owner.lock);
142
	return pid;
L
Linus Torvalds 已提交
143 144
}

P
Peter Zijlstra 已提交
145 146
static int f_setown_ex(struct file *filp, unsigned long arg)
{
A
Al Viro 已提交
147
	struct f_owner_ex __user *owner_p = (void __user *)arg;
P
Peter Zijlstra 已提交
148 149 150 151 152 153 154
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
155
		return -EFAULT;
P
Peter Zijlstra 已提交
156 157 158 159 160 161 162 163 164 165

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_MAX;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_PID;
		break;

166
	case F_OWNER_PGRP:
P
Peter Zijlstra 已提交
167 168 169 170 171 172 173 174 175 176 177 178
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
179
		 __f_setown(filp, pid, type, 1);
P
Peter Zijlstra 已提交
180 181 182 183 184 185 186
	rcu_read_unlock();

	return ret;
}

static int f_getown_ex(struct file *filp, unsigned long arg)
{
A
Al Viro 已提交
187
	struct f_owner_ex __user *owner_p = (void __user *)arg;
P
Peter Zijlstra 已提交
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
	struct f_owner_ex owner;
	int ret = 0;

	read_lock(&filp->f_owner.lock);
	owner.pid = pid_vnr(filp->f_owner.pid);
	switch (filp->f_owner.pid_type) {
	case PIDTYPE_MAX:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_PID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
203
		owner.type = F_OWNER_PGRP;
P
Peter Zijlstra 已提交
204 205 206 207 208 209 210 211 212
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	read_unlock(&filp->f_owner.lock);

213
	if (!ret) {
P
Peter Zijlstra 已提交
214
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
215 216 217
		if (ret)
			ret = -EFAULT;
	}
P
Peter Zijlstra 已提交
218 219 220
	return ret;
}

221 222 223 224
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * F_GETOWNER_UIDS: write the owner's uid and euid, translated into the
 * caller's user namespace, to the two-element uid_t array at user address
 * @arg.  Returns the OR of the two put_user() results.
 */
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	uid_t __user *dst = (void __user *)arg;
	uid_t uid, euid;
	int err;

	/* Sample both ids under the lock, copy out after dropping it. */
	read_lock(&filp->f_owner.lock);
	uid = from_kuid(user_ns, filp->f_owner.uid);
	euid = from_kuid(user_ns, filp->f_owner.euid);
	read_unlock(&filp->f_owner.lock);

	err = put_user(uid, &dst[0]);
	err |= put_user(euid, &dst[1]);

	return err;
}
#else
/* Without CONFIG_CHECKPOINT_RESTORE the command is rejected. */
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

L
Linus Torvalds 已提交
246 247 248
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
249 250
	void __user *argp = (void __user *)arg;
	struct flock flock;
L
Linus Torvalds 已提交
251 252 253 254
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
255 256
		err = f_dupfd(arg, filp, 0);
		break;
U
Ulrich Drepper 已提交
257
	case F_DUPFD_CLOEXEC:
A
Al Viro 已提交
258
		err = f_dupfd(arg, filp, O_CLOEXEC);
L
Linus Torvalds 已提交
259 260 261 262 263 264 265 266 267 268 269 270 271 272
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
273 274
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
275
	case F_OFD_GETLK:
276
#endif
L
Linus Torvalds 已提交
277
	case F_GETLK:
278 279 280 281 282
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_getlk(filp, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			return -EFAULT;
L
Linus Torvalds 已提交
283
		break;
284 285
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
286 287
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
288 289
#endif
		/* Fallthrough */
L
Linus Torvalds 已提交
290 291
	case F_SETLK:
	case F_SETLKW:
292 293 294
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_setlk(fd, filp, cmd, &flock);
L
Linus Torvalds 已提交
295 296 297 298 299 300 301 302 303
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
304
		err = f_getown(filp);
L
Linus Torvalds 已提交
305 306 307
		force_successful_syscall_return();
		break;
	case F_SETOWN:
308 309
		f_setown(filp, arg, 1);
		err = 0;
L
Linus Torvalds 已提交
310
		break;
P
Peter Zijlstra 已提交
311 312 313 314 315 316
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
317 318 319
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
L
Linus Torvalds 已提交
320 321 322 323 324
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
325
		if (!valid_signal(arg)) {
L
Linus Torvalds 已提交
326 327 328 329 330 331 332 333 334 335 336 337 338 339
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
340 341 342 343
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
D
David Herrmann 已提交
344 345 346 347
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = shmem_fcntl(filp, cmd, arg);
		break;
L
Linus Torvalds 已提交
348 349 350 351 352 353
	default:
		break;
	}
	return err;
}

354 355 356 357 358 359 360 361 362 363 364 365 366
/*
 * Return 1 if @cmd is one of the commands the fcntl entry points still
 * accept on a file opened with FMODE_PATH, 0 otherwise.
 */
static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	default:
		return 0;
	}
}

367
/*
 * fcntl() system call entry point.
 *
 * Looks up @fd, restricts FMODE_PATH files to the commands accepted by
 * check_fcntl_cmd(), consults the security hook and then dispatches to
 * do_fcntl().  Returns -EBADF when @fd is not open or the command is not
 * allowed on an FMODE_PATH file.
 */
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH) && !check_fcntl_cmd(cmd))
		goto out_put;

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	err = do_fcntl(fd, cmd, arg, f.file);

out_put:
	fdput(f);
	return err;
}

#if BITS_PER_LONG == 32
/*
 * fcntl64() system call entry point, built only when BITS_PER_LONG is 32.
 *
 * Handles the lock commands that take a struct flock64 itself and passes
 * every other command to do_fcntl().  Returns -EBADF when @fd is not open
 * or the command is not allowed on an FMODE_PATH file.
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *argp = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 flock;
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH) && !check_fcntl_cmd(cmd))
		goto out_put;

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		if (copy_from_user(&flock, argp, sizeof(flock))) {
			err = -EFAULT;
			break;
		}
		err = fcntl_getlk64(f.file, cmd, &flock);
		/* The lock description is only copied back on success. */
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		if (copy_from_user(&flock, argp, sizeof(flock))) {
			err = -EFAULT;
			break;
		}
		err = fcntl_setlk64(fd, f.file, cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}

out_put:
	fdput(f);
	return err;
}
#endif

441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508
#ifdef CONFIG_COMPAT
/*
 * Read a struct compat_flock from user space into the native @kfl,
 * field by field.  Returns 0 on success or -EFAULT if the user range is
 * not accessible or any field cannot be read.
 */
static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
{
	if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)))
		return -EFAULT;

	if (__get_user(kfl->l_type, &ufl->l_type) ||
	    __get_user(kfl->l_whence, &ufl->l_whence) ||
	    __get_user(kfl->l_start, &ufl->l_start) ||
	    __get_user(kfl->l_len, &ufl->l_len) ||
	    __get_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;

	return 0;
}

/*
 * Write the native @kfl to a struct compat_flock in user space, field by
 * field.  Returns 0 on success or -EFAULT if the user range is not
 * accessible or any field cannot be written.
 */
static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
{
	if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)))
		return -EFAULT;

	if (__put_user(kfl->l_type, &ufl->l_type) ||
	    __put_user(kfl->l_whence, &ufl->l_whence) ||
	    __put_user(kfl->l_start, &ufl->l_start) ||
	    __put_user(kfl->l_len, &ufl->l_len) ||
	    __put_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;

	return 0;
}

#ifndef HAVE_ARCH_GET_COMPAT_FLOCK64
/*
 * Read a struct compat_flock64 from user space into the native @kfl,
 * field by field.  Compiled only when HAVE_ARCH_GET_COMPAT_FLOCK64 is
 * not defined.  Returns 0 on success or -EFAULT.
 */
static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
{
	if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)))
		return -EFAULT;

	if (__get_user(kfl->l_type, &ufl->l_type) ||
	    __get_user(kfl->l_whence, &ufl->l_whence) ||
	    __get_user(kfl->l_start, &ufl->l_start) ||
	    __get_user(kfl->l_len, &ufl->l_len) ||
	    __get_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;

	return 0;
}
#endif

#ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64
/*
 * Write the native @kfl to a struct compat_flock64 in user space, field
 * by field.  Compiled only when HAVE_ARCH_PUT_COMPAT_FLOCK64 is not
 * defined.  Returns 0 on success or -EFAULT.
 */
static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
{
	if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)))
		return -EFAULT;

	if (__put_user(kfl->l_type, &ufl->l_type) ||
	    __put_user(kfl->l_whence, &ufl->l_whence) ||
	    __put_user(kfl->l_start, &ufl->l_start) ||
	    __put_user(kfl->l_len, &ufl->l_len) ||
	    __put_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;

	return 0;
}
#endif

/*
 * Map the *64 lock commands onto their plain counterparts; every other
 * command is returned unchanged.
 */
static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	default:
		return cmd;
	}
}

509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater than
 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 * truncate it, and only allow the app to see that part of the conflicting lock
 * that might make sense to it anyway.
 *
 * Returns 0 when @flock is usable (with l_len possibly clamped in place) or
 * -EOVERFLOW when l_start exceeds COMPAT_OFF_T_MAX.
 */
static int fixup_compat_flock(struct flock *flock)
{
	/* A start offset the compat caller cannot represent is an error. */
	if (flock->l_start > COMPAT_OFF_T_MAX)
		return -EOVERFLOW;
	/* An oversized length is silently clamped rather than rejected. */
	if (flock->l_len > COMPAT_OFF_T_MAX)
		flock->l_len = COMPAT_OFF_T_MAX;
	return 0;
}

527 528 529
/*
 * Compat fcntl64() entry point.
 *
 * Handles the lock commands by converting between the compat flock
 * layouts and the native struct flock, and passes every other command to
 * do_fcntl().  Returns -EBADF when @fd is not open or the command is not
 * allowed on an FMODE_PATH file.
 *
 * Every path taken after fdget_raw() succeeds must leave through out_put
 * so that the file reference is dropped; in particular a failing
 * fixup_compat_flock() must not return directly.
 *
 * NOTE(review): the flock64 variants are also clamped to COMPAT_OFF_T_MAX
 * by fixup_compat_flock(); confirm whether a wider limit is intended for
 * them.
 */
COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	struct fd f = fdget_raw(fd);
	struct flock flock;
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out_put;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		err = fixup_compat_flock(&flock);
		if (err)
			break;	/* not return: f must still be released */
		err = put_compat_flock(&flock, compat_ptr(arg));
		break;
	case F_GETLK64:
	case F_OFD_GETLK:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		err = fixup_compat_flock(&flock);
		if (err)
			break;	/* not return: f must still be released */
		err = put_compat_flock64(&flock, compat_ptr(arg));
		break;
	case F_SETLK:
	case F_SETLKW:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out_put:
	fdput(f);
	return err;
}

/*
 * Compat fcntl() entry point: rejects the 64-bit and OFD lock commands
 * with -EINVAL and forwards everything else to compat_sys_fcntl64().
 */
COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	default:
		return compat_sys_fcntl64(fd, cmd, arg);
	}
}
#endif

L
Linus Torvalds 已提交
613 614
/* Table to convert sigio signal codes into poll band bitmaps */

615
static const long band_table[NSIGPOLL] = {
L
Linus Torvalds 已提交
616 617 618 619 620 621 622 623 624 625 626
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
627 628 629 630 631
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
632 633 634
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
635 636 637
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
L
Linus Torvalds 已提交
638 639 640
}

static void send_sigio_to_task(struct task_struct *p,
641
			       struct fown_struct *fown,
P
Peter Zijlstra 已提交
642
			       int fd, int reason, int group)
L
Linus Torvalds 已提交
643
{
644 645 646 647 648 649 650
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = ACCESS_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
L
Linus Torvalds 已提交
651 652
		return;

653
	switch (signum) {
L
Linus Torvalds 已提交
654 655 656 657 658 659 660 661
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not 
			   SI_KERNEL, since kernel signals always get 
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
662
			si.si_signo = signum;
L
Linus Torvalds 已提交
663 664 665 666 667
			si.si_errno = 0;
		        si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
668
			BUG_ON((reason & __SI_MASK) != __SI_POLL);
L
Linus Torvalds 已提交
669 670 671 672 673
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
P
Peter Zijlstra 已提交
674
			if (!do_send_sig_info(signum, &si, p, group))
L
Linus Torvalds 已提交
675 676 677
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
P
Peter Zijlstra 已提交
678
			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
L
Linus Torvalds 已提交
679 680 681 682 683 684
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
685 686
	enum pid_type type;
	struct pid *pid;
P
Peter Zijlstra 已提交
687
	int group = 1;
L
Linus Torvalds 已提交
688 689
	
	read_lock(&fown->lock);
P
Peter Zijlstra 已提交
690

691
	type = fown->pid_type;
P
Peter Zijlstra 已提交
692 693 694 695 696
	if (type == PIDTYPE_MAX) {
		group = 0;
		type = PIDTYPE_PID;
	}

L
Linus Torvalds 已提交
697 698 699 700 701
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;
	
	read_lock(&tasklist_lock);
702
	do_each_pid_task(pid, type, p) {
P
Peter Zijlstra 已提交
703
		send_sigio_to_task(p, fown, fd, band, group);
704
	} while_each_pid_task(pid, type, p);
L
Linus Torvalds 已提交
705 706 707 708 709 710
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
P
Peter Zijlstra 已提交
711
				struct fown_struct *fown, int group)
L
Linus Torvalds 已提交
712 713
{
	if (sigio_perm(p, fown, SIGURG))
P
Peter Zijlstra 已提交
714
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
L
Linus Torvalds 已提交
715 716 717 718 719
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
720 721
	enum pid_type type;
	struct pid *pid;
P
Peter Zijlstra 已提交
722
	int group = 1;
723
	int ret = 0;
L
Linus Torvalds 已提交
724 725
	
	read_lock(&fown->lock);
P
Peter Zijlstra 已提交
726

727
	type = fown->pid_type;
P
Peter Zijlstra 已提交
728 729 730 731 732
	if (type == PIDTYPE_MAX) {
		group = 0;
		type = PIDTYPE_PID;
	}

L
Linus Torvalds 已提交
733 734 735 736 737 738 739
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;
	
	read_lock(&tasklist_lock);
740
	do_each_pid_task(pid, type, p) {
P
Peter Zijlstra 已提交
741
		send_sigurg_to_task(p, fown, group);
742
	} while_each_pid_task(pid, type, p);
L
Linus Torvalds 已提交
743 744 745 746 747 748
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

749
static DEFINE_SPINLOCK(fasync_lock);
750
static struct kmem_cache *fasync_cache __read_mostly;
L
Linus Torvalds 已提交
751

752 753 754 755 756 757
/*
 * RCU callback: return the fasync_struct that embeds @head to
 * fasync_cache.
 */
static void fasync_free_rcu(struct rcu_head *head)
{
	struct fasync_struct *fa;

	fa = container_of(head, struct fasync_struct, fa_rcu);
	kmem_cache_free(fasync_cache, fa);
}

L
Linus Torvalds 已提交
758
/*
759 760 761 762 763 764 765
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 *
L
Linus Torvalds 已提交
766
 */
767
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
L
Linus Torvalds 已提交
768 769 770 771
{
	struct fasync_struct *fa, **fp;
	int result = 0;

772
	spin_lock(&filp->f_lock);
773
	spin_lock(&fasync_lock);
774 775 776
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;
777 778 779 780 781

		spin_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		spin_unlock_irq(&fa->fa_lock);

782
		*fp = fa->fa_next;
783
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
784 785 786
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
L
Linus Torvalds 已提交
787
	}
788
	spin_unlock(&fasync_lock);
789 790 791 792
	spin_unlock(&filp->f_lock);
	return result;
}

793 794 795 796 797
/*
 * Allocate a fasync_struct from fasync_cache with GFP_KERNEL.  Returns
 * whatever kmem_cache_alloc() returns; fasync_add_entry() treats NULL as
 * allocation failure.
 */
struct fasync_struct *fasync_alloc(void)
{
	struct fasync_struct *fa = kmem_cache_alloc(fasync_cache, GFP_KERNEL);

	return fa;
}

798
/*
799 800 801
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
802
 */
803
void fasync_free(struct fasync_struct *new)
804
{
805 806
	kmem_cache_free(fasync_cache, new);
}
807

808 809 810
/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
811 812 813
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
814 815 816 817
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
        struct fasync_struct *fa, **fp;
818 819

	spin_lock(&filp->f_lock);
820
	spin_lock(&fasync_lock);
L
Linus Torvalds 已提交
821
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
822 823
		if (fa->fa_file != filp)
			continue;
824 825

		spin_lock_irq(&fa->fa_lock);
826
		fa->fa_fd = fd;
827
		spin_unlock_irq(&fa->fa_lock);
828
		goto out;
L
Linus Torvalds 已提交
829 830
	}

831
	spin_lock_init(&new->fa_lock);
832 833 834 835
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
836
	rcu_assign_pointer(*fapp, new);
837 838
	filp->f_flags |= FASYNC;

L
Linus Torvalds 已提交
839
out:
840
	spin_unlock(&fasync_lock);
841
	spin_unlock(&filp->f_lock);
842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
	return fa;
}

/*
 * Add a fasync entry for @filp to the list at @fapp.
 *
 * Returns -ENOMEM if no entry could be allocated, 1 if a new entry was
 * added, and 0 if @filp already had an entry that was merely updated.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new = fasync_alloc();

	if (!new)
		return -ENOMEM;

	/* NULL from fasync_insert_entry() means @new is now on the list. */
	if (!fasync_insert_entry(fd, filp, fapp, new))
		return 1;

	/*
	 * An existing entry was updated instead, so the fresh allocation
	 * was never published and can be freed directly.
	 */
	fasync_free(new);
	return 0;
}

872 873 874 875 876 877 878 879 880 881 882 883 884
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code.
 *
 * A non-zero @on adds (or updates) the entry for @filp, zero removes it.
 * Returns negative on error, 0 if nothing was added or removed, and
 * positive if an entry was added or deleted.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	if (on)
		return fasync_add_entry(fd, filp, fapp);

	return fasync_remove_entry(filp, fapp);
}

L
Linus Torvalds 已提交
885 886
EXPORT_SYMBOL(fasync_helper);

887 888 889 890
/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
L
Linus Torvalds 已提交
891 892
{
	while (fa) {
893
		struct fown_struct *fown;
894 895
		unsigned long flags;

L
Linus Torvalds 已提交
896 897 898 899 900
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
901
		spin_lock_irqsave(&fa->fa_lock, flags);
902 903 904 905 906 907 908 909
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
910
		spin_unlock_irqrestore(&fa->fa_lock, flags);
911
		fa = rcu_dereference(fa->fa_next);
L
Linus Torvalds 已提交
912 913 914 915 916 917 918 919 920
	}
}

/*
 * Signal the owners of every entry on the fasync list at @fp; @sig and
 * @band are passed on to kill_fasync_rcu().
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (!*fp)
		return;

	rcu_read_lock();
	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
	rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);

928
static int __init fcntl_init(void)
L
Linus Torvalds 已提交
929
{
930 931 932 933 934
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
C
Christoph Hellwig 已提交
935 936 937 938
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));
939

L
Linus Torvalds 已提交
940
	fasync_cache = kmem_cache_create("fasync_cache",
941
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
L
Linus Torvalds 已提交
942 943 944
	return 0;
}

945
module_init(fcntl_init)