#include <linux/fanotify.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/compat.h>

#include <asm/ioctls.h>

#include "../../mount.h"
#include "../fdinfo.h"
#include "fanotify.h"

24
#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
25
#define FANOTIFY_DEFAULT_MAX_MARKS	8192
26
#define FANOTIFY_DEFAULT_MAX_LISTENERS	128
27

28
extern const struct fsnotify_ops fanotify_fsnotify_ops;
29

30
static struct kmem_cache *fanotify_mark_cache __read_mostly;
31
static struct kmem_cache *fanotify_response_event_cache __read_mostly;
32
struct kmem_cache *fanotify_event_cachep __read_mostly;
33 34 35 36

struct fanotify_response_event {
	struct list_head list;
	__s32 fd;
37
	struct fanotify_event_info *event;
38
};
39

E
Eric Paris 已提交
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*
 * Get an fsnotify notification event if one exists and is small
 * enough to fit in "count". Return an error pointer if the count
 * is not large enough.
 *
 * Called with the group->notification_mutex held.
 */
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
					    size_t count)
{
	BUG_ON(!mutex_is_locked(&group->notification_mutex));

	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);

	if (fsnotify_notify_queue_is_empty(group))
		return NULL;

	if (FAN_EVENT_METADATA_LEN > count)
		return ERR_PTR(-EINVAL);

	/* held the notification_mutex the whole time, so this is the
	 * same event we peeked above */
	return fsnotify_remove_notify_event(group);
}

65
static int create_fd(struct fsnotify_group *group,
66 67
		     struct fanotify_event_info *event,
		     struct file **file)
E
Eric Paris 已提交
68 69 70 71
{
	int client_fd;
	struct file *new_file;

72
	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
E
Eric Paris 已提交
73 74 75 76 77 78 79 80 81 82 83

	client_fd = get_unused_fd();
	if (client_fd < 0)
		return client_fd;

	/*
	 * we need a new file handle for the userspace program so it can read even if it was
	 * originally opened O_WRONLY.
	 */
	/* it's possible this event was an overflow event.  in that case dentry and mnt
	 * are NULL;  That's fine, just don't call dentry open */
84 85
	if (event->path.dentry && event->path.mnt)
		new_file = dentry_open(&event->path,
86
				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
E
Eric Paris 已提交
87 88 89 90 91 92 93 94 95 96 97 98 99 100
				       current_cred());
	else
		new_file = ERR_PTR(-EOVERFLOW);
	if (IS_ERR(new_file)) {
		/*
		 * we still send an event even if we can't open the file.  this
		 * can happen when say tasks are gone and we try to open their
		 * /proc files or we try to open a WRONLY file like in sysfs
		 * we just send the errno to userspace since there isn't much
		 * else we can do.
		 */
		put_unused_fd(client_fd);
		client_fd = PTR_ERR(new_file);
	} else {
101
		*file = new_file;
E
Eric Paris 已提交
102 103
	}

104
	return client_fd;
E
Eric Paris 已提交
105 106
}

107
static int fill_event_metadata(struct fsnotify_group *group,
108 109 110
			       struct fanotify_event_metadata *metadata,
			       struct fsnotify_event *fsn_event,
			       struct file **file)
E
Eric Paris 已提交
111
{
112
	int ret = 0;
113
	struct fanotify_event_info *event;
114

E
Eric Paris 已提交
115
	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
116
		 group, metadata, fsn_event);
E
Eric Paris 已提交
117

118
	*file = NULL;
119
	event = container_of(fsn_event, struct fanotify_event_info, fse);
E
Eric Paris 已提交
120
	metadata->event_len = FAN_EVENT_METADATA_LEN;
121
	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
E
Eric Paris 已提交
122
	metadata->vers = FANOTIFY_METADATA_VERSION;
123
	metadata->reserved = 0;
124
	metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
125
	metadata->pid = pid_vnr(event->tgid);
126
	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
127 128
		metadata->fd = FAN_NOFD;
	else {
129
		metadata->fd = create_fd(group, event, file);
130 131 132
		if (metadata->fd < 0)
			ret = metadata->fd;
	}
E
Eric Paris 已提交
133

134
	return ret;
E
Eric Paris 已提交
135 136
}

137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
						  __s32 fd)
{
	struct fanotify_response_event *re, *return_re = NULL;

	mutex_lock(&group->fanotify_data.access_mutex);
	list_for_each_entry(re, &group->fanotify_data.access_list, list) {
		if (re->fd != fd)
			continue;

		list_del_init(&re->list);
		return_re = re;
		break;
	}
	mutex_unlock(&group->fanotify_data.access_mutex);

	pr_debug("%s: found return_re=%p\n", __func__, return_re);

	return return_re;
}

static int process_access_response(struct fsnotify_group *group,
				   struct fanotify_response *response_struct)
{
	struct fanotify_response_event *re;
	__s32 fd = response_struct->fd;
	__u32 response = response_struct->response;

	pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
		 fd, response);
	/*
	 * make sure the response is valid, if invalid we do nothing and either
L
Lucas De Marchi 已提交
170
	 * userspace can send a valid response or we will clean it up after the
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
	 * timeout
	 */
	switch (response) {
	case FAN_ALLOW:
	case FAN_DENY:
		break;
	default:
		return -EINVAL;
	}

	if (fd < 0)
		return -EINVAL;

	re = dequeue_re(group, fd);
	if (!re)
		return -ENOENT;

	re->event->response = response;

	wake_up(&group->fanotify_data.access_waitq);

	kmem_cache_free(fanotify_response_event_cache, re);

	return 0;
}

static int prepare_for_access_response(struct fsnotify_group *group,
				       struct fsnotify_event *event,
				       __s32 fd)
{
	struct fanotify_response_event *re;

	if (!(event->mask & FAN_ALL_PERM_EVENTS))
		return 0;

	re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
	if (!re)
		return -ENOMEM;

210
	re->event = FANOTIFY_E(event);
211 212 213
	re->fd = fd;

	mutex_lock(&group->fanotify_data.access_mutex);
214

215
	if (atomic_read(&group->fanotify_data.bypass_perm)) {
216 217
		mutex_unlock(&group->fanotify_data.access_mutex);
		kmem_cache_free(fanotify_response_event_cache, re);
218
		FANOTIFY_E(event)->response = FAN_ALLOW;
219 220 221
		return 0;
	}
		
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
	list_add_tail(&re->list, &group->fanotify_data.access_list);
	mutex_unlock(&group->fanotify_data.access_mutex);

	return 0;
}

#else
static int prepare_for_access_response(struct fsnotify_group *group,
				       struct fsnotify_event *event,
				       __s32 fd)
{
	return 0;
}

#endif

E
Eric Paris 已提交
238 239 240 241 242
static ssize_t copy_event_to_user(struct fsnotify_group *group,
				  struct fsnotify_event *event,
				  char __user *buf)
{
	struct fanotify_event_metadata fanotify_event_metadata;
243
	struct file *f;
244
	int fd, ret;
E
Eric Paris 已提交
245 246 247

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

248
	ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
249 250
	if (ret < 0)
		goto out;
251

252
	fd = fanotify_event_metadata.fd;
253
	ret = -EFAULT;
254 255
	if (copy_to_user(buf, &fanotify_event_metadata,
			 fanotify_event_metadata.event_len))
256 257 258 259 260
		goto out_close_fd;

	ret = prepare_for_access_response(group, event, fd);
	if (ret)
		goto out_close_fd;
E
Eric Paris 已提交
261

262 263
	if (fd != FAN_NOFD)
		fd_install(fd, f);
264
	return fanotify_event_metadata.event_len;
265 266

out_close_fd:
267 268 269 270
	if (fd != FAN_NOFD) {
		put_unused_fd(fd);
		fput(f);
	}
271 272 273
out:
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
	if (event->mask & FAN_ALL_PERM_EVENTS) {
274
		FANOTIFY_E(event)->response = FAN_DENY;
275 276 277
		wake_up(&group->fanotify_data.access_waitq);
	}
#endif
278
	return ret;
E
Eric Paris 已提交
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
}

/* fanotify userspace file descriptor functions */

/*
 * poll() support: report POLLIN | POLLRDNORM when the group's
 * notification queue is non-empty.
 */
static unsigned int fanotify_poll(struct file *file, poll_table *wait)
{
	struct fsnotify_group *group = file->private_data;
	int ret = 0;

	poll_wait(file, &group->notification_waitq, wait);
	mutex_lock(&group->notification_mutex);
	if (!fsnotify_notify_queue_is_empty(group))
		ret = POLLIN | POLLRDNORM;
	mutex_unlock(&group->notification_mutex);

	return ret;
}

/*
 * read() support: drain as many events as fit in the user buffer,
 * blocking (unless O_NONBLOCK) while the queue is empty.  Returns the
 * number of bytes written, or a negative errno if nothing was copied.
 */
static ssize_t fanotify_read(struct file *file, char __user *buf,
			     size_t count, loff_t *pos)
{
	struct fsnotify_group *group;
	struct fsnotify_event *kevent;
	char __user *start;
	int ret;
	DEFINE_WAIT(wait);

	start = buf;
	group = file->private_data;

	pr_debug("%s: group=%p\n", __func__, group);

	while (1) {
		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);

		mutex_lock(&group->notification_mutex);
		kevent = get_one_event(group, count);
		mutex_unlock(&group->notification_mutex);

		if (kevent) {
			ret = PTR_ERR(kevent);
			if (IS_ERR(kevent))
				break;
			ret = copy_event_to_user(group, kevent, buf);
			/*
			 * Permission events get destroyed after we
			 * receive response
			 */
			if (!(kevent->mask & FAN_ALL_PERM_EVENTS))
				fsnotify_destroy_event(group, kevent);
			if (ret < 0)
				break;
			buf += ret;
			count -= ret;
			continue;
		}

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			break;
		ret = -ERESTARTSYS;
		if (signal_pending(current))
			break;

		/* return a short read rather than sleeping mid-buffer */
		if (start != buf)
			break;

		schedule();
	}

	finish_wait(&group->notification_waitq, &wait);
	if (start != buf && ret != -EFAULT)
		ret = buf - start;
	return ret;
}

354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
/*
 * write() support: accept a struct fanotify_response carrying the
 * userspace verdict for a pending permission event.  Returns the
 * number of bytes consumed, or a negative errno.  Without
 * CONFIG_FANOTIFY_ACCESS_PERMISSIONS there is nothing to write to.
 */
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
	struct fanotify_response answer = { .fd = -1, .response = -1 };
	struct fsnotify_group *group = file->private_data;
	int ret;

	/* never read past the response structure */
	if (count > sizeof(answer))
		count = sizeof(answer);

	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);

	if (copy_from_user(&answer, buf, count))
		return -EFAULT;

	ret = process_access_response(group, &answer);
	if (ret < 0)
		count = ret;

	return count;
#else
	return -EINVAL;
#endif
}

381 382 383 384
static int fanotify_release(struct inode *ignored, struct file *file)
{
	struct fsnotify_group *group = file->private_data;

385
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
386 387
	struct fanotify_response_event *re, *lre;

388 389
	mutex_lock(&group->fanotify_data.access_mutex);

390
	atomic_inc(&group->fanotify_data.bypass_perm);
391 392 393 394 395 396 397 398 399 400 401 402 403 404

	list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
		pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
			 re, re->event);

		list_del_init(&re->list);
		re->event->response = FAN_ALLOW;

		kmem_cache_free(fanotify_response_event_cache, re);
	}
	mutex_unlock(&group->fanotify_data.access_mutex);

	wake_up(&group->fanotify_data.access_waitq);
#endif
405

406
	/* matches the fanotify_init->fsnotify_alloc_group */
407
	fsnotify_destroy_group(group);
408 409 410 411

	return 0;
}

E
Eric Paris 已提交
412 413 414
static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct fsnotify_group *group;
415
	struct fsnotify_event *fsn_event;
E
Eric Paris 已提交
416 417 418 419 420 421 422 423 424 425 426
	void __user *p;
	int ret = -ENOTTY;
	size_t send_len = 0;

	group = file->private_data;

	p = (void __user *) arg;

	switch (cmd) {
	case FIONREAD:
		mutex_lock(&group->notification_mutex);
427
		list_for_each_entry(fsn_event, &group->notification_list, list)
E
Eric Paris 已提交
428 429 430 431 432 433 434 435 436
			send_len += FAN_EVENT_METADATA_LEN;
		mutex_unlock(&group->notification_mutex);
		ret = put_user(send_len, (int __user *) p);
		break;
	}

	return ret;
}

437
static const struct file_operations fanotify_fops = {
438
	.show_fdinfo	= fanotify_show_fdinfo,
E
Eric Paris 已提交
439 440
	.poll		= fanotify_poll,
	.read		= fanotify_read,
441
	.write		= fanotify_write,
442 443
	.fasync		= NULL,
	.release	= fanotify_release,
E
Eric Paris 已提交
444 445
	.unlocked_ioctl	= fanotify_ioctl,
	.compat_ioctl	= fanotify_ioctl,
446
	.llseek		= noop_llseek,
447 448
};

449 450 451 452 453 454 455 456 457 458 459 460 461 462
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
{
	kmem_cache_free(fanotify_mark_cache, fsn_mark);
}

/*
 * Resolve the (dfd, filename) pair of fanotify_mark() to a struct path.
 * A NULL filename means "mark dfd itself".  On success *path holds a
 * reference the caller must path_put(); the caller must also have read
 * permission on the resolved inode.
 */
static int fanotify_find_path(int dfd, const char __user *filename,
			      struct path *path, unsigned int flags)
{
	int ret;

	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
		 dfd, filename, flags);

	if (filename == NULL) {
		struct fd f = fdget(dfd);

		ret = -EBADF;
		if (!f.file)
			goto out;

		ret = -ENOTDIR;
		if ((flags & FAN_MARK_ONLYDIR) &&
		    !(S_ISDIR(file_inode(f.file)->i_mode))) {
			fdput(f);
			goto out;
		}

		*path = f.file->f_path;
		path_get(path);
		fdput(f);
	} else {
		unsigned int lookup_flags = 0;

		if (!(flags & FAN_MARK_DONT_FOLLOW))
			lookup_flags |= LOOKUP_FOLLOW;
		if (flags & FAN_MARK_ONLYDIR)
			lookup_flags |= LOOKUP_DIRECTORY;

		ret = user_path_at(dfd, filename, lookup_flags, path);
		if (ret)
			goto out;
	}

	/* you can only watch an inode if you have read permissions on it */
	ret = inode_permission(path->dentry->d_inode, MAY_READ);
	if (ret)
		path_put(path);
out:
	return ret;
}

500 501
static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
					    __u32 mask,
502 503
					    unsigned int flags,
					    int *destroy)
504 505 506 507
{
	__u32 oldmask;

	spin_lock(&fsn_mark->lock);
508 509 510 511 512 513 514
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
	} else {
		oldmask = fsn_mark->ignored_mask;
		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
	}
515 516
	spin_unlock(&fsn_mark->lock);

517
	*destroy = !(oldmask & ~mask);
518 519 520 521

	return mask & oldmask;
}

522
static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
523 524
					 struct vfsmount *mnt, __u32 mask,
					 unsigned int flags)
525 526
{
	struct fsnotify_mark *fsn_mark = NULL;
527
	__u32 removed;
528
	int destroy_mark;
529

530
	mutex_lock(&group->mark_mutex);
531
	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
532 533
	if (!fsn_mark) {
		mutex_unlock(&group->mark_mutex);
534
		return -ENOENT;
535
	}
536

537 538 539
	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
						 &destroy_mark);
	if (destroy_mark)
540 541
		fsnotify_destroy_mark_locked(fsn_mark, group);
	mutex_unlock(&group->mark_mutex);
542

543
	fsnotify_put_mark(fsn_mark);
544
	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
545 546 547 548
		fsnotify_recalc_vfsmount_mask(mnt);

	return 0;
}
549

550
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
551 552
				      struct inode *inode, __u32 mask,
				      unsigned int flags)
553 554 555
{
	struct fsnotify_mark *fsn_mark = NULL;
	__u32 removed;
556
	int destroy_mark;
557

558
	mutex_lock(&group->mark_mutex);
559
	fsn_mark = fsnotify_find_inode_mark(group, inode);
560 561
	if (!fsn_mark) {
		mutex_unlock(&group->mark_mutex);
562
		return -ENOENT;
563
	}
564

565 566 567
	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
						 &destroy_mark);
	if (destroy_mark)
568 569 570
		fsnotify_destroy_mark_locked(fsn_mark, group);
	mutex_unlock(&group->mark_mutex);

571
	/* matches the fsnotify_find_inode_mark() */
572
	fsnotify_put_mark(fsn_mark);
573 574
	if (removed & inode->i_fsnotify_mask)
		fsnotify_recalc_inode_mask(inode);
575

576 577 578
	return 0;
}

579 580 581
static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
				       __u32 mask,
				       unsigned int flags)
582
{
583
	__u32 oldmask = -1;
584 585

	spin_lock(&fsn_mark->lock);
586 587 588 589
	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		oldmask = fsn_mark->mask;
		fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
	} else {
590 591
		__u32 tmask = fsn_mark->ignored_mask | mask;
		fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
592 593
		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
594
	}
595 596 597 598 599 600

	if (!(flags & FAN_MARK_ONDIR)) {
		__u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
		fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
	}

601 602 603 604 605
	spin_unlock(&fsn_mark->lock);

	return mask & ~oldmask;
}

606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
						   struct inode *inode,
						   struct vfsmount *mnt)
{
	struct fsnotify_mark *mark;
	int ret;

	if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
		return ERR_PTR(-ENOSPC);

	mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
	if (!mark)
		return ERR_PTR(-ENOMEM);

	fsnotify_init_mark(mark, fanotify_free_mark);
	ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0);
	if (ret) {
		fsnotify_put_mark(mark);
		return ERR_PTR(ret);
	}

	return mark;
}


631
static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
632 633
				      struct vfsmount *mnt, __u32 mask,
				      unsigned int flags)
634 635
{
	struct fsnotify_mark *fsn_mark;
636
	__u32 added;
637

638
	mutex_lock(&group->mark_mutex);
639 640
	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
	if (!fsn_mark) {
641 642
		fsn_mark = fanotify_add_new_mark(group, NULL, mnt);
		if (IS_ERR(fsn_mark)) {
643
			mutex_unlock(&group->mark_mutex);
644
			return PTR_ERR(fsn_mark);
645
		}
646
	}
647
	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
648
	mutex_unlock(&group->mark_mutex);
649

650
	if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
E
Eric Paris 已提交
651
		fsnotify_recalc_vfsmount_mask(mnt);
652

653
	fsnotify_put_mark(fsn_mark);
654
	return 0;
655 656
}

657
static int fanotify_add_inode_mark(struct fsnotify_group *group,
658 659
				   struct inode *inode, __u32 mask,
				   unsigned int flags)
660 661
{
	struct fsnotify_mark *fsn_mark;
662
	__u32 added;
663 664

	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
665

666 667 668 669 670 671 672 673 674 675
	/*
	 * If some other task has this inode open for write we should not add
	 * an ignored mark, unless that ignored mark is supposed to survive
	 * modification changes anyway.
	 */
	if ((flags & FAN_MARK_IGNORED_MASK) &&
	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
	    (atomic_read(&inode->i_writecount) > 0))
		return 0;

676
	mutex_lock(&group->mark_mutex);
677
	fsn_mark = fsnotify_find_inode_mark(group, inode);
678
	if (!fsn_mark) {
679 680
		fsn_mark = fanotify_add_new_mark(group, inode, NULL);
		if (IS_ERR(fsn_mark)) {
681
			mutex_unlock(&group->mark_mutex);
682
			return PTR_ERR(fsn_mark);
683
		}
684
	}
685
	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
686
	mutex_unlock(&group->mark_mutex);
687

E
Eric Paris 已提交
688 689
	if (added & ~inode->i_fsnotify_mask)
		fsnotify_recalc_inode_mask(inode);
690

691
	fsnotify_put_mark(fsn_mark);
692
	return 0;
693
}
694

695
/* fanotify syscalls */
696
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
697
{
698 699
	struct fsnotify_group *group;
	int f_flags, fd;
700
	struct user_struct *user;
701

702 703
	pr_debug("%s: flags=%d event_f_flags=%d\n",
		__func__, flags, event_f_flags);
704 705

	if (!capable(CAP_SYS_ADMIN))
706
		return -EPERM;
707 708 709 710

	if (flags & ~FAN_ALL_INIT_FLAGS)
		return -EINVAL;

711 712 713 714 715 716
	user = get_current_user();
	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
		free_uid(user);
		return -EMFILE;
	}

717
	f_flags = O_RDWR | FMODE_NONOTIFY;
718 719 720 721 722 723 724
	if (flags & FAN_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (flags & FAN_NONBLOCK)
		f_flags |= O_NONBLOCK;

	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
725 726
	if (IS_ERR(group)) {
		free_uid(user);
727
		return PTR_ERR(group);
728
	}
729

730 731 732
	group->fanotify_data.user = user;
	atomic_inc(&user->fanotify_listeners);

733
	group->fanotify_data.f_flags = event_f_flags;
E
Eric Paris 已提交
734 735 736 737
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
	mutex_init(&group->fanotify_data.access_mutex);
	init_waitqueue_head(&group->fanotify_data.access_waitq);
	INIT_LIST_HEAD(&group->fanotify_data.access_list);
738
	atomic_set(&group->fanotify_data.bypass_perm, 0);
E
Eric Paris 已提交
739
#endif
740 741 742 743 744 745 746 747 748 749 750 751
	switch (flags & FAN_ALL_CLASS_BITS) {
	case FAN_CLASS_NOTIF:
		group->priority = FS_PRIO_0;
		break;
	case FAN_CLASS_CONTENT:
		group->priority = FS_PRIO_1;
		break;
	case FAN_CLASS_PRE_CONTENT:
		group->priority = FS_PRIO_2;
		break;
	default:
		fd = -EINVAL;
752
		goto out_destroy_group;
753
	}
E
Eric Paris 已提交
754

755 756 757
	if (flags & FAN_UNLIMITED_QUEUE) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
758
			goto out_destroy_group;
759 760 761 762
		group->max_events = UINT_MAX;
	} else {
		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
	}
763

764 765 766
	if (flags & FAN_UNLIMITED_MARKS) {
		fd = -EPERM;
		if (!capable(CAP_SYS_ADMIN))
767
			goto out_destroy_group;
768 769 770 771
		group->fanotify_data.max_marks = UINT_MAX;
	} else {
		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
	}
772

773 774
	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
	if (fd < 0)
775
		goto out_destroy_group;
776 777 778

	return fd;

779 780
out_destroy_group:
	fsnotify_destroy_group(group);
781
	return fd;
782
}
783

784 785 786
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
			      __u64, mask, int, dfd,
			      const char  __user *, pathname)
787
{
788 789
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;
790
	struct fsnotify_group *group;
791
	struct fd f;
792
	struct path path;
793
	int ret;
794 795 796 797 798 799 800 801

	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
		 __func__, fanotify_fd, flags, dfd, pathname, mask);

	/* we only use the lower 32 bits as of right now. */
	if (mask & ((__u64)0xffffffff << 32))
		return -EINVAL;

802 803
	if (flags & ~FAN_ALL_MARK_FLAGS)
		return -EINVAL;
E
Eric Paris 已提交
804
	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
805
	case FAN_MARK_ADD:		/* fallthrough */
806
	case FAN_MARK_REMOVE:
807 808
		if (!mask)
			return -EINVAL;
E
Eric Paris 已提交
809
	case FAN_MARK_FLUSH:
810 811 812 813
		break;
	default:
		return -EINVAL;
	}
814 815 816 817 818 819

	if (mask & FAN_ONDIR) {
		flags |= FAN_MARK_ONDIR;
		mask &= ~FAN_ONDIR;
	}

820 821 822
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
	if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
#else
823
	if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
824
#endif
825 826
		return -EINVAL;

827 828
	f = fdget(fanotify_fd);
	if (unlikely(!f.file))
829 830 831 832
		return -EBADF;

	/* verify that this is indeed an fanotify instance */
	ret = -EINVAL;
833
	if (unlikely(f.file->f_op != &fanotify_fops))
834
		goto fput_and_out;
835
	group = f.file->private_data;
836 837 838 839 840 841 842 843 844

	/*
	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF.  These are not
	 * allowed to set permissions events.
	 */
	ret = -EINVAL;
	if (mask & FAN_ALL_PERM_EVENTS &&
	    group->priority == FS_PRIO_0)
		goto fput_and_out;
845 846 847 848 849 850

	ret = fanotify_find_path(dfd, pathname, &path, flags);
	if (ret)
		goto fput_and_out;

	/* inode held in place by reference to path; group by fget on fd */
851
	if (!(flags & FAN_MARK_MOUNT))
852 853 854
		inode = path.dentry->d_inode;
	else
		mnt = path.mnt;
855 856

	/* create/update an inode mark */
E
Eric Paris 已提交
857
	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
858
	case FAN_MARK_ADD:
859
		if (flags & FAN_MARK_MOUNT)
860
			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
861
		else
862
			ret = fanotify_add_inode_mark(group, inode, mask, flags);
863 864
		break;
	case FAN_MARK_REMOVE:
865
		if (flags & FAN_MARK_MOUNT)
866
			ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
867
		else
868
			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
869
		break;
E
Eric Paris 已提交
870 871 872 873 874 875
	case FAN_MARK_FLUSH:
		if (flags & FAN_MARK_MOUNT)
			fsnotify_clear_vfsmount_marks_by_group(group);
		else
			fsnotify_clear_inode_marks_by_group(group);
		break;
876 877 878
	default:
		ret = -EINVAL;
	}
879 880 881

	path_put(&path);
fput_and_out:
882
	fdput(f);
883 884 885
	return ret;
}


#ifdef CONFIG_COMPAT
/*
 * 32-bit compat entry: the 64-bit mask arrives split into two __u32
 * halves whose order depends on endianness; reassemble and forward.
 */
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
				int, fanotify_fd, unsigned int, flags,
				__u32, mask0, __u32, mask1, int, dfd,
				const char  __user *, pathname)
{
	return sys_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
				((__u64)mask0 << 32) | mask1,
#else
				((__u64)mask1 << 32) | mask0,
#endif
				 dfd, pathname);
}
#endif

902
/*
903
 * fanotify_user_setup - Our initialization function.  Note that we cannot return
904 905 906 907 908 909
 * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
 * must result in panic().
 */
static int __init fanotify_user_setup(void)
{
	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
910 911
	fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
						   SLAB_PANIC);
912
	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
913 914

	return 0;
915
}
916
device_initcall(fanotify_user_setup);