提交 23281c80 编写于 作者: L Linus Torvalds

Merge branch 'fsnotify' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify updates from Jan Kara:

 - fixes of use-after-free issues when handling fanotify permission
   events from Miklos

 - refcount_t conversions from Elena

 - fixes of ENOMEM handling in dnotify and fsnotify from me

* 'fsnotify' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  fsnotify: convert fsnotify_mark.refcnt from atomic_t to refcount_t
  fanotify: clean up CONFIG_FANOTIFY_ACCESS_PERMISSIONS ifdefs
  fsnotify: clean up fsnotify()
  fanotify: fix fsnotify_prepare_user_wait() failure
  fsnotify: fix pinning group in fsnotify_prepare_user_wait()
  fsnotify: pin both inode and vfsmount mark
  fsnotify: clean up fsnotify_prepare/finish_user_wait()
  fsnotify: convert fsnotify_group.refcnt from atomic_t to refcount_t
  fsnotify: Protect bail out path of fsnotify_add_mark_locked() properly
  dnotify: Handle errors from fsnotify_add_mark_locked() in fcntl_dirnotify()
...@@ -319,7 +319,11 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) ...@@ -319,7 +319,11 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
spin_lock(&fsn_mark->lock); spin_lock(&fsn_mark->lock);
} else { } else {
fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); error = fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0);
if (error) {
mutex_unlock(&dnotify_group->mark_mutex);
goto out_err;
}
spin_lock(&new_fsn_mark->lock); spin_lock(&new_fsn_mark->lock);
fsn_mark = new_fsn_mark; fsn_mark = new_fsn_mark;
dn_mark = new_dn_mark; dn_mark = new_dn_mark;
...@@ -345,6 +349,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) ...@@ -345,6 +349,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
*/ */
if (dn_mark == new_dn_mark) if (dn_mark == new_dn_mark)
destroy = 1; destroy = 1;
error = 0;
goto out; goto out;
} }
......
...@@ -36,15 +36,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) ...@@ -36,15 +36,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
pr_debug("%s: list=%p event=%p\n", __func__, list, event); pr_debug("%s: list=%p event=%p\n", __func__, list, event);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* /*
* Don't merge a permission event with any other event so that we know * Don't merge a permission event with any other event so that we know
* the event structure we have created in fanotify_handle_event() is the * the event structure we have created in fanotify_handle_event() is the
* one we should check for permission response. * one we should check for permission response.
*/ */
if (event->mask & FAN_ALL_PERM_EVENTS) if (fanotify_is_perm_event(event->mask))
return 0; return 0;
#endif
list_for_each_entry_reverse(test_event, list, list) { list_for_each_entry_reverse(test_event, list, list) {
if (should_merge(test_event, event)) { if (should_merge(test_event, event)) {
...@@ -56,7 +54,6 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) ...@@ -56,7 +54,6 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
return 0; return 0;
} }
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response(struct fsnotify_group *group, static int fanotify_get_response(struct fsnotify_group *group,
struct fanotify_perm_event_info *event, struct fanotify_perm_event_info *event,
struct fsnotify_iter_info *iter_info) struct fsnotify_iter_info *iter_info)
...@@ -65,19 +62,8 @@ static int fanotify_get_response(struct fsnotify_group *group, ...@@ -65,19 +62,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event); pr_debug("%s: group=%p event=%p\n", __func__, group, event);
/*
* fsnotify_prepare_user_wait() fails if we race with mark deletion.
* Just let the operation pass in that case.
*/
if (!fsnotify_prepare_user_wait(iter_info)) {
event->response = FAN_ALLOW;
goto out;
}
wait_event(group->fanotify_data.access_waitq, event->response); wait_event(group->fanotify_data.access_waitq, event->response);
fsnotify_finish_user_wait(iter_info);
out:
/* userspace responded, convert to something usable */ /* userspace responded, convert to something usable */
switch (event->response) { switch (event->response) {
case FAN_ALLOW: case FAN_ALLOW:
...@@ -94,7 +80,6 @@ static int fanotify_get_response(struct fsnotify_group *group, ...@@ -94,7 +80,6 @@ static int fanotify_get_response(struct fsnotify_group *group,
return ret; return ret;
} }
#endif
static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark, struct fsnotify_mark *vfsmnt_mark,
...@@ -153,8 +138,7 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, ...@@ -153,8 +138,7 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
{ {
struct fanotify_event_info *event; struct fanotify_event_info *event;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (fanotify_is_perm_event(mask)) {
if (mask & FAN_ALL_PERM_EVENTS) {
struct fanotify_perm_event_info *pevent; struct fanotify_perm_event_info *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, pevent = kmem_cache_alloc(fanotify_perm_event_cachep,
...@@ -165,7 +149,6 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, ...@@ -165,7 +149,6 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
pevent->response = 0; pevent->response = 0;
goto init; goto init;
} }
#endif
event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
if (!event) if (!event)
return NULL; return NULL;
...@@ -212,9 +195,19 @@ static int fanotify_handle_event(struct fsnotify_group *group, ...@@ -212,9 +195,19 @@ static int fanotify_handle_event(struct fsnotify_group *group,
pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
mask); mask);
if (fanotify_is_perm_event(mask)) {
/*
* fsnotify_prepare_user_wait() fails if we race with mark
* deletion. Just let the operation pass in that case.
*/
if (!fsnotify_prepare_user_wait(iter_info))
return 0;
}
event = fanotify_alloc_event(inode, mask, data); event = fanotify_alloc_event(inode, mask, data);
ret = -ENOMEM;
if (unlikely(!event)) if (unlikely(!event))
return -ENOMEM; goto finish;
fsn_event = &event->fse; fsn_event = &event->fse;
ret = fsnotify_add_event(group, fsn_event, fanotify_merge); ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
...@@ -224,16 +217,16 @@ static int fanotify_handle_event(struct fsnotify_group *group, ...@@ -224,16 +217,16 @@ static int fanotify_handle_event(struct fsnotify_group *group,
/* Our event wasn't used in the end. Free it. */ /* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event); fsnotify_destroy_event(group, fsn_event);
return 0; ret = 0;
} } else if (fanotify_is_perm_event(mask)) {
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (mask & FAN_ALL_PERM_EVENTS) {
ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
iter_info); iter_info);
fsnotify_destroy_event(group, fsn_event); fsnotify_destroy_event(group, fsn_event);
} }
#endif finish:
if (fanotify_is_perm_event(mask))
fsnotify_finish_user_wait(iter_info);
return ret; return ret;
} }
...@@ -253,13 +246,11 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) ...@@ -253,13 +246,11 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
event = FANOTIFY_E(fsn_event); event = FANOTIFY_E(fsn_event);
path_put(&event->path); path_put(&event->path);
put_pid(event->tgid); put_pid(event->tgid);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (fanotify_is_perm_event(fsn_event->mask)) {
if (fsn_event->mask & FAN_ALL_PERM_EVENTS) {
kmem_cache_free(fanotify_perm_event_cachep, kmem_cache_free(fanotify_perm_event_cachep,
FANOTIFY_PE(fsn_event)); FANOTIFY_PE(fsn_event));
return; return;
} }
#endif
kmem_cache_free(fanotify_event_cachep, event); kmem_cache_free(fanotify_event_cachep, event);
} }
......
...@@ -22,7 +22,6 @@ struct fanotify_event_info { ...@@ -22,7 +22,6 @@ struct fanotify_event_info {
struct pid *tgid; struct pid *tgid;
}; };
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* /*
* Structure for permission fanotify events. It gets allocated and freed in * Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the * fanotify_handle_event() since we wait there for user response. When the
...@@ -41,7 +40,12 @@ FANOTIFY_PE(struct fsnotify_event *fse) ...@@ -41,7 +40,12 @@ FANOTIFY_PE(struct fsnotify_event *fse)
{ {
return container_of(fse, struct fanotify_perm_event_info, fae.fse); return container_of(fse, struct fanotify_perm_event_info, fae.fse);
} }
#endif
/*
 * Tell whether @mask describes a permission event.
 *
 * Returns true only when CONFIG_FANOTIFY_ACCESS_PERMISSIONS is enabled and
 * at least one FAN_ALL_PERM_EVENTS bit is set in @mask. Because the config
 * test goes through IS_ENABLED() rather than #ifdef, callers can use this
 * helper in ordinary C conditions instead of wrapping code in preprocessor
 * blocks.
 */
static inline bool fanotify_is_perm_event(u32 mask)
{
/* '&' binds tighter than '&&': this is IS_ENABLED(...) && (mask & ...) */
return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) &&
mask & FAN_ALL_PERM_EVENTS;
}
static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
{ {
......
...@@ -143,7 +143,6 @@ static int fill_event_metadata(struct fsnotify_group *group, ...@@ -143,7 +143,6 @@ static int fill_event_metadata(struct fsnotify_group *group,
return ret; return ret;
} }
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static struct fanotify_perm_event_info *dequeue_event( static struct fanotify_perm_event_info *dequeue_event(
struct fsnotify_group *group, int fd) struct fsnotify_group *group, int fd)
{ {
...@@ -200,7 +199,6 @@ static int process_access_response(struct fsnotify_group *group, ...@@ -200,7 +199,6 @@ static int process_access_response(struct fsnotify_group *group,
return 0; return 0;
} }
#endif
static ssize_t copy_event_to_user(struct fsnotify_group *group, static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event *event, struct fsnotify_event *event,
...@@ -222,10 +220,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, ...@@ -222,10 +220,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
fanotify_event_metadata.event_len)) fanotify_event_metadata.event_len))
goto out_close_fd; goto out_close_fd;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (fanotify_is_perm_event(event->mask))
if (event->mask & FAN_ALL_PERM_EVENTS)
FANOTIFY_PE(event)->fd = fd; FANOTIFY_PE(event)->fd = fd;
#endif
if (fd != FAN_NOFD) if (fd != FAN_NOFD)
fd_install(fd, f); fd_install(fd, f);
...@@ -310,10 +306,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, ...@@ -310,10 +306,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
* Permission events get queued to wait for response. Other * Permission events get queued to wait for response. Other
* events can be destroyed now. * events can be destroyed now.
*/ */
if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { if (!fanotify_is_perm_event(kevent->mask)) {
fsnotify_destroy_event(group, kevent); fsnotify_destroy_event(group, kevent);
} else { } else {
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (ret <= 0) { if (ret <= 0) {
FANOTIFY_PE(kevent)->response = FAN_DENY; FANOTIFY_PE(kevent)->response = FAN_DENY;
wake_up(&group->fanotify_data.access_waitq); wake_up(&group->fanotify_data.access_waitq);
...@@ -323,7 +318,6 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, ...@@ -323,7 +318,6 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
&group->fanotify_data.access_list); &group->fanotify_data.access_list);
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
} }
#endif
} }
if (ret < 0) if (ret < 0)
break; break;
...@@ -339,11 +333,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, ...@@ -339,11 +333,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{ {
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_response response = { .fd = -1, .response = -1 }; struct fanotify_response response = { .fd = -1, .response = -1 };
struct fsnotify_group *group; struct fsnotify_group *group;
int ret; int ret;
if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
return -EINVAL;
group = file->private_data; group = file->private_data;
if (count > sizeof(response)) if (count > sizeof(response))
...@@ -359,16 +355,11 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t ...@@ -359,16 +355,11 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
count = ret; count = ret;
return count; return count;
#else
return -EINVAL;
#endif
} }
static int fanotify_release(struct inode *ignored, struct file *file) static int fanotify_release(struct inode *ignored, struct file *file)
{ {
struct fsnotify_group *group = file->private_data; struct fsnotify_group *group = file->private_data;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_perm_event_info *event, *next; struct fanotify_perm_event_info *event, *next;
struct fsnotify_event *fsn_event; struct fsnotify_event *fsn_event;
...@@ -404,14 +395,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) ...@@ -404,14 +395,14 @@ static int fanotify_release(struct inode *ignored, struct file *file)
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
fsnotify_destroy_event(group, fsn_event); fsnotify_destroy_event(group, fsn_event);
spin_lock(&group->notification_lock); spin_lock(&group->notification_lock);
} else } else {
FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
}
} }
spin_unlock(&group->notification_lock); spin_unlock(&group->notification_lock);
/* Response for all permission events it set, wakeup waiters */ /* Response for all permission events it set, wakeup waiters */
wake_up(&group->fanotify_data.access_waitq); wake_up(&group->fanotify_data.access_waitq);
#endif
/* matches the fanotify_init->fsnotify_alloc_group */ /* matches the fanotify_init->fsnotify_alloc_group */
fsnotify_destroy_group(group); fsnotify_destroy_group(group);
...@@ -769,10 +760,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) ...@@ -769,10 +760,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if (force_o_largefile()) if (force_o_largefile())
event_f_flags |= O_LARGEFILE; event_f_flags |= O_LARGEFILE;
group->fanotify_data.f_flags = event_f_flags; group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
init_waitqueue_head(&group->fanotify_data.access_waitq); init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list); INIT_LIST_HEAD(&group->fanotify_data.access_list);
#endif
switch (flags & FAN_ALL_CLASS_BITS) { switch (flags & FAN_ALL_CLASS_BITS) {
case FAN_CLASS_NOTIF: case FAN_CLASS_NOTIF:
group->priority = FS_PRIO_0; group->priority = FS_PRIO_0;
...@@ -826,6 +815,7 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, ...@@ -826,6 +815,7 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
struct fsnotify_group *group; struct fsnotify_group *group;
struct fd f; struct fd f;
struct path path; struct path path;
u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD;
int ret; int ret;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
...@@ -856,11 +846,10 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, ...@@ -856,11 +846,10 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
mask &= ~FAN_ONDIR; mask &= ~FAN_ONDIR;
} }
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) valid_mask |= FAN_ALL_PERM_EVENTS;
#else
if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD)) if (mask & ~valid_mask)
#endif
return -EINVAL; return -EINVAL;
f = fdget(fanotify_fd); f = fdget(fanotify_fd);
...@@ -950,10 +939,10 @@ static int __init fanotify_user_setup(void) ...@@ -950,10 +939,10 @@ static int __init fanotify_user_setup(void)
{ {
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event_info, fanotify_perm_event_cachep =
SLAB_PANIC); KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC);
#endif }
return 0; return 0;
} }
......
...@@ -243,6 +243,29 @@ static int send_to_group(struct inode *to_tell, ...@@ -243,6 +243,29 @@ static int send_to_group(struct inode *to_tell,
file_name, cookie, iter_info); file_name, cookie, iter_info);
} }
/*
 * Return the first mark on the object list reached through @connp.
 *
 * @connp: address of an object's connector pointer (fsnotify() passes
 *         &inode->i_fsnotify_marks or &mnt->mnt_fsnotify_marks).
 *
 * Both the connector and the list head are read with srcu_dereference()
 * against fsnotify_mark_srcu, so the caller is presumably expected to be
 * inside that SRCU read-side section -- confirm against the call sites.
 *
 * When there is no connector, node stays NULL and is handed unchanged to
 * hlist_entry_safe().
 */
static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
{
struct fsnotify_mark_connector *conn;
struct hlist_node *node = NULL;
conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
if (conn)
node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
}
/*
 * Return the mark that follows @mark on its object list.
 *
 * @mark: current mark, or NULL. A NULL @mark skips the dereference, so
 *        node stays NULL and is handed unchanged to hlist_entry_safe().
 *
 * The next pointer is read with srcu_dereference() against
 * fsnotify_mark_srcu, mirroring fsnotify_first_mark().
 */
static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
{
struct hlist_node *node = NULL;
if (mark)
node = srcu_dereference(mark->obj_list.next,
&fsnotify_mark_srcu);
return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
}
/* /*
* This is the main call to fsnotify. The VFS calls into hook specific functions * This is the main call to fsnotify. The VFS calls into hook specific functions
* in linux/fsnotify.h. Those functions then in turn call here. Here will call * in linux/fsnotify.h. Those functions then in turn call here. Here will call
...@@ -252,11 +275,7 @@ static int send_to_group(struct inode *to_tell, ...@@ -252,11 +275,7 @@ static int send_to_group(struct inode *to_tell,
int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
const unsigned char *file_name, u32 cookie) const unsigned char *file_name, u32 cookie)
{ {
struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_iter_info iter_info = {};
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
struct fsnotify_group *inode_group, *vfsmount_group;
struct fsnotify_mark_connector *inode_conn, *vfsmount_conn;
struct fsnotify_iter_info iter_info;
struct mount *mnt; struct mount *mnt;
int ret = 0; int ret = 0;
/* global tests shouldn't care about events on child only the specific event */ /* global tests shouldn't care about events on child only the specific event */
...@@ -291,26 +310,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, ...@@ -291,26 +310,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
if ((mask & FS_MODIFY) || if ((mask & FS_MODIFY) ||
(test_mask & to_tell->i_fsnotify_mask)) { (test_mask & to_tell->i_fsnotify_mask)) {
inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, iter_info.inode_mark =
&fsnotify_mark_srcu); fsnotify_first_mark(&to_tell->i_fsnotify_marks);
if (inode_conn)
inode_node = srcu_dereference(inode_conn->list.first,
&fsnotify_mark_srcu);
} }
if (mnt && ((mask & FS_MODIFY) || if (mnt && ((mask & FS_MODIFY) ||
(test_mask & mnt->mnt_fsnotify_mask))) { (test_mask & mnt->mnt_fsnotify_mask))) {
inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, iter_info.inode_mark =
&fsnotify_mark_srcu); fsnotify_first_mark(&to_tell->i_fsnotify_marks);
if (inode_conn) iter_info.vfsmount_mark =
inode_node = srcu_dereference(inode_conn->list.first, fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
&fsnotify_mark_srcu);
vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks,
&fsnotify_mark_srcu);
if (vfsmount_conn)
vfsmount_node = srcu_dereference(
vfsmount_conn->list.first,
&fsnotify_mark_srcu);
} }
/* /*
...@@ -318,39 +327,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, ...@@ -318,39 +327,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
* ignore masks are properly reflected for mount mark notifications. * ignore masks are properly reflected for mount mark notifications.
* That's why this traversal is so complicated... * That's why this traversal is so complicated...
*/ */
while (inode_node || vfsmount_node) { while (iter_info.inode_mark || iter_info.vfsmount_mark) {
inode_group = NULL; struct fsnotify_mark *inode_mark = iter_info.inode_mark;
inode_mark = NULL; struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark;
vfsmount_group = NULL;
vfsmount_mark = NULL; if (inode_mark && vfsmount_mark) {
int cmp = fsnotify_compare_groups(inode_mark->group,
if (inode_node) { vfsmount_mark->group);
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), if (cmp > 0)
struct fsnotify_mark, obj_list);
inode_group = inode_mark->group;
}
if (vfsmount_node) {
vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
struct fsnotify_mark, obj_list);
vfsmount_group = vfsmount_mark->group;
}
if (inode_group && vfsmount_group) {
int cmp = fsnotify_compare_groups(inode_group,
vfsmount_group);
if (cmp > 0) {
inode_group = NULL;
inode_mark = NULL; inode_mark = NULL;
} else if (cmp < 0) { else if (cmp < 0)
vfsmount_group = NULL;
vfsmount_mark = NULL; vfsmount_mark = NULL;
}
} }
iter_info.inode_mark = inode_mark;
iter_info.vfsmount_mark = vfsmount_mark;
ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
data, data_is, cookie, file_name, data, data_is, cookie, file_name,
&iter_info); &iter_info);
...@@ -358,12 +347,12 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, ...@@ -358,12 +347,12 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out; goto out;
if (inode_group) if (inode_mark)
inode_node = srcu_dereference(inode_node->next, iter_info.inode_mark =
&fsnotify_mark_srcu); fsnotify_next_mark(iter_info.inode_mark);
if (vfsmount_group) if (vfsmount_mark)
vfsmount_node = srcu_dereference(vfsmount_node->next, iter_info.vfsmount_mark =
&fsnotify_mark_srcu); fsnotify_next_mark(iter_info.vfsmount_mark);
} }
ret = 0; ret = 0;
out: out:
......
...@@ -107,7 +107,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) ...@@ -107,7 +107,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
*/ */
void fsnotify_get_group(struct fsnotify_group *group) void fsnotify_get_group(struct fsnotify_group *group)
{ {
atomic_inc(&group->refcnt); refcount_inc(&group->refcnt);
} }
/* /*
...@@ -115,7 +115,7 @@ void fsnotify_get_group(struct fsnotify_group *group) ...@@ -115,7 +115,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
*/ */
void fsnotify_put_group(struct fsnotify_group *group) void fsnotify_put_group(struct fsnotify_group *group)
{ {
if (atomic_dec_and_test(&group->refcnt)) if (refcount_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group); fsnotify_final_destroy_group(group);
} }
...@@ -131,7 +131,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) ...@@ -131,7 +131,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/* set to 0 when there a no external references to this group */ /* set to 0 when there a no external references to this group */
atomic_set(&group->refcnt, 1); refcount_set(&group->refcnt, 1);
atomic_set(&group->num_marks, 0); atomic_set(&group->num_marks, 0);
atomic_set(&group->user_waits, 0); atomic_set(&group->user_waits, 0);
......
...@@ -376,7 +376,7 @@ static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group ...@@ -376,7 +376,7 @@ static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group
fsnotify_get_mark(fsn_mark); fsnotify_get_mark(fsn_mark);
/* One ref for being in the idr, one ref we just took */ /* One ref for being in the idr, one ref we just took */
BUG_ON(atomic_read(&fsn_mark->refcnt) < 2); BUG_ON(refcount_read(&fsn_mark->refcnt) < 2);
} }
return i_mark; return i_mark;
...@@ -446,7 +446,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, ...@@ -446,7 +446,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
* One ref for being in the idr * One ref for being in the idr
* one ref grabbed by inotify_idr_find * one ref grabbed by inotify_idr_find
*/ */
if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { if (unlikely(refcount_read(&i_mark->fsn_mark.refcnt) < 2)) {
printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n",
__func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group);
/* we can't really recover with bad ref cnting.. */ /* we can't really recover with bad ref cnting.. */
......
...@@ -105,18 +105,8 @@ static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); ...@@ -105,18 +105,8 @@ static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);
void fsnotify_get_mark(struct fsnotify_mark *mark) void fsnotify_get_mark(struct fsnotify_mark *mark)
{ {
WARN_ON_ONCE(!atomic_read(&mark->refcnt)); WARN_ON_ONCE(!refcount_read(&mark->refcnt));
atomic_inc(&mark->refcnt); refcount_inc(&mark->refcnt);
}
/*
* Get mark reference when we found the mark via lockless traversal of object
* list. Mark can be already removed from the list by now and on its way to be
* destroyed once SRCU period ends.
*/
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
return atomic_inc_not_zero(&mark->refcnt);
} }
static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
...@@ -211,7 +201,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) ...@@ -211,7 +201,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
/* Catch marks that were actually never attached to object */ /* Catch marks that were actually never attached to object */
if (!mark->connector) { if (!mark->connector) {
if (atomic_dec_and_test(&mark->refcnt)) if (refcount_dec_and_test(&mark->refcnt))
fsnotify_final_mark_destroy(mark); fsnotify_final_mark_destroy(mark);
return; return;
} }
...@@ -220,7 +210,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) ...@@ -220,7 +210,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
* We have to be careful so that traversals of obj_list under lock can * We have to be careful so that traversals of obj_list under lock can
* safely grab mark reference. * safely grab mark reference.
*/ */
if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
return; return;
conn = mark->connector; conn = mark->connector;
...@@ -256,32 +246,60 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) ...@@ -256,32 +246,60 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
FSNOTIFY_REAPER_DELAY); FSNOTIFY_REAPER_DELAY);
} }
bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) /*
* Get mark reference when we found the mark via lockless traversal of object
* list. Mark can be already removed from the list by now and on its way to be
* destroyed once SRCU period ends.
*
* Also pin the group so it doesn't disappear under us.
*/
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{ {
struct fsnotify_group *group; if (!mark)
return true;
if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
return false; if (refcount_inc_not_zero(&mark->refcnt)) {
spin_lock(&mark->lock);
if (iter_info->inode_mark) if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
group = iter_info->inode_mark->group; /* mark is attached, group is still alive then */
else atomic_inc(&mark->group->user_waits);
group = iter_info->vfsmount_mark->group; spin_unlock(&mark->lock);
return true;
}
spin_unlock(&mark->lock);
fsnotify_put_mark(mark);
}
return false;
}
/* /*
* Since acquisition of mark reference is an atomic op as well, we can * Puts marks and wakes up group destruction if necessary.
* be sure this inc is seen before any effect of refcount increment. *
*/ * Pairs with fsnotify_get_mark_safe()
atomic_inc(&group->user_waits); */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
if (mark) {
struct fsnotify_group *group = mark->group;
if (iter_info->inode_mark) { fsnotify_put_mark(mark);
/* This can fail if mark is being removed */ /*
if (!fsnotify_get_mark_safe(iter_info->inode_mark)) * We abuse notification_waitq on group shutdown for waiting for
goto out_wait; * all marks pinned when waiting for userspace.
*/
if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
wake_up(&group->notification_waitq);
} }
if (iter_info->vfsmount_mark) { }
if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
goto out_inode; bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
{
/* This can fail if mark is being removed */
if (!fsnotify_get_mark_safe(iter_info->inode_mark))
return false;
if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
fsnotify_put_mark_wake(iter_info->inode_mark);
return false;
} }
/* /*
...@@ -292,34 +310,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) ...@@ -292,34 +310,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
return true; return true;
out_inode:
if (iter_info->inode_mark)
fsnotify_put_mark(iter_info->inode_mark);
out_wait:
if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
wake_up(&group->notification_waitq);
return false;
} }
void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{ {
struct fsnotify_group *group = NULL;
iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
if (iter_info->inode_mark) { fsnotify_put_mark_wake(iter_info->inode_mark);
group = iter_info->inode_mark->group; fsnotify_put_mark_wake(iter_info->vfsmount_mark);
fsnotify_put_mark(iter_info->inode_mark);
}
if (iter_info->vfsmount_mark) {
group = iter_info->vfsmount_mark->group;
fsnotify_put_mark(iter_info->vfsmount_mark);
}
/*
* We abuse notification_waitq on group shutdown for waiting for all
* marks pinned when waiting for userspace.
*/
if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
wake_up(&group->notification_waitq);
} }
/* /*
...@@ -338,7 +335,7 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) ...@@ -338,7 +335,7 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
atomic_read(&mark->refcnt) < 1 + refcount_read(&mark->refcnt) < 1 +
!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
spin_lock(&mark->lock); spin_lock(&mark->lock);
...@@ -599,9 +596,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, ...@@ -599,9 +596,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
return ret; return ret;
err: err:
spin_lock(&mark->lock);
mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
FSNOTIFY_MARK_FLAG_ATTACHED); FSNOTIFY_MARK_FLAG_ATTACHED);
list_del_init(&mark->g_list); list_del_init(&mark->g_list);
spin_unlock(&mark->lock);
atomic_dec(&group->num_marks); atomic_dec(&group->num_marks);
fsnotify_put_mark(mark); fsnotify_put_mark(mark);
...@@ -738,7 +737,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, ...@@ -738,7 +737,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
{ {
memset(mark, 0, sizeof(*mark)); memset(mark, 0, sizeof(*mark));
spin_lock_init(&mark->lock); spin_lock_init(&mark->lock);
atomic_set(&mark->refcnt, 1); refcount_set(&mark->refcnt, 1);
fsnotify_get_group(group); fsnotify_get_group(group);
mark->group = group; mark->group = group;
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/refcount.h>
/* /*
* IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
...@@ -136,7 +137,7 @@ struct fsnotify_group { ...@@ -136,7 +137,7 @@ struct fsnotify_group {
* inotify_init() and the refcnt will hit 0 only when that fd has been * inotify_init() and the refcnt will hit 0 only when that fd has been
* closed. * closed.
*/ */
atomic_t refcnt; /* things with interest in this group */ refcount_t refcnt; /* things with interest in this group */
const struct fsnotify_ops *ops; /* how this group handles things */ const struct fsnotify_ops *ops; /* how this group handles things */
...@@ -183,11 +184,9 @@ struct fsnotify_group { ...@@ -183,11 +184,9 @@ struct fsnotify_group {
#endif #endif
#ifdef CONFIG_FANOTIFY #ifdef CONFIG_FANOTIFY
struct fanotify_group_private_data { struct fanotify_group_private_data {
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* allows a group to block waiting for a userspace response */ /* allows a group to block waiting for a userspace response */
struct list_head access_list; struct list_head access_list;
wait_queue_head_t access_waitq; wait_queue_head_t access_waitq;
#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
int f_flags; int f_flags;
unsigned int max_marks; unsigned int max_marks;
struct user_struct *user; struct user_struct *user;
...@@ -244,7 +243,7 @@ struct fsnotify_mark { ...@@ -244,7 +243,7 @@ struct fsnotify_mark {
__u32 mask; __u32 mask;
/* We hold one for presence in g_list. Also one ref for each 'thing' /* We hold one for presence in g_list. Also one ref for each 'thing'
* in kernel that found and may be using this mark. */ * in kernel that found and may be using this mark. */
atomic_t refcnt; refcount_t refcnt;
/* Group this mark is for. Set on mark creation, stable until last ref /* Group this mark is for. Set on mark creation, stable until last ref
* is dropped */ * is dropped */
struct fsnotify_group *group; struct fsnotify_group *group;
......
...@@ -1008,7 +1008,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify ...@@ -1008,7 +1008,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
* We are guaranteed to have at least one reference to the mark from * We are guaranteed to have at least one reference to the mark from
* either the inode or the caller of fsnotify_destroy_mark(). * either the inode or the caller of fsnotify_destroy_mark().
*/ */
BUG_ON(atomic_read(&entry->refcnt) < 1); BUG_ON(refcount_read(&entry->refcnt) < 1);
} }
static const struct fsnotify_ops audit_tree_ops = { static const struct fsnotify_ops audit_tree_ops = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册