/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking/lifetime/and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * inode->i_lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list.  It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
 * given inode and each mark is hooked via the i_list. (and sorta the
 * free_i_list)
 *
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * Worst case we are given an inode and need to clean up all the marks on that
 * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
 * mark on the list we take a reference (so the mark can't disappear under us).
 * We remove that mark from the inode's list of marks and we add this mark to a
 * private list anchored on the stack using i_free_list; we walk i_free_list
 * and before we destroy the mark we make sure that we don't race with a
 * concurrent destroy_group by getting a ref to the mark's group and taking the
 * group's mutex.
 *
 * Very similarly for freeing by group, except we use free_g_list.
 *
 * This has the very interesting property of being able to run concurrently with
 * any (or all) other directions.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

/*
 * Deferred mark destruction state.
 *
 * Marks that are being torn down are queued on destroy_list (guarded by
 * destroy_lock) and destroy_waitq is used to wake the thread that drains
 * the list.  That thread waits for an fsnotify_mark_srcu grace period
 * before dropping the queued references.
 */
static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
static LIST_HEAD(destroy_list);
static DEFINE_SPINLOCK(destroy_lock);
struct srcu_struct fsnotify_mark_srcu;

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
	atomic_inc(&mark->refcnt);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
106 107 108
	if (atomic_dec_and_test(&mark->refcnt)) {
		if (mark->group)
			fsnotify_put_group(mark->group);
109
		mark->free_mark(mark);
110
	}
111 112 113 114 115 116 117
}

/*
 * Any time a mark is getting freed we end up here.
 * The caller had better be holding a reference to this mark so we don't actually
 * do the final put under the mark->lock
 */
118 119
void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
				  struct fsnotify_group *group)
120
{
121
	struct inode *inode = NULL;
122

123 124
	BUG_ON(!mutex_is_locked(&group->mark_mutex));

125
	spin_lock(&mark->lock);
126

127 128
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
129
		spin_unlock(&mark->lock);
130
		return;
131 132
	}

133 134
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;

135 136
	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
		inode = mark->i.inode;
137
		fsnotify_destroy_inode_mark(mark);
138 139
	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
		fsnotify_destroy_vfsmount_mark(mark);
140 141 142 143 144 145
	else
		BUG();

	list_del_init(&mark->g_list);

	spin_unlock(&mark->lock);
146

147 148
	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
		iput(inode);
149
	/* release lock temporarily */
150
	mutex_unlock(&group->mark_mutex);
151

152 153 154 155
	spin_lock(&destroy_lock);
	list_add(&mark->destroy_list, &destroy_list);
	spin_unlock(&destroy_lock);
	wake_up(&destroy_waitq);
156 157 158 159 160
	/*
	 * We don't necessarily have a ref on mark from caller so the above destroy
	 * may have actually freed it, unless this group provides a 'freeing_mark'
	 * function which must be holding a reference.
	 */
161

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
	/*
	 * Some groups like to know that marks are being freed.  This is a
	 * callback to the group function to let it know that this mark
	 * is being freed.
	 */
	if (group->ops->freeing_mark)
		group->ops->freeing_mark(mark, group);

	/*
	 * __fsnotify_update_child_dentry_flags(inode);
	 *
	 * I really want to call that, but we can't, we have no idea if the inode
	 * still exists the second we drop the mark->lock.
	 *
	 * The next time an event arrive to this inode from one of it's children
	 * __fsnotify_parent will see that the inode doesn't care about it's
	 * children and will update all of these flags then.  So really this
	 * is just a lazy update (and could be a perf win...)
	 */

182
	atomic_dec(&group->num_marks);
183

184
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
185 186 187 188 189
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
			   struct fsnotify_group *group)
{
190
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
191 192
	fsnotify_destroy_mark_locked(mark, group);
	mutex_unlock(&group->mark_mutex);
193 194
}

/*
 * Replace the event mask of @mark with @mask.  mark->lock must be held by
 * the caller.  Inode marks additionally go through the inode-specific mask
 * handler.
 */
void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
	assert_spin_locked(&mark->lock);

	mark->mask = mask;

	/* only inode marks need the extra inode-side handling */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_INODE))
		return;

	fsnotify_set_inode_mark_mask_locked(mark, mask);
}

/*
 * Replace the ignored-event mask of @mark with @mask.
 * mark->lock must be held by the caller.
 */
void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark,
					   __u32 mask)
{
	assert_spin_locked(&mark->lock);
	mark->ignored_mask = mask;
}
/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
217 218 219
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
			     struct fsnotify_group *group, struct inode *inode,
			     struct vfsmount *mnt, int allow_dups)
220 221 222 223 224
{
	int ret = 0;

	BUG_ON(inode && mnt);
	BUG_ON(!inode && !mnt);
225
	BUG_ON(!mutex_is_locked(&group->mark_mutex));
226 227 228

	/*
	 * LOCKING ORDER!!!!
229
	 * group->mark_mutex
230
	 * mark->lock
231 232
	 * inode->i_lock
	 */
233
	spin_lock(&mark->lock);
234 235
	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;

236
	fsnotify_get_group(group);
237 238 239 240 241 242 243 244 245
	mark->group = group;
	list_add(&mark->g_list, &group->marks_list);
	atomic_inc(&group->num_marks);
	fsnotify_get_mark(mark); /* for i_list and g_list */

	if (inode) {
		ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
		if (ret)
			goto err;
246 247 248 249
	} else if (mnt) {
		ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
		if (ret)
			goto err;
250 251 252 253
	} else {
		BUG();
	}

254 255
	/* this will pin the object if appropriate */
	fsnotify_set_mark_mask_locked(mark, mark->mask);
256 257 258 259 260 261 262
	spin_unlock(&mark->lock);

	if (inode)
		__fsnotify_update_child_dentry_flags(inode);

	return ret;
err:
263
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
264
	list_del_init(&mark->g_list);
265
	fsnotify_put_group(group);
266
	mark->group = NULL;
267 268 269 270
	atomic_dec(&group->num_marks);

	spin_unlock(&mark->lock);

271 272 273 274 275
	spin_lock(&destroy_lock);
	list_add(&mark->destroy_list, &destroy_list);
	spin_unlock(&destroy_lock);
	wake_up(&destroy_waitq);

276 277 278
	return ret;
}

/*
 * Attach @mark to @group and to either @inode or @mnt.
 *
 * Wrapper that takes group->mark_mutex around fsnotify_add_mark_locked()
 * and returns that function's result unchanged.
 */
int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
		      struct inode *inode, struct vfsmount *mnt, int allow_dups)
{
	int err;

	mutex_lock(&group->mark_mutex);
	err = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
	mutex_unlock(&group->mark_mutex);

	return err;
}

/*
290
 * clear any marks in a group in which mark->flags & flags is true
291
 */
292 293
void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
					 unsigned int flags)
294 295 296
{
	struct fsnotify_mark *lmark, *mark;

297
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
298
	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
299 300
		if (mark->flags & flags) {
			fsnotify_get_mark(mark);
301 302
			fsnotify_destroy_mark_locked(mark, group);
			fsnotify_put_mark(mark);
303
		}
304
	}
305
	mutex_unlock(&group->mark_mutex);
306 307
}

/*
 * Destroy every mark that belongs to @group, whatever its flags, by asking
 * fsnotify_clear_marks_by_group_flags() to match on all flag bits.
 */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
{
	const unsigned int all_flags = ~0U;

	fsnotify_clear_marks_by_group_flags(group, all_flags);
}

/*
 * Copy the watched object, group, mask and ->free_mark callback from @old
 * into @new.  old->lock must be held by the caller.
 *
 * If @old is attached to a group, an extra group reference is taken on
 * behalf of @new.
 */
void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
{
	assert_spin_locked(&old->lock);
	new->i.inode = old->i.inode;
	new->m.mnt = old->m.mnt;
	if (old->group)
		fsnotify_get_group(old->group);
	new->group = old->group;
	new->mask = old->mask;
	new->free_mark = old->free_mark;
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
			void (*free_mark)(struct fsnotify_mark *mark))
{
334
	memset(mark, 0, sizeof(*mark));
335 336 337 338
	spin_lock_init(&mark->lock);
	atomic_set(&mark->refcnt, 1);
	mark->free_mark = free_mark;
}

/*
 * Body of the mark destruction thread.
 *
 * Each round grabs everything currently queued on destroy_list, waits for
 * an fsnotify_mark_srcu grace period, and only then drops the queued
 * reference on each mark.  It then sleeps until more marks are queued.
 * The loop never terminates; @ignored is unused.
 */
static int fsnotify_mark_destroy(void *ignored)
{
	struct fsnotify_mark *mark, *next;
	LIST_HEAD(private_destroy_list);

	for (;;) {
		spin_lock(&destroy_lock);
		/* exchange the list head */
		list_replace_init(&destroy_list, &private_destroy_list);
		spin_unlock(&destroy_lock);

		synchronize_srcu(&fsnotify_mark_srcu);

		list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
			list_del_init(&mark->destroy_list);
			fsnotify_put_mark(mark);
		}

		wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
	}

	return 0;
}

/*
 * Initcall: start the kernel thread that runs fsnotify_mark_destroy().
 * Failing to start it is treated as fatal.
 */
static int __init fsnotify_mark_init(void)
{
	struct task_struct *destroyer = kthread_run(fsnotify_mark_destroy, NULL,
						    "fsnotify_mark");

	if (IS_ERR(destroyer))
		panic("unable to start fsnotify mark destruction thread.");

	return 0;
}
device_initcall(fsnotify_mark_init);