/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Released under GPL v2.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 *
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include "internal.h"
#include "pnode.h"

R
Ram Pai 已提交
16
/* return the next shared peer mount of @p */
17
static inline struct mount *next_peer(struct mount *p)
R
Ram Pai 已提交
18
{
19
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
R
Ram Pai 已提交
20 21
}

22
static inline struct mount *first_slave(struct mount *p)
R
Ram Pai 已提交
23
{
24
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
R
Ram Pai 已提交
25 26
}

27
static inline struct mount *next_slave(struct mount *p)
R
Ram Pai 已提交
28
{
29
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
R
Ram Pai 已提交
30 31
}

32 33 34
static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
35
{
36
	struct mount *m = mnt;
37 38 39

	do {
		/* Check the namespace first for optimization */
A
Al Viro 已提交
40
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
41
			return m;
42

43
		m = next_peer(m);
44
	} while (m != mnt);
45 46 47 48 49 50 51 52 53 54

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
55
int get_dominating_id(struct mount *mnt, const struct path *root)
56
{
57
	struct mount *m;
58

59
	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
A
Al Viro 已提交
60
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
61
		if (d)
A
Al Viro 已提交
62
			return d->mnt_group_id;
63 64 65 66 67
	}

	return 0;
}

/*
 * Turn @mnt into a slave mount.  A new master is picked with the
 * following preference: a peer sharing mnt's root dentry, then any
 * other peer, then mnt's existing master.  All of mnt's own slaves
 * are handed over to that master; if none exists they become private.
 */
static int do_make_slave(struct mount *mnt)
{
	struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
	struct mount *slave_mnt;

	/*
	 * slave 'mnt' to a peer mount that has the
	 * same root dentry. If none is available then
	 * slave it to anything that is available.
	 */
	while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
	       peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;

	if (peer_mnt == mnt) {
		/* no peer with matching root; fall back to any peer at all */
		peer_mnt = next_peer(mnt);
		if (peer_mnt == mnt)
			peer_mnt = NULL;
	}
	/* sole member of a shared peer group: give the group id back */
	if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
	    list_empty(&mnt->mnt_share))
		mnt_release_group_id(mnt);

	list_del_init(&mnt->mnt_share);
	mnt->mnt_group_id = 0;

	if (peer_mnt)
		master = peer_mnt;

	if (master) {
		/* transfer all of mnt's slaves to the new master */
		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
			slave_mnt->mnt_master = master;
		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
	} else {
		/* nobody to slave to: make mnt's former slaves private */
		struct list_head *p = &mnt->mnt_slave_list;
		while (!list_empty(p)) {
			slave_mnt = list_first_entry(p,
					struct mount, mnt_slave);
			list_del_init(&slave_mnt->mnt_slave);
			slave_mnt->mnt_master = NULL;
		}
	}
	mnt->mnt_master = master;
	CLEAR_MNT_SHARED(mnt);
	return 0;
}

N
Nick Piggin 已提交
116 117 118
/*
 * vfsmount lock must be held for write
 */
119
void change_mnt_propagation(struct mount *mnt, int type)
120
{
R
Ram Pai 已提交
121
	if (type == MS_SHARED) {
122
		set_mnt_shared(mnt);
R
Ram Pai 已提交
123 124
		return;
	}
125
	do_make_slave(mnt);
R
Ram Pai 已提交
126
	if (type != MS_SLAVE) {
127
		list_del_init(&mnt->mnt_slave);
A
Al Viro 已提交
128
		mnt->mnt_master = NULL;
R
Ram Pai 已提交
129
		if (type == MS_UNBINDABLE)
130
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
A
Andries E. Brouwer 已提交
131
		else
132
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
R
Ram Pai 已提交
133
	}
134
}
/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 */
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			/* back on origin's level: next peer, NULL on wrap */
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			/* more slaves of this master remain */
			return next_slave(m);

		/* back at master */
		m = master;
	}
}

/*
 * Return a representative of the next peer group in the propagation
 * tree after @m's group, or NULL when the walk rooted at @origin's
 * group is exhausted.  propagate_mnt() iterates the members of the
 * returned group with next_peer().
 */
static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		/* descend: look for a member of this group that has slaves */
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				/* back in origin's group: stop on full wrap */
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			/* master is the sole member of its peer group */
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}
/* all accesses are serialized by namespace_sem */
static struct user_namespace *user_ns;
static struct mount *last_dest, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;

/*
 * Create the propagated copy of last_source for mount @m, mount it on
 * @mp within @m and add it to @list.  Reads and updates the globals
 * above; propagate_mnt() sets them up before the walk.
 * Returns 0 or a negative errno from copy_tree().
 */
static int propagate_one(struct mount *m)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (m->mnt_group_id == last_dest->mnt_group_id) {
		/* peer of last_dest: the copy becomes a shared peer too */
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		/*
		 * Climb m's master chain until we hit dest_master or an
		 * already-marked master, walking last_source/last_dest
		 * back up so the copy is made from the right ancestor.
		 */
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p)) {
				while (last_dest->mnt_master != p) {
					last_source = last_source->mnt_master;
					last_dest = last_source->mnt_parent;
				}
				if (n->mnt_group_id != last_dest->mnt_group_id) {
					last_source = last_source->mnt_master;
					last_dest = last_source->mnt_parent;
				}
				break;
			}
		}
		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
	}

	/* Notice when we are propagating across user namespaces */
	if (m->mnt_ns->user_ns != user_ns)
		type |= CL_UNPRIVILEGED;
	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	child->mnt.mnt_flags &= ~MNT_LOCKED;
	mnt_set_mountpoint(m, mp, child);
	last_dest = m;
	last_source = child;
	/* mark intermediate masters; propagate_mnt() clears the marks */
	if (m->mnt_master != dest_master) {
		read_seqlock_excl(&mount_lock);
		SET_MNT_MARK(m->mnt_master);
		read_sequnlock_excl(&mount_lock);
	}
	hlist_add_head(&child->mnt_hash, list);
	return 0;
}

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * dentry 'dest_dentry'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_dentry: destination dentry.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 *
 * Returns 0, or the first error from propagate_one().
 */
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		    struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	user_ns = current->nsproxy->mnt_ns->user_ns;
	last_dest = dest_mnt;
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	/* clear the marks propagate_one() left on intermediate masters */
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}
/*
 * return true if the refcount is greater than count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	int refs = mnt_get_count(mnt);

	return refs > count;
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
 *
 * vfsmount lock must be held for write
 */
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
	struct mount *m, *child;
	struct mount *parent = mnt->mnt_parent;
	int ret = 0;

	/* a mount that is its own parent: only its own refcount matters */
	if (mnt == parent)
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
		return 1;

	/* check mnt's counterpart at every mount the parent propagates to */
	for (m = propagation_next(parent, parent); m;
	     		m = propagation_next(m, parent)) {
		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
		if (child && list_empty(&child->mnt_mounts) &&
		    (ret = do_refcount_check(child, 1)))
			break;
	}
	return ret;
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static void __propagate_umount(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;

	BUG_ON(parent == mnt);

	/* visit every mount that mnt's parent propagates to */
	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {

		struct mount *child = __lookup_mnt_last(&m->mnt,
						mnt->mnt_mountpoint);
		/*
		 * umount the child only if the child has no
		 * other children
		 */
		if (child && list_empty(&child->mnt_mounts)) {
			list_del_init(&child->mnt_child);
			/* queue the child on the umount hlist, before mnt */
			hlist_del_init_rcu(&child->mnt_hash);
			hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
		}
	}
}

/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
N
Nick Piggin 已提交
396 397
 *
 * vfsmount lock must be held for write
R
Ram Pai 已提交
398
 */
A
Al Viro 已提交
399
int propagate_umount(struct hlist_head *list)
R
Ram Pai 已提交
400
{
401
	struct mount *mnt;
R
Ram Pai 已提交
402

A
Al Viro 已提交
403
	hlist_for_each_entry(mnt, list, mnt_hash)
R
Ram Pai 已提交
404 405 406
		__propagate_umount(mnt);
	return 0;
}