/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Released under GPL v2.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 *
 */
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include "internal.h"
#include "pnode.h"

/* return the next shared peer mount of @p */
17
static inline struct mount *next_peer(struct mount *p)
R
Ram Pai 已提交
18
{
19
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
R
Ram Pai 已提交
20 21
}

22
static inline struct mount *first_slave(struct mount *p)
R
Ram Pai 已提交
23
{
24
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
R
Ram Pai 已提交
25 26
}

27
static inline struct mount *next_slave(struct mount *p)
R
Ram Pai 已提交
28
{
29
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
R
Ram Pai 已提交
30 31
}

32 33 34
static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
35
{
36
	struct mount *m = mnt;
37 38 39

	do {
		/* Check the namespace first for optimization */
A
Al Viro 已提交
40
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
41
			return m;
42

43
		m = next_peer(m);
44
	} while (m != mnt);
45 46 47 48 49 50 51 52 53 54

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
55
int get_dominating_id(struct mount *mnt, const struct path *root)
56
{
57
	struct mount *m;
58

59
	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
A
Al Viro 已提交
60
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
61
		if (d)
A
Al Viro 已提交
62
			return d->mnt_group_id;
63 64 65 66 67
	}

	return 0;
}

68
static int do_make_slave(struct mount *mnt)
R
Ram Pai 已提交
69
{
70
	struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
A
Al Viro 已提交
71
	struct mount *slave_mnt;
R
Ram Pai 已提交
72 73 74

	/*
	 * slave 'mnt' to a peer mount that has the
75
	 * same root dentry. If none is available then
R
Ram Pai 已提交
76 77
	 * slave it to anything that is available.
	 */
78
	while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
79
	       peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
R
Ram Pai 已提交
80 81

	if (peer_mnt == mnt) {
82
		peer_mnt = next_peer(mnt);
R
Ram Pai 已提交
83 84 85
		if (peer_mnt == mnt)
			peer_mnt = NULL;
	}
86 87
	if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
	    list_empty(&mnt->mnt_share))
88
		mnt_release_group_id(mnt);
89

90
	list_del_init(&mnt->mnt_share);
A
Al Viro 已提交
91
	mnt->mnt_group_id = 0;
R
Ram Pai 已提交
92 93 94 95 96

	if (peer_mnt)
		master = peer_mnt;

	if (master) {
97
		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
98
			slave_mnt->mnt_master = master;
99 100 101
		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
R
Ram Pai 已提交
102
	} else {
103
		struct list_head *p = &mnt->mnt_slave_list;
R
Ram Pai 已提交
104
		while (!list_empty(p)) {
105
                        slave_mnt = list_first_entry(p,
106 107
					struct mount, mnt_slave);
			list_del_init(&slave_mnt->mnt_slave);
R
Ram Pai 已提交
108 109 110
			slave_mnt->mnt_master = NULL;
		}
	}
111
	mnt->mnt_master = master;
112
	CLEAR_MNT_SHARED(mnt);
R
Ram Pai 已提交
113 114 115
	return 0;
}

N
Nick Piggin 已提交
116 117 118
/*
 * vfsmount lock must be held for write
 */
119
void change_mnt_propagation(struct mount *mnt, int type)
120
{
R
Ram Pai 已提交
121
	if (type == MS_SHARED) {
122
		set_mnt_shared(mnt);
R
Ram Pai 已提交
123 124
		return;
	}
125
	do_make_slave(mnt);
R
Ram Pai 已提交
126
	if (type != MS_SLAVE) {
127
		list_del_init(&mnt->mnt_slave);
A
Al Viro 已提交
128
		mnt->mnt_master = NULL;
R
Ram Pai 已提交
129
		if (type == MS_UNBINDABLE)
130
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
A
Andries E. Brouwer 已提交
131
		else
132
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
R
Ram Pai 已提交
133
	}
134
}
135 136 137 138 139

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
140 141 142 143 144
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
145
 */
146 147
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
148
{
R
Ram Pai 已提交
149
	/* are there any slaves of this mount? */
A
Al Viro 已提交
150
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
R
Ram Pai 已提交
151 152 153
		return first_slave(m);

	while (1) {
154
		struct mount *master = m->mnt_master;
R
Ram Pai 已提交
155

156
		if (master == origin->mnt_master) {
157 158
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
159
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
R
Ram Pai 已提交
160 161 162 163 164 165 166
			return next_slave(m);

		/* back at master */
		m = master;
	}
}

A
Al Viro 已提交
167
static struct mount *next_group(struct mount *m, struct mount *origin)
R
Ram Pai 已提交
168
{
A
Al Viro 已提交
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
R
Ram Pai 已提交
196
	}
A
Al Viro 已提交
197
}
R
Ram Pai 已提交
198

/*
 * State shared between propagate_mnt() and propagate_one().
 * all accesses are serialized by namespace_sem
 */
/* user namespace of the current task's mount namespace */
static struct user_namespace *user_ns;
/*
 * last_dest:   starts as the destination mount, then the mount that
 *              received the most recent copy
 * last_source: starts as the source mount, then the most recent copy
 * dest_master: ->mnt_master of the destination mount
 */
static struct mount *last_dest, *last_source, *dest_master;
/* mountpoint the copies are attached at */
static struct mountpoint *mp;
/* list the new copies are collected on (via ->mnt_hash) */
static struct hlist_head *list;

static int propagate_one(struct mount *m)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
		return 0;
	if (m->mnt_group_id == last_dest->mnt_group_id) {
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
		for (n = m; ; n = p) {
			p = n->mnt_master;
			if (p == dest_master || IS_MNT_MARKED(p)) {
				while (last_dest->mnt_master != p) {
					last_source = last_source->mnt_master;
					last_dest = last_source->mnt_parent;
				}
				if (n->mnt_group_id != last_dest->mnt_group_id) {
					last_source = last_source->mnt_master;
					last_dest = last_source->mnt_parent;
				}
				break;
			}
232
		}
A
Al Viro 已提交
233 234 235 236
		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
R
Ram Pai 已提交
237
	}
A
Al Viro 已提交
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
		
	/* Notice when we are propagating across user namespaces */
	if (m->mnt_ns->user_ns != user_ns)
		type |= CL_UNPRIVILEGED;
	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
	mnt_set_mountpoint(m, mp, child);
	last_dest = m;
	last_source = child;
	if (m->mnt_master != dest_master) {
		read_seqlock_excl(&mount_lock);
		SET_MNT_MARK(m->mnt_master);
		read_sequnlock_excl(&mount_lock);
	}
	hlist_add_head(&child->mnt_hash, list);
	return 0;
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
}

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * dentry 'dest_dentry'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_dentry: destination dentry.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
270
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
A
Al Viro 已提交
271
		    struct mount *source_mnt, struct hlist_head *tree_list)
272
{
A
Al Viro 已提交
273
	struct mount *m, *n;
274
	int ret = 0;
275

A
Al Viro 已提交
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	user_ns = current->nsproxy->mnt_ns->user_ns;
	last_dest = dest_mnt;
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
292
			goto out;
A
Al Viro 已提交
293
	}
294

A
Al Viro 已提交
295 296 297 298 299 300 301 302 303 304 305
	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
306 307
	}
out:
A
Al Viro 已提交
308 309 310 311 312
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
313
	}
A
Al Viro 已提交
314
	read_sequnlock_excl(&mount_lock);
315 316
	return ret;
}
R
Ram Pai 已提交
317 318 319 320

/*
 * return true if the refcount of @mnt, as reported by mnt_get_count(),
 * is greater than @count
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return mnt_get_count(mnt) > count;
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
N
Nick Piggin 已提交
333
 *
N
Nick Piggin 已提交
334
 * vfsmount lock must be held for write
R
Ram Pai 已提交
335
 */
336
int propagate_mount_busy(struct mount *mnt, int refcnt)
R
Ram Pai 已提交
337
{
338
	struct mount *m, *child;
339
	struct mount *parent = mnt->mnt_parent;
R
Ram Pai 已提交
340 341
	int ret = 0;

342
	if (mnt == parent)
R
Ram Pai 已提交
343 344 345 346 347 348 349
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
350
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
R
Ram Pai 已提交
351 352
		return 1;

353 354
	for (m = propagation_next(parent, parent); m;
	     		m = propagation_next(m, parent)) {
A
Al Viro 已提交
355
		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
356
		if (child && list_empty(&child->mnt_mounts) &&
357
		    (ret = do_refcount_check(child, 1)))
R
Ram Pai 已提交
358 359 360 361 362 363 364 365 366
			break;
	}
	return ret;
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
367
static void __propagate_umount(struct mount *mnt)
R
Ram Pai 已提交
368
{
369
	struct mount *parent = mnt->mnt_parent;
370
	struct mount *m;
R
Ram Pai 已提交
371

372
	BUG_ON(parent == mnt);
R
Ram Pai 已提交
373

374 375
	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
R
Ram Pai 已提交
376

A
Al Viro 已提交
377 378
		struct mount *child = __lookup_mnt_last(&m->mnt,
						mnt->mnt_mountpoint);
R
Ram Pai 已提交
379 380 381 382
		/*
		 * umount the child only if the child has no
		 * other children
		 */
A
Al Viro 已提交
383 384 385 386
		if (child && list_empty(&child->mnt_mounts)) {
			hlist_del_init_rcu(&child->mnt_hash);
			hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
		}
R
Ram Pai 已提交
387 388 389 390 391 392 393
	}
}

/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
N
Nick Piggin 已提交
394 395
 *
 * vfsmount lock must be held for write
R
Ram Pai 已提交
396
 */
A
Al Viro 已提交
397
int propagate_umount(struct hlist_head *list)
R
Ram Pai 已提交
398
{
399
	struct mount *mnt;
R
Ram Pai 已提交
400

A
Al Viro 已提交
401
	hlist_for_each_entry(mnt, list, mnt_hash)
R
Ram Pai 已提交
402 403 404
		__propagate_umount(mnt);
	return 0;
}