pnode.c 15.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6 7
/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 */
8
#include <linux/mnt_namespace.h>
9 10
#include <linux/mount.h>
#include <linux/fs.h>
11
#include <linux/nsproxy.h>
12
#include <uapi/linux/mount.h>
13
#include "internal.h"
14 15
#include "pnode.h"

R
Ram Pai 已提交
16
/* return the next shared peer mount of @p */
17
static inline struct mount *next_peer(struct mount *p)
R
Ram Pai 已提交
18
{
19
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
R
Ram Pai 已提交
20 21
}

22
static inline struct mount *first_slave(struct mount *p)
R
Ram Pai 已提交
23
{
24
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
R
Ram Pai 已提交
25 26
}

27 28 29 30 31
/*
 * Return the last entry on @p's ->mnt_slave_list.
 * The list is not checked for emptiness here; callers do that.
 */
static inline struct mount *last_slave(struct mount *p)
{
	struct list_head *pos = p->mnt_slave_list.prev;

	return list_entry(pos, struct mount, mnt_slave);
}

32
static inline struct mount *next_slave(struct mount *p)
R
Ram Pai 已提交
33
{
34
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
R
Ram Pai 已提交
35 36
}

37 38 39
static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
40
{
41
	struct mount *m = mnt;
42 43 44

	do {
		/* Check the namespace first for optimization */
A
Al Viro 已提交
45
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
46
			return m;
47

48
		m = next_peer(m);
49
	} while (m != mnt);
50 51 52 53 54 55 56 57 58 59

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
60
int get_dominating_id(struct mount *mnt, const struct path *root)
61
{
62
	struct mount *m;
63

64
	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
A
Al Viro 已提交
65
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
66
		if (d)
A
Al Viro 已提交
67
			return d->mnt_group_id;
68 69 70 71 72
	}

	return 0;
}

73
static int do_make_slave(struct mount *mnt)
R
Ram Pai 已提交
74
{
A
Al Viro 已提交
75
	struct mount *master, *slave_mnt;
R
Ram Pai 已提交
76

A
Al Viro 已提交
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
R
Ram Pai 已提交
93
	} else {
A
Al Viro 已提交
94 95 96 97 98 99 100 101 102 103 104
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
R
Ram Pai 已提交
105
		}
A
Al Viro 已提交
106 107 108
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
R
Ram Pai 已提交
109
	}
A
Al Viro 已提交
110 111 112 113 114
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
115
	mnt->mnt_master = master;
R
Ram Pai 已提交
116 117 118
	return 0;
}

N
Nick Piggin 已提交
119 120 121
/*
 * vfsmount lock must be held for write
 */
122
void change_mnt_propagation(struct mount *mnt, int type)
123
{
R
Ram Pai 已提交
124
	if (type == MS_SHARED) {
125
		set_mnt_shared(mnt);
R
Ram Pai 已提交
126 127
		return;
	}
128
	do_make_slave(mnt);
R
Ram Pai 已提交
129
	if (type != MS_SLAVE) {
130
		list_del_init(&mnt->mnt_slave);
A
Al Viro 已提交
131
		mnt->mnt_master = NULL;
R
Ram Pai 已提交
132
		if (type == MS_UNBINDABLE)
133
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
A
Andries E. Brouwer 已提交
134
		else
135
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
R
Ram Pai 已提交
136
	}
137
}
138 139 140 141 142

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
143 144 145 146 147
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
148
 */
149 150
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
151
{
R
Ram Pai 已提交
152
	/* are there any slaves of this mount? */
A
Al Viro 已提交
153
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
R
Ram Pai 已提交
154 155 156
		return first_slave(m);

	while (1) {
157
		struct mount *master = m->mnt_master;
R
Ram Pai 已提交
158

159
		if (master == origin->mnt_master) {
160 161
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
162
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
R
Ram Pai 已提交
163 164 165 166 167 168 169
			return next_slave(m);

		/* back at master */
		m = master;
	}
}

170 171 172 173 174 175 176 177 178 179 180 181 182
/*
 * Advance @m so that a following propagation_next() call will not return
 * the slaves of @m: when @m is not new and has slaves, return its last
 * slave, otherwise return @m unchanged.  @origin is not used.
 */
static struct mount *skip_propagation_subtree(struct mount *m,
						struct mount *origin)
{
	if (IS_MNT_NEW(m) || list_empty(&m->mnt_slave_list))
		return m;

	return last_slave(m);
}

A
Al Viro 已提交
183
static struct mount *next_group(struct mount *m, struct mount *origin)
R
Ram Pai 已提交
184
{
A
Al Viro 已提交
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
R
Ram Pai 已提交
212
	}
A
Al Viro 已提交
213
}
R
Ram Pai 已提交
214

A
Al Viro 已提交
215
/*
 * Working state shared between propagate_mnt() and propagate_one().
 * all accesses are serialized by namespace_sem
 */
static struct mount *last_dest;		/* destination handled most recently */
static struct mount *first_source;	/* source mount given to propagate_mnt() */
static struct mount *last_source;	/* tree the next copy is made from */
static struct mount *dest_master;	/* ->mnt_master of the original destination */
static struct mountpoint *mp;		/* mountpoint the copies are attached at */
static struct hlist_head *list;		/* collects the copies via ->mnt_hash */

220 221 222 223 224
/*
 * True when @m1 and @m2 carry the same, non-zero ->mnt_group_id.
 */
static inline bool peers(struct mount *m1, struct mount *m2)
{
	int id = m1->mnt_group_id;

	if (id != m2->mnt_group_id)
		return false;

	return id != 0;
}

A
Al Viro 已提交
225 226 227 228 229 230 231 232 233 234
static int propagate_one(struct mount *m)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
		return 0;
235
	if (peers(m, last_dest)) {
A
Al Viro 已提交
236 237 238
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
239
		bool done;
A
Al Viro 已提交
240 241
		for (n = m; ; n = p) {
			p = n->mnt_master;
242
			if (p == dest_master || IS_MNT_MARKED(p))
A
Al Viro 已提交
243
				break;
244
		}
245 246 247 248 249 250 251 252 253 254
		do {
			struct mount *parent = last_source->mnt_parent;
			if (last_source == first_source)
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

A
Al Viro 已提交
255 256 257 258
		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
R
Ram Pai 已提交
259
	}
A
Al Viro 已提交
260 261 262 263
		
	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
264
	child->mnt.mnt_flags &= ~MNT_LOCKED;
A
Al Viro 已提交
265 266 267 268 269 270 271 272 273
	mnt_set_mountpoint(m, mp, child);
	last_dest = m;
	last_source = child;
	if (m->mnt_master != dest_master) {
		read_seqlock_excl(&mount_lock);
		SET_MNT_MARK(m->mnt_master);
		read_sequnlock_excl(&mount_lock);
	}
	hlist_add_head(&child->mnt_hash, list);
274
	return count_mounts(m->mnt_ns, child);
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
}

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * dentry 'dest_dentry'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_dentry: destination dentry.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
290
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
A
Al Viro 已提交
291
		    struct mount *source_mnt, struct hlist_head *tree_list)
292
{
A
Al Viro 已提交
293
	struct mount *m, *n;
294
	int ret = 0;
295

A
Al Viro 已提交
296 297 298 299 300 301
	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	last_dest = dest_mnt;
302
	first_source = source_mnt;
A
Al Viro 已提交
303 304 305 306 307 308 309 310 311
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
312
			goto out;
A
Al Viro 已提交
313
	}
314

A
Al Viro 已提交
315 316 317 318 319 320 321 322 323 324 325
	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
326 327
	}
out:
A
Al Viro 已提交
328 329 330 331 332
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
333
	}
A
Al Viro 已提交
334
	read_sequnlock_excl(&mount_lock);
335 336
	return ret;
}
R
Ram Pai 已提交
337

338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
/*
 * If there is exactly one mount on @mnt and it is mounted on @mnt's root
 * (i.e. it covers @mnt completely), return it.  Otherwise return NULL.
 */
static struct mount *find_topper(struct mount *mnt)
{
	if (list_is_singular(&mnt->mnt_mounts)) {
		struct mount *only;

		only = list_first_entry(&mnt->mnt_mounts,
					struct mount, mnt_child);
		if (only->mnt_mountpoint == mnt->mnt.mnt_root)
			return only;
	}

	return NULL;
}

R
Ram Pai 已提交
353 354 355
/*
 * Return non-zero when the reference count reported by mnt_get_count()
 * for @mnt is greater than @count, zero otherwise.
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	return count < mnt_get_count(mnt);
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
N
Nick Piggin 已提交
368
 *
N
Nick Piggin 已提交
369
 * vfsmount lock must be held for write
R
Ram Pai 已提交
370
 */
371
int propagate_mount_busy(struct mount *mnt, int refcnt)
R
Ram Pai 已提交
372
{
373
	struct mount *m, *child, *topper;
374
	struct mount *parent = mnt->mnt_parent;
R
Ram Pai 已提交
375

376
	if (mnt == parent)
R
Ram Pai 已提交
377 378 379 380 381 382 383
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
384
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
R
Ram Pai 已提交
385 386
		return 1;

387 388
	for (m = propagation_next(parent, parent); m;
	     		m = propagation_next(m, parent)) {
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
R
Ram Pai 已提交
405
	}
406
	return 0;
R
Ram Pai 已提交
407 408
}

409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
423
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
424 425 426 427 428
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

429
static void umount_one(struct mount *mnt, struct list_head *to_umount)
430
{
431 432 433 434 435
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
436 437
}

R
Ram Pai 已提交
438 439 440 441
/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
442 443 444
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
R
Ram Pai 已提交
445
{
446 447
	bool progress = false;
	struct mount *child;
R
Ram Pai 已提交
448

449 450 451 452 453 454
	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;
R
Ram Pai 已提交
455

456 457 458 459 460
	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
461
			continue;
462 463 464 465 466
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}
467

468 469 470
	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;
471

472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}

/*
 * Deal with the children of every mount on @to_umount.  A child mounted
 * on its parent's root (a topper) is queued on @to_restore; any other
 * child is unmounted with umount_one(), which appends it to @to_umount,
 * so it is visited later by this same walk.
 */
static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *next;

	list_for_each_entry(mnt, to_umount, mnt_list) {
		list_for_each_entry_safe(child, next, &mnt->mnt_mounts, mnt_child) {
			if (child->mnt_mountpoint != mnt->mnt.mnt_root) {
				umount_one(child, to_umount);
				continue;
			}
			/* topper */
			list_move_tail(&child->mnt_umounting, to_restore);
		}
	}
}

498
static void restore_mounts(struct list_head *to_restore)
499
{
500 501
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
502 503 504
		struct mount *mnt, *parent;
		struct mountpoint *mp;

505 506 507
		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);
508

509
		/* Should this mount be reparented? */
510 511 512 513 514 515
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
516 517
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
518 519 520
	}
}

521 522 523 524 525 526 527 528 529
/*
 * Empty @visited, leaving the ->mnt_umounting link of each mount that
 * was on it re-initialised.
 */
static void cleanup_umount_visitations(struct list_head *visited)
{
	struct mount *seen;

	while (!list_empty(visited)) {
		seen = list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&seen->mnt_umounting);
	}
}

R
Ram Pai 已提交
530 531 532 533
/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
N
Nick Piggin 已提交
534 535
 *
 * vfsmount lock must be held for write
R
Ram Pai 已提交
536
 */
537
int propagate_umount(struct list_head *list)
R
Ram Pai 已提交
538
{
539
	struct mount *mnt;
540 541
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
542
	LIST_HEAD(visited);
R
Ram Pai 已提交
543

544 545
	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
546 547
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;
548

549 550 551 552 553 554 555 556 557 558
		/*
		 * If this mount has already been visited it is known that it's
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint has already been visited and there is
		 * no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
559 560 561 562 563 564 565
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * know that it's entire peer group and all of
				 * their slaves in the propgation tree for the
				 * mountpoint has already been visited and there
				 * is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come accross an partially unmounted
				 * mount in list that has not been visited yet.
				 * Remember it has been visited and continue
				 * about our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

587 588 589 590 591 592 593 594 595 596
			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}
597

598 599
	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
600
	cleanup_umount_visitations(&visited);
601
	list_splice_tail(&to_umount, list);
602

R
Ram Pai 已提交
603 604
	return 0;
}