super.c 41.7 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
M
Miklos Szeredi 已提交
2 3 4 5 6
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

7
#include <uapi/linux/magic.h>
M
Miklos Szeredi 已提交
8 9 10 11 12 13
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
A
Andy Whitcroft 已提交
14
#include <linux/statfs.h>
E
Erez Zadok 已提交
15
#include <linux/seq_file.h>
M
Miklos Szeredi 已提交
16
#include <linux/posix_acl_xattr.h>
17
#include <linux/exportfs.h>
M
Miklos Szeredi 已提交
18 19 20 21 22 23 24 25 26
#include "overlayfs.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


struct ovl_dir_cache;

27 28
#define OVL_MAX_STACK 500

29 30
/*
 * Default for the redirect_dir feature.  Initialised from
 * CONFIG_OVERLAY_FS_REDIRECT_DIR and exposed as the "redirect_dir" module
 * parameter (mode 0644).
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");
M
Miklos Szeredi 已提交
33

34 35 36 37
/*
 * When set, redirects are followed even with redirect_dir=off.  Initialised
 * from CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW and exposed as the
 * "redirect_always_follow" module parameter (mode 0644).
 */
static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

41 42
/*
 * Default for the inodes index feature.  Initialised from
 * CONFIG_OVERLAY_FS_INDEX and exposed as the "index" module parameter
 * (mode 0644).
 */
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

46 47
/*
 * Default for the NFS export feature.  Initialised from
 * CONFIG_OVERLAY_FS_NFS_EXPORT and exposed as the "nfs_export" module
 * parameter (mode 0644).
 */
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

51 52
/*
 * Whether xino defaults to "auto" rather than "off" (see ovl_xino_def()).
 * Initialised from CONFIG_OVERLAY_FS_XINO_AUTO and exposed as the
 * "xino_auto" module parameter (mode 0644).
 */
static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
MODULE_PARM_DESC(xino_auto,
		 "Auto enable xino feature");

56 57 58 59 60 61 62 63
/* dput() the dentry stored in each of the oe->numlower lowerstack slots. */
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
	unsigned int idx = 0;

	while (idx < oe->numlower) {
		dput(oe->lowerstack[idx].dentry);
		idx++;
	}
}

64 65
/*
 * Default for the metadata only copy up feature.  Initialised from
 * CONFIG_OVERLAY_FS_METACOPY and exposed as the "metacopy" module parameter
 * (mode 0644).
 */
static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
		 "Default to on or off for the metadata only copy up feature");

M
Miklos Szeredi 已提交
69 70 71 72 73
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
74
		ovl_entry_stack_free(oe);
M
Miklos Szeredi 已提交
75 76 77 78
		kfree_rcu(oe, rcu);
	}
}

79
static struct dentry *ovl_d_real(struct dentry *dentry,
80
				 const struct inode *inode)
M
Miklos Szeredi 已提交
81 82 83
{
	struct dentry *real;

84 85 86 87
	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

88
	if (!d_is_reg(dentry)) {
M
Miklos Szeredi 已提交
89 90 91 92 93 94
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	real = ovl_dentry_upper(dentry);
95
	if (real && (inode == d_inode(real)))
M
Miklos Szeredi 已提交
96 97
		return real;

98 99 100 101
	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	real = ovl_dentry_lowerdata(dentry);
M
Miklos Szeredi 已提交
102 103 104
	if (!real)
		goto bug;

M
Miklos Szeredi 已提交
105
	/* Handle recursion */
106
	real = d_real(real, inode);
M
Miklos Szeredi 已提交
107

M
Miklos Szeredi 已提交
108 109 110
	if (!inode || inode == d_inode(real))
		return real;
bug:
M
Miklos Szeredi 已提交
111
	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
M
Miklos Szeredi 已提交
112 113 114 115
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
	return dentry;
}

116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/*
 * ->d_revalidate: forward to every lower dentry that has
 * DCACHE_OP_REVALIDATE set.
 *
 * Returns 1 when all of them are still valid, the negative error from the
 * first ->d_revalidate that fails, or -ESTALE as soon as one reports 0.  In
 * the latter case the lower dentry is also invalidated unless LOOKUP_RCU is
 * set in @flags.
 */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int i;

	for (i = 0; i < oe->numlower; i++) {
		struct dentry *lower = oe->lowerstack[i].dentry;
		int res;

		if (!(lower->d_flags & DCACHE_OP_REVALIDATE))
			continue;

		res = lower->d_op->d_revalidate(lower, flags);
		if (res < 0)
			return res;
		if (res == 0) {
			if (!(flags & LOOKUP_RCU))
				d_invalidate(lower);
			return -ESTALE;
		}
	}

	return 1;
}

/*
 * ->d_weak_revalidate: forward to every lower dentry that has
 * DCACHE_OP_WEAK_REVALIDATE set, stopping at the first result <= 0.
 *
 * Returns that first non-positive result, otherwise the value of the last
 * ->d_weak_revalidate call made (1 if none was made).
 */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int i;
	int res = 1;

	for (i = 0; i < oe->numlower; i++) {
		struct dentry *lower = oe->lowerstack[i].dentry;

		if (!(lower->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			continue;

		res = lower->d_op->d_weak_revalidate(lower, flags);
		if (res <= 0)
			return res;
	}

	return res;
}

M
Miklos Szeredi 已提交
157 158
/* Dentry operations without any revalidation hooks. */
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
	.d_real = ovl_d_real,
};

162 163
/*
 * Same as ovl_dentry_operations, plus the revalidation hooks that forward to
 * the lower dentries.
 */
static const struct dentry_operations ovl_reval_dentry_operations = {
	.d_release = ovl_dentry_release,
	.d_real = ovl_d_real,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

169 170 171 172 173 174
static struct kmem_cache *ovl_inode_cachep;

static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);

175 176 177
	if (!oi)
		return NULL;

178
	oi->cache = NULL;
M
Miklos Szeredi 已提交
179
	oi->redirect = NULL;
180
	oi->version = 0;
M
Miklos Szeredi 已提交
181
	oi->flags = 0;
182
	oi->__upperdentry = NULL;
183
	oi->lower = NULL;
184
	oi->lowerdata = NULL;
185
	mutex_init(&oi->lock);
186

187 188 189
	return &oi->vfs_inode;
}

A
Al Viro 已提交
190
static void ovl_free_inode(struct inode *inode)
191
{
A
Al Viro 已提交
192
	struct ovl_inode *oi = OVL_I(inode);
193

A
Al Viro 已提交
194 195 196
	kfree(oi->redirect);
	mutex_destroy(&oi->lock);
	kmem_cache_free(ovl_inode_cachep, oi);
197 198 199 200
}

static void ovl_destroy_inode(struct inode *inode)
{
201 202 203
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
204
	iput(oi->lower);
205 206 207 208
	if (S_ISDIR(inode->i_mode))
		ovl_dir_cache_free(inode);
	else
		iput(oi->lowerdata);
209 210
}

M
Miklos Szeredi 已提交
211
static void ovl_free_fs(struct ovl_fs *ofs)
M
Miklos Szeredi 已提交
212
{
213
	unsigned i;
M
Miklos Szeredi 已提交
214

215
	iput(ofs->workbasedir_trap);
A
Amir Goldstein 已提交
216 217 218
	iput(ofs->indexdir_trap);
	iput(ofs->workdir_trap);
	iput(ofs->upperdir_trap);
M
Miklos Szeredi 已提交
219 220 221 222 223 224 225 226
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
	mntput(ofs->upper_mnt);
A
Amir Goldstein 已提交
227 228
	for (i = 0; i < ofs->numlower; i++) {
		iput(ofs->lower_layers[i].trap);
M
Miklos Szeredi 已提交
229
		mntput(ofs->lower_layers[i].mnt);
A
Amir Goldstein 已提交
230
	}
231 232
	for (i = 0; i < ofs->numlowerfs; i++)
		free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
M
Miklos Szeredi 已提交
233
	kfree(ofs->lower_layers);
234
	kfree(ofs->lower_fs);
M
Miklos Szeredi 已提交
235 236 237 238

	kfree(ofs->config.lowerdir);
	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
239
	kfree(ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
240 241 242
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
M
Miklos Szeredi 已提交
243 244
}

245 246 247 248 249 250 251
/* ->put_super: free the ovl_fs hanging off sb->s_fs_info. */
static void ovl_put_super(struct super_block *sb)
{
	ovl_free_fs(sb->s_fs_info);
}

252
/* Sync real dirty inodes in upper filesystem (if it exists) */
253 254
static int ovl_sync_fs(struct super_block *sb, int wait)
{
M
Miklos Szeredi 已提交
255
	struct ovl_fs *ofs = sb->s_fs_info;
256 257 258
	struct super_block *upper_sb;
	int ret;

M
Miklos Szeredi 已提交
259
	if (!ofs->upper_mnt)
260
		return 0;
261 262 263 264 265 266 267 268 269 270

	/*
	 * If this is a sync(2) call or an emergency sync, all the super blocks
	 * will be iterated, including upper_sb, so no need to do anything.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
271 272
		return 0;

273 274
	upper_sb = ofs->upper_mnt->mnt_sb;

275
	down_read(&upper_sb->s_umount);
276
	ret = sync_filesystem(upper_sb);
277
	up_read(&upper_sb->s_umount);
278

279 280 281
	return ret;
}

A
Andy Whitcroft 已提交
282 283 284 285 286 287
/**
 * ovl_statfs
 * @sb: The overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
288
 * filesystem pass the statfs to the upper filesystem (if it exists)
A
Andy Whitcroft 已提交
289 290 291 292 293 294 295 296
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

297
	ovl_path_real(root_dentry, &path);
A
Andy Whitcroft 已提交
298 299 300

	err = vfs_statfs(&path, buf);
	if (!err) {
M
Miklos Szeredi 已提交
301
		buf->f_namelen = ofs->namelen;
A
Andy Whitcroft 已提交
302 303 304 305 306 307
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

308
/* Will this overlay be forced to mount/remount ro? */
M
Miklos Szeredi 已提交
309
static bool ovl_force_readonly(struct ovl_fs *ofs)
310
{
M
Miklos Szeredi 已提交
311
	return (!ofs->upper_mnt || !ofs->workdir);
312 313
}

314 315 316 317 318
/* Default redirect_dir mode string, derived from ovl_redirect_dir_def. */
static const char *ovl_redirect_mode_def(void)
{
	if (ovl_redirect_dir_def)
		return "on";

	return "off";
}

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
/* Values of the xino mount option as stored in the mount config. */
enum {
	OVL_XINO_OFF,
	OVL_XINO_AUTO,
	OVL_XINO_ON,
};

/* Option strings for the OVL_XINO_* values, indexed by the enum value. */
static const char * const ovl_xino_str[] = {
	"off",
	"auto",
	"on",
};

/* Default xino mode: OVL_XINO_AUTO if ovl_xino_auto_def is set, else off. */
static inline int ovl_xino_def(void)
{
	if (ovl_xino_auto_def)
		return OVL_XINO_AUTO;

	return OVL_XINO_OFF;
}

E
Erez Zadok 已提交
336 337 338 339 340 341 342 343 344
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
M
Miklos Szeredi 已提交
345
	struct ovl_fs *ofs = sb->s_fs_info;
E
Erez Zadok 已提交
346

M
Miklos Szeredi 已提交
347 348 349 350
	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
M
Miklos Szeredi 已提交
351
	}
M
Miklos Szeredi 已提交
352
	if (ofs->config.default_permissions)
M
Miklos Szeredi 已提交
353
		seq_puts(m, ",default_permissions");
354 355
	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
356
	if (ofs->config.index != ovl_index_def)
357
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
358 359 360
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
361 362
	if (ofs->config.xino != ovl_xino_def())
		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
363 364 365
	if (ofs->config.metacopy != ovl_metacopy_def)
		seq_printf(m, ",metacopy=%s",
			   ofs->config.metacopy ? "on" : "off");
E
Erez Zadok 已提交
366 367 368
	return 0;
}

369 370
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
M
Miklos Szeredi 已提交
371
	struct ovl_fs *ofs = sb->s_fs_info;
372

373
	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
374 375 376 377 378
		return -EROFS;

	return 0;
}

M
Miklos Szeredi 已提交
379
/* Super block operations of overlayfs. */
static const struct super_operations ovl_super_operations = {
	.alloc_inode	= ovl_alloc_inode,
	.free_inode	= ovl_free_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= ovl_put_super,
	.sync_fs	= ovl_sync_fs,
	.statfs		= ovl_statfs,
	.show_options	= ovl_show_options,
	.remount_fs	= ovl_remount,
};

/*
 * Mount option tokens, as returned by match_token() in ovl_parse_opt().
 * OPT_ERR is the token paired with the terminating NULL pattern of
 * ovl_tokens.
 */
enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,
	OPT_ERR,
};

/*
 * Pattern table handed to match_token(): maps each mount option spelling to
 * its OPT_* token.  Options taking a free-form value use "%s"; on/off style
 * options have one literal pattern per accepted value.
 */
static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
	{OPT_XINO_ON,			"xino=on"},
	{OPT_XINO_OFF,			"xino=off"},
	{OPT_XINO_AUTO,			"xino=auto"},
	{OPT_METACOPY_ON,		"metacopy=on"},
	{OPT_METACOPY_OFF,		"metacopy=off"},
	{OPT_ERR,			NULL}
};

M
Miklos Szeredi 已提交
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/*
 * Split the next option off the comma separated option string at *s.
 *
 * A backslash escapes the character that follows it, so "\," does not end
 * the option (the backslash itself is left in place).  The separating comma
 * is overwritten with a NUL and *s is advanced past it; when the last option
 * has been handed out *s becomes NULL.
 *
 * Returns the start of the option, or NULL if *s was already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Step over the backslash; stop if it was the last character */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}

450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
/*
 * Translate the redirect_dir= mode string into config->redirect_dir and
 * config->redirect_follow.  Flags are only ever set here, never cleared.
 *
 *   "on"       - create and follow redirects
 *   "follow"   - follow only
 *   "off"      - follow only if the redirect_always_follow module parameter
 *                is set
 *   "nofollow" - nothing is set
 *
 * Returns 0, or -EINVAL (after logging) for any other string.
 */
static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
{
	if (!strcmp(mode, "on")) {
		/*
		 * Does not make sense to have redirect creation without
		 * redirect following.
		 */
		config->redirect_dir = true;
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "follow")) {
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "off")) {
		if (ovl_redirect_always_follow)
			config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "nofollow"))
		return 0;

	pr_err("bad mount option \"redirect_dir=%s\"\n",
	       mode);
	return -EINVAL;
}

M
Miklos Szeredi 已提交
473 474 475
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;
476
	int err;
477
	bool metacopy_opt = false, redirect_opt = false;
M
Miklos Szeredi 已提交
478

479 480 481 482
	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

M
Miklos Szeredi 已提交
483
	while ((p = ovl_next_opt(&opt)) != NULL) {
M
Miklos Szeredi 已提交
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
		int token;
		substring_t args[MAX_OPT_ARGS];

		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

M
Miklos Szeredi 已提交
513 514 515 516
		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

517 518 519 520 521
		case OPT_REDIRECT_DIR:
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
522
			redirect_opt = true;
M
Miklos Szeredi 已提交
523 524
			break;

525 526 527 528 529 530 531 532
		case OPT_INDEX_ON:
			config->index = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			break;

533 534 535 536 537 538 539 540
		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			break;

541 542 543 544 545 546 547 548 549 550 551 552
		case OPT_XINO_ON:
			config->xino = OVL_XINO_ON;
			break;

		case OPT_XINO_OFF:
			config->xino = OVL_XINO_OFF;
			break;

		case OPT_XINO_AUTO:
			config->xino = OVL_XINO_AUTO;
			break;

553 554
		case OPT_METACOPY_ON:
			config->metacopy = true;
555
			metacopy_opt = true;
556 557 558 559 560 561
			break;

		case OPT_METACOPY_OFF:
			config->metacopy = false;
			break;

M
Miklos Szeredi 已提交
562
		default:
L
lijiazi 已提交
563 564
			pr_err("unrecognized mount option \"%s\" or missing value\n",
					p);
M
Miklos Szeredi 已提交
565 566 567
			return -EINVAL;
		}
	}
H
hujianyang 已提交
568 569 570

	/* Workdir is useless in non-upper mount */
	if (!config->upperdir && config->workdir) {
L
lijiazi 已提交
571
		pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
H
hujianyang 已提交
572 573 574 575 576
			config->workdir);
		kfree(config->workdir);
		config->workdir = NULL;
	}

577 578 579 580
	err = ovl_parse_redirect_mode(config, config->redirect_mode);
	if (err)
		return err;

581 582 583 584 585 586 587 588 589 590
	/*
	 * This is to make the logic below simpler.  It doesn't make any other
	 * difference, since config->redirect_dir is only used for upper.
	 */
	if (!config->upperdir && config->redirect_follow)
		config->redirect_dir = true;

	/* Resolve metacopy -> redirect_dir dependency */
	if (config->metacopy && !config->redirect_dir) {
		if (metacopy_opt && redirect_opt) {
L
lijiazi 已提交
591
			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
592 593 594 595 596 597 598 599
			       config->redirect_mode);
			return -EINVAL;
		}
		if (redirect_opt) {
			/*
			 * There was an explicit redirect_dir=... that resulted
			 * in this conflict.
			 */
L
lijiazi 已提交
600
			pr_info("disabling metacopy due to redirect_dir=%s\n",
601 602 603 604 605 606
				config->redirect_mode);
			config->metacopy = false;
		} else {
			/* Automatically enable redirect otherwise. */
			config->redirect_follow = config->redirect_dir = true;
		}
607 608 609
	}

	return 0;
M
Miklos Szeredi 已提交
610 611 612
}

/*
 * Directory names passed as @name to ovl_workdir_create(), which looks them
 * up under ofs->workbasedir.
 * NOTE(review): the use of OVL_INDEXDIR_NAME is not visible in this part of
 * the file - presumably it names the index directory; confirm.
 */
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
M
Miklos Szeredi 已提交
614

M
Miklos Szeredi 已提交
615
/*
 * Look up and, if needed, create directory @name under ofs->workbasedir.
 * The parent inode is locked for the whole operation.
 *
 * If the directory already exists it is returned untouched when @persist is
 * true; otherwise it is cleaned up once via ovl_workdir_cleanup() and the
 * lookup is retried.  If it still exists after that, the call fails with
 * -EEXIST.
 *
 * A newly created directory gets its POSIX ACL xattrs removed and its mode
 * reset to S_IFDIR | 0.
 *
 * Returns the dentry on success.  On any failure a warning is logged and
 * NULL is returned.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *parent = ofs->workbasedir->d_inode;
	struct vfsmount *upper = ofs->upper_mnt;
	struct iattr attr = {
		.ia_valid = ATTR_MODE,
		.ia_mode = S_IFDIR | 0,
	};
	struct dentry *work;
	bool cleaned = false;
	int err;

	inode_lock_nested(parent, I_MUTEX_PARENT);

again:
	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
	if (IS_ERR(work)) {
		err = PTR_ERR(work);
		goto fail;
	}

	if (work->d_inode) {
		/* Cleanup was already attempted once: give up */
		if (cleaned) {
			err = -EEXIST;
			goto fail_dput;
		}

		if (persist)
			goto unlock;

		cleaned = true;
		ovl_workdir_cleanup(parent, upper, work, 0);
		dput(work);
		goto again;
	}

	work = ovl_create_real(parent, work, OVL_CATTR(attr.ia_mode));
	if (IS_ERR(work)) {
		err = PTR_ERR(work);
		goto fail;
	}

	/*
	 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
	 *
	 * a) success (there was a POSIX ACL xattr and was removed)
	 * b) -ENODATA (there was no POSIX ACL xattr)
	 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
	 *
	 * There are various other error values that could effectively
	 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
	 * if the xattr name is too long), but the set of filesystems
	 * allowed as upper are limited to "normal" ones, where checking
	 * for the above two errors is sufficient.
	 */
	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto fail_dput;

	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto fail_dput;

	/* Clear any inherited mode bits */
	inode_lock(work->d_inode);
	err = notify_change(work, &attr, NULL);
	inode_unlock(work->d_inode);
	if (err)
		goto fail_dput;

unlock:
	inode_unlock(parent);
	return work;

fail_dput:
	dput(work);
fail:
	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	inode_unlock(parent);
	return NULL;
}

M
Miklos Szeredi 已提交
702 703 704 705 706 707 708 709 710 711 712 713 714
/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character after it is copied verbatim.  A trailing lone backslash is
 * dropped as well.
 */
static void ovl_unescape(char *s)
{
	char *out = s;
	char c;

	do {
		if (*s == '\\')
			s++;
		c = *s++;
		*out++ = c;
	} while (c);
}

M
Miklos Szeredi 已提交
715 716
static int ovl_mount_dir_noesc(const char *name, struct path *path)
{
717
	int err = -EINVAL;
M
Miklos Szeredi 已提交
718

719
	if (!*name) {
L
lijiazi 已提交
720
		pr_err("empty lowerdir\n");
721 722
		goto out;
	}
M
Miklos Szeredi 已提交
723 724
	err = kern_path(name, LOOKUP_FOLLOW, path);
	if (err) {
L
lijiazi 已提交
725
		pr_err("failed to resolve '%s': %i\n", name, err);
M
Miklos Szeredi 已提交
726 727 728
		goto out;
	}
	err = -EINVAL;
729
	if (ovl_dentry_weird(path->dentry)) {
L
lijiazi 已提交
730
		pr_err("filesystem on '%s' not supported\n", name);
M
Miklos Szeredi 已提交
731 732
		goto out_put;
	}
M
Miklos Szeredi 已提交
733
	if (!d_is_dir(path->dentry)) {
L
lijiazi 已提交
734
		pr_err("'%s' not a directory\n", name);
M
Miklos Szeredi 已提交
735 736 737 738 739
		goto out_put;
	}
	return 0;

out_put:
740
	path_put_init(path);
M
Miklos Szeredi 已提交
741 742 743 744 745 746 747 748 749 750 751 752
out:
	return err;
}

static int ovl_mount_dir(const char *name, struct path *path)
{
	int err = -ENOMEM;
	char *tmp = kstrdup(name, GFP_KERNEL);

	if (tmp) {
		ovl_unescape(tmp);
		err = ovl_mount_dir_noesc(tmp, path);
753 754 755

		if (!err)
			if (ovl_dentry_remote(path->dentry)) {
L
lijiazi 已提交
756
				pr_err("filesystem on '%s' not supported as upperdir\n",
757
				       tmp);
758
				path_put_init(path);
759 760
				err = -EINVAL;
			}
M
Miklos Szeredi 已提交
761 762 763 764 765
		kfree(tmp);
	}
	return err;
}

M
Miklos Szeredi 已提交
766 767
static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
			     const char *name)
M
Miklos Szeredi 已提交
768 769
{
	struct kstatfs statfs;
M
Miklos Szeredi 已提交
770 771 772
	int err = vfs_statfs(path, &statfs);

	if (err)
L
lijiazi 已提交
773
		pr_err("statfs failed on '%s'\n", name);
M
Miklos Szeredi 已提交
774 775 776 777 778 779 780 781 782
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Resolve one lower directory @name into @path and fold its properties into
 * the mount state:
 *  - ofs->namelen and *stack_depth are raised to this layer's values,
 *  - *remote is set if the dentry is remote,
 *  - index and nfs_export are switched off (with a warning) if they are
 *    wanted but this filesystem cannot decode file handles,
 *  - ofs->xino_bits is cleared unless the file handle type is
 *    FILEID_INO32_GEN.
 *
 * Returns 0 or a negative error; @path is released on failure.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
{
	bool wants_fh;
	int fh_type;
	int err;

	err = ovl_mount_dir_noesc(name, path);
	if (err)
		return err;

	err = ovl_check_namelen(path, ofs, name);
	if (err) {
		path_put_init(path);
		return err;
	}

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	if (ovl_dentry_remote(path->dentry))
		*remote = true;

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
	wants_fh = ofs->config.nfs_export ||
		   (ofs->config.index && ofs->config.upperdir);
	if (wants_fh && !fh_type) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}

	/* Check if lower fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_bits = 0;

	return 0;
}

M
Miklos Szeredi 已提交
824 825 826 827 828 829 830 831 832 833 834 835
/*
 * Workdir should not be subdir of upperdir and vice versa.
 *
 * Identical dentries are rejected outright; otherwise the pair is accepted
 * only if lock_rename() returns NULL for it.
 */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	bool unrelated;

	if (workdir == upperdir)
		return false;

	unrelated = !lock_rename(workdir, upperdir);
	unlock_rename(workdir, upperdir);

	return unrelated;
}

836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
/*
 * Split the colon separated list @str in place into consecutive NUL
 * terminated strings and unescape them on the way: an unescaped ':' becomes
 * a terminator, a backslash is dropped and the character after it is kept
 * literally.
 *
 * Returns the number of resulting strings (always at least 1).
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int nr = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		char c = *src++;

		if (c == ':') {
			/* Unescaped separator: end the current entry */
			*dst++ = '\0';
			nr++;
			continue;
		}

		/* Escaped character: take whatever follows the backslash */
		if (c == '\\')
			c = *src++;

		*dst++ = c;
		if (!c)
			break;
	}

	return nr;
}

856 857 858 859 860
/*
 * ->get for the POSIX ACL xattr handlers: forwards to ovl_xattr_get() using
 * the handler's own name (handler->name); the @name argument is not used.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}

864 865 866 867 868
/*
 * ->set for the POSIX ACL xattr handlers.
 *
 * Validates the request before anything is written:
 *  - @value, if given, must parse as a POSIX ACL,
 *  - -EOPNOTSUPP unless the workdir inode has POSIX ACLs enabled and the real
 *    inode implements ->set_acl,
 *  - a default ACL on a non-directory: -EACCES when setting, 0 when removing,
 *  - -EPERM unless the caller owns the inode or is capable.
 *
 * Then, if needed, clears the sgid bit through ovl_setattr(), stores the
 * xattr with ovl_xattr_set() and, on success, copies the attributes of the
 * real inode to the overlay inode.
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}

	if (!IS_POSIXACL(d_inode(workdir)) || !realinode->i_op->set_acl) {
		err = -EOPNOTSUPP;
		goto release_acl;
	}

	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		err = acl ? -EACCES : 0;
		goto release_acl;
	}

	if (!inode_owner_or_capable(inode)) {
		err = -EPERM;
		goto release_acl;
	}

	/* The parsed ACL was only needed for validation */
	posix_acl_release(acl);

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr kill_sgid = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &kill_sgid);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (err)
		return err;

	ovl_copyattr(ovl_inode_real(inode), inode);
	return 0;

release_acl:
	posix_acl_release(acl);
	return err;
}

922 923 924 925
/*
 * ->get for ovl_own_xattr_handler (xattrs with the OVL_XATTR_PREFIX prefix):
 * always fails with -EOPNOTSUPP.
 */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

M
Miklos Szeredi 已提交
929 930 931 932 933
/*
 * ->set for ovl_own_xattr_handler (xattrs with the OVL_XATTR_PREFIX prefix):
 * always fails with -EOPNOTSUPP.
 */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

937 938 939 940
/*
 * ->get for the catch-all xattr handler: forwards to ovl_xattr_get() with the
 * @name it was given.
 */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

944 945 946 947 948
/*
 * ->set for the catch-all xattr handler: forwards to ovl_xattr_set() with the
 * @name it was given.
 */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}

952 953
/*
 * Handler for the POSIX access ACL xattr.  Only referenced from
 * ovl_xattr_handlers when CONFIG_FS_POSIX_ACL is defined, hence
 * __maybe_unused.
 */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
	.flags = ACL_TYPE_ACCESS,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};

960 961
/*
 * Handler for the POSIX default ACL xattr.  Only referenced from
 * ovl_xattr_handlers when CONFIG_FS_POSIX_ACL is defined, hence
 * __maybe_unused.
 */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags = ACL_TYPE_DEFAULT,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};

/*
 * Handler for xattr names starting with OVL_XATTR_PREFIX; both of its
 * callbacks return -EOPNOTSUPP.
 */
static const struct xattr_handler ovl_own_xattr_handler = {
	.prefix	= OVL_XATTR_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

/* Handler with an empty prefix, i.e. matching every xattr name. */
static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
	.get = ovl_other_xattr_get,
	.set = ovl_other_xattr_set,
};

/*
 * NULL terminated xattr handler list.  The POSIX ACL handlers (only with
 * CONFIG_FS_POSIX_ACL) and the overlay-private prefix handler are listed
 * ahead of the catch-all handler.
 */
static const struct xattr_handler *ovl_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
#endif
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

A
Amir Goldstein 已提交
990 991 992 993 994 995 996
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *trap;
	int err;

	trap = ovl_get_trap_inode(sb, dir);
997 998
	err = PTR_ERR_OR_ZERO(trap);
	if (err) {
A
Amir Goldstein 已提交
999
		if (err == -ELOOP)
L
lijiazi 已提交
1000
			pr_err("conflicting %s path\n", name);
A
Amir Goldstein 已提交
1001 1002 1003 1004 1005 1006 1007
		return err;
	}

	*ptrap = trap;
	return 0;
}

1008 1009 1010 1011 1012 1013 1014 1015 1016
/*
 * Determine how we treat concurrent use of upperdir/workdir based on the
 * index feature. This is papering over mount leaks of container runtimes,
 * for example, an old overlay mount is leaked and now its upperdir is
 * attempted to be used as a lower layer in a new overlay mount.
 */
static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
	if (ofs->config.index) {
L
lijiazi 已提交
1017
		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1018 1019 1020
		       name);
		return -EBUSY;
	} else {
L
lijiazi 已提交
1021
		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1022 1023 1024 1025 1026
			name);
		return 0;
	}
}

A
Amir Goldstein 已提交
1027 1028
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
			 struct path *upperpath)
1029
{
M
Miklos Szeredi 已提交
1030
	struct vfsmount *upper_mnt;
1031 1032
	int err;

M
Miklos Szeredi 已提交
1033
	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1034 1035 1036 1037 1038
	if (err)
		goto out;

	/* Upper fs should not be r/o */
	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
L
lijiazi 已提交
1039
		pr_err("upper fs is r/o, try multi-lower layers mount\n");
1040 1041 1042 1043
		err = -EINVAL;
		goto out;
	}

M
Miklos Szeredi 已提交
1044
	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1045 1046 1047
	if (err)
		goto out;

A
Amir Goldstein 已提交
1048 1049 1050 1051 1052
	err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
			     "upperdir");
	if (err)
		goto out;

M
Miklos Szeredi 已提交
1053 1054 1055
	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
L
lijiazi 已提交
1056
		pr_err("failed to clone upperpath\n");
M
Miklos Szeredi 已提交
1057 1058 1059 1060 1061
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
M
Miklos Szeredi 已提交
1062
	ofs->upper_mnt = upper_mnt;
1063 1064 1065 1066

	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
		ofs->upperdir_locked = true;
	} else {
1067 1068 1069
		err = ovl_report_in_use(ofs, "upperdir");
		if (err)
			goto out;
1070 1071
	}

1072 1073 1074 1075 1076
	err = 0;
out:
	return err;
}

A
Amir Goldstein 已提交
1077 1078
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct path *workpath)
1079
{
1080
	struct vfsmount *mnt = ofs->upper_mnt;
1081
	struct dentry *temp;
1082
	int fh_type;
1083 1084
	int err;

1085 1086 1087 1088
	err = mnt_want_write(mnt);
	if (err)
		return err;

M
Miklos Szeredi 已提交
1089 1090
	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	if (!ofs->workdir)
1091
		goto out;
1092

A
Amir Goldstein 已提交
1093 1094 1095 1096
	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
	if (err)
		goto out;

1097 1098 1099 1100 1101 1102 1103 1104
	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
1105
		goto out;
1106 1107 1108 1109 1110 1111

	/*
	 * We allowed this configuration and don't want to break users over
	 * kernel upgrade. So warn instead of erroring out.
	 */
	if (!err)
L
lijiazi 已提交
1112
		pr_warn("upper fs needs to support d_type.\n");
1113 1114

	/* Check if upper/work fs supports O_TMPFILE */
M
Miklos Szeredi 已提交
1115 1116 1117
	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(temp);
	if (ofs->tmpfile)
1118 1119
		dput(temp);
	else
L
lijiazi 已提交
1120
		pr_warn("upper fs does not support tmpfile.\n");
1121 1122 1123 1124

	/*
	 * Check if upper/work fs supports trusted.overlay.* xattr
	 */
M
Miklos Szeredi 已提交
1125
	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
1126
	if (err) {
M
Miklos Szeredi 已提交
1127
		ofs->noxattr = true;
1128
		ofs->config.index = false;
1129
		ofs->config.metacopy = false;
L
lijiazi 已提交
1130
		pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1131
		err = 0;
1132
	} else {
M
Miklos Szeredi 已提交
1133
		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1134 1135 1136
	}

	/* Check if upper/work fs supports file handles */
1137 1138
	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
	if (ofs->config.index && !fh_type) {
M
Miklos Szeredi 已提交
1139
		ofs->config.index = false;
L
lijiazi 已提交
1140
		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1141 1142
	}

1143 1144 1145 1146
	/* Check if upper fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_bits = 0;

1147 1148
	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
L
lijiazi 已提交
1149
		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1150 1151
		ofs->config.nfs_export = false;
	}
1152 1153 1154
out:
	mnt_drop_write(mnt);
	return err;
1155 1156
}

A
Amir Goldstein 已提交
1157 1158
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
			   struct path *upperpath)
1159 1160
{
	int err;
M
Miklos Szeredi 已提交
1161
	struct path workpath = { };
1162

M
Miklos Szeredi 已提交
1163
	err = ovl_mount_dir(ofs->config.workdir, &workpath);
1164 1165 1166 1167
	if (err)
		goto out;

	err = -EINVAL;
M
Miklos Szeredi 已提交
1168
	if (upperpath->mnt != workpath.mnt) {
L
lijiazi 已提交
1169
		pr_err("workdir and upperdir must reside under the same mount\n");
1170 1171
		goto out;
	}
M
Miklos Szeredi 已提交
1172
	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
L
lijiazi 已提交
1173
		pr_err("workdir and upperdir must be separate subtrees\n");
1174 1175 1176
		goto out;
	}

1177 1178 1179
	ofs->workbasedir = dget(workpath.dentry);

	if (ovl_inuse_trylock(ofs->workbasedir)) {
M
Miklos Szeredi 已提交
1180
		ofs->workdir_locked = true;
1181
	} else {
1182 1183 1184
		err = ovl_report_in_use(ofs, "workdir");
		if (err)
			goto out;
1185 1186
	}

1187 1188 1189 1190 1191
	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
			     "workdir");
	if (err)
		goto out;

A
Amir Goldstein 已提交
1192
	err = ovl_make_workdir(sb, ofs, &workpath);
M
Miklos Szeredi 已提交
1193

1194
out:
M
Miklos Szeredi 已提交
1195 1196
	path_put(&workpath);

1197 1198 1199
	return err;
}

A
Amir Goldstein 已提交
1200 1201
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct ovl_entry *oe, struct path *upperpath)
1202
{
1203
	struct vfsmount *mnt = ofs->upper_mnt;
1204 1205
	int err;

1206 1207 1208 1209
	err = mnt_want_write(mnt);
	if (err)
		return err;

1210
	/* Verify lower root is upper root origin */
1211
	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1212
				true);
1213
	if (err) {
L
lijiazi 已提交
1214
		pr_err("failed to verify upper root origin\n");
1215 1216 1217
		goto out;
	}

M
Miklos Szeredi 已提交
1218 1219
	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (ofs->indexdir) {
A
Amir Goldstein 已提交
1220 1221 1222 1223 1224
		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
				     "indexdir");
		if (err)
			goto out;

1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236
		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in "trusted.overlay.origin"
		 * xattr. If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * "trusted.overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
L
lijiazi 已提交
1237
				pr_err("failed to verify index dir 'origin' xattr\n");
1238 1239
		}
		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1240
		if (err)
L
lijiazi 已提交
1241
			pr_err("failed to verify index dir 'upper' xattr\n");
1242 1243 1244

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
1245
			err = ovl_indexdir_cleanup(ofs);
1246
	}
M
Miklos Szeredi 已提交
1247
	if (err || !ofs->indexdir)
L
lijiazi 已提交
1248
		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1249 1250

out:
1251
	mnt_drop_write(mnt);
1252 1253 1254
	return err;
}

1255 1256 1257 1258
static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
	unsigned int i;

1259
	if (!ofs->config.nfs_export && !ofs->upper_mnt)
1260 1261 1262 1263 1264 1265 1266
		return true;

	for (i = 0; i < ofs->numlowerfs; i++) {
		/*
		 * We use uuid to associate an overlay lower file handle with a
		 * lower layer, so we can accept lower fs with null uuid as long
		 * as all lower layers with null uuid are on the same fs.
1267 1268
		 * if we detect multiple lower fs with the same uuid, we
		 * disable lower file handle decoding on all of them.
1269
		 */
1270 1271
		if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) {
			ofs->lower_fs[i].bad_uuid = true;
1272
			return false;
1273
		}
1274 1275 1276 1277
	}
	return true;
}

1278
/* Get a unique fsid for the layer */
1279
static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1280
{
1281
	struct super_block *sb = path->mnt->mnt_sb;
1282 1283 1284
	unsigned int i;
	dev_t dev;
	int err;
1285
	bool bad_uuid = false;
1286 1287 1288 1289 1290 1291 1292 1293 1294 1295

	/* fsid 0 is reserved for upper fs even with non upper overlay */
	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
		return 0;

	for (i = 0; i < ofs->numlowerfs; i++) {
		if (ofs->lower_fs[i].sb == sb)
			return i + 1;
	}

1296
	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1297 1298 1299 1300
		bad_uuid = true;
		if (ofs->config.index || ofs->config.nfs_export) {
			ofs->config.index = false;
			ofs->config.nfs_export = false;
L
lijiazi 已提交
1301
			pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
1302 1303 1304 1305
				uuid_is_null(&sb->s_uuid) ? "null" :
							    "conflicting",
				path->dentry);
		}
1306 1307
	}

1308 1309
	err = get_anon_bdev(&dev);
	if (err) {
L
lijiazi 已提交
1310
		pr_err("failed to get anonymous bdev for lowerpath\n");
1311 1312 1313 1314 1315
		return err;
	}

	ofs->lower_fs[ofs->numlowerfs].sb = sb;
	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
1316
	ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid;
1317 1318 1319 1320 1321
	ofs->numlowerfs++;

	return ofs->numlowerfs;
}

A
Amir Goldstein 已提交
1322 1323
static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
				struct path *stack, unsigned int numlower)
1324 1325 1326 1327 1328
{
	int err;
	unsigned int i;

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1329
	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
1330
				    GFP_KERNEL);
M
Miklos Szeredi 已提交
1331
	if (ofs->lower_layers == NULL)
1332
		goto out;
1333 1334 1335 1336 1337 1338

	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
				GFP_KERNEL);
	if (ofs->lower_fs == NULL)
		goto out;

1339 1340
	for (i = 0; i < numlower; i++) {
		struct vfsmount *mnt;
A
Amir Goldstein 已提交
1341
		struct inode *trap;
1342
		int fsid;
1343

1344
		err = fsid = ovl_get_fsid(ofs, &stack[i]);
1345
		if (err < 0)
1346 1347
			goto out;

A
Amir Goldstein 已提交
1348 1349 1350 1351
		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
		if (err)
			goto out;

1352 1353 1354 1355 1356 1357
		if (ovl_is_inuse(stack[i].dentry)) {
			err = ovl_report_in_use(ofs, "lowerdir");
			if (err)
				goto out;
		}

1358 1359 1360
		mnt = clone_private_mount(&stack[i]);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
L
lijiazi 已提交
1361
			pr_err("failed to clone lowerpath\n");
A
Amir Goldstein 已提交
1362
			iput(trap);
1363 1364
			goto out;
		}
1365

1366 1367 1368 1369 1370 1371
		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

A
Amir Goldstein 已提交
1372
		ofs->lower_layers[ofs->numlower].trap = trap;
M
Miklos Szeredi 已提交
1373
		ofs->lower_layers[ofs->numlower].mnt = mnt;
1374
		ofs->lower_layers[ofs->numlower].idx = i + 1;
1375 1376 1377 1378 1379
		ofs->lower_layers[ofs->numlower].fsid = fsid;
		if (fsid) {
			ofs->lower_layers[ofs->numlower].fs =
				&ofs->lower_fs[fsid - 1];
		}
M
Miklos Szeredi 已提交
1380
		ofs->numlower++;
1381
	}
1382

1383 1384 1385 1386 1387 1388 1389 1390 1391
	/*
	 * When all layers on same fs, overlay can use real inode numbers.
	 * With mount option "xino=on", mounter declares that there are enough
	 * free high bits in underlying fs to hold the unique fsid.
	 * If overlayfs does encounter underlying inodes using the high xino
	 * bits reserved for fsid, it emits a warning and uses the original
	 * inode number.
	 */
	if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
1392
		ofs->xino_bits = 0;
1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404
		ofs->config.xino = OVL_XINO_OFF;
	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
		/*
		 * This is a roundup of number of bits needed for numlowerfs+1
		 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
		 * upper fs even with non upper overlay.
		 */
		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
		ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
	}

	if (ofs->xino_bits) {
L
lijiazi 已提交
1405
		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1406 1407
			ofs->xino_bits);
	}
1408

1409 1410 1411 1412 1413
	err = 0;
out:
	return err;
}

1414
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
M
Miklos Szeredi 已提交
1415
					    struct ovl_fs *ofs)
1416 1417 1418
{
	int err;
	char *lowertmp, *lower;
1419 1420
	struct path *stack = NULL;
	unsigned int stacklen, numlower = 0, i;
1421
	bool remote = false;
1422
	struct ovl_entry *oe;
1423 1424

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1425
	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1426
	if (!lowertmp)
1427
		goto out_err;
1428 1429 1430 1431

	err = -EINVAL;
	stacklen = ovl_split_lowerdirs(lowertmp);
	if (stacklen > OVL_MAX_STACK) {
L
lijiazi 已提交
1432
		pr_err("too many lower directories, limit is %d\n",
1433
		       OVL_MAX_STACK);
1434
		goto out_err;
M
Miklos Szeredi 已提交
1435
	} else if (!ofs->config.upperdir && stacklen == 1) {
L
lijiazi 已提交
1436
		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1437
		goto out_err;
1438 1439
	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
		   ofs->config.redirect_follow) {
L
lijiazi 已提交
1440
		pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
1441
		ofs->config.nfs_export = false;
1442 1443 1444 1445 1446
	}

	err = -ENOMEM;
	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
	if (!stack)
1447
		goto out_err;
1448 1449 1450 1451

	err = -EINVAL;
	lower = lowertmp;
	for (numlower = 0; numlower < stacklen; numlower++) {
M
Miklos Szeredi 已提交
1452
		err = ovl_lower_dir(lower, &stack[numlower], ofs,
1453 1454
				    &sb->s_stack_depth, &remote);
		if (err)
1455
			goto out_err;
1456 1457 1458 1459 1460 1461 1462

		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
L
lijiazi 已提交
1463
		pr_err("maximum fs stacking depth exceeded\n");
1464
		goto out_err;
1465 1466
	}

A
Amir Goldstein 已提交
1467
	err = ovl_get_lower_layers(sb, ofs, stack, numlower);
1468 1469 1470 1471 1472 1473 1474 1475 1476 1477
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
M
Miklos Szeredi 已提交
1478
		oe->lowerstack[i].layer = &ofs->lower_layers[i];
1479
	}
1480 1481 1482 1483 1484 1485 1486 1487 1488 1489

	if (remote)
		sb->s_d_op = &ovl_reval_dentry_operations;
	else
		sb->s_d_op = &ovl_dentry_operations;

out:
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);
1490 1491 1492 1493 1494 1495
	kfree(lowertmp);

	return oe;

out_err:
	oe = ERR_PTR(err);
1496 1497 1498
	goto out;
}

A
Amir Goldstein 已提交
1499 1500 1501 1502 1503
/*
 * Check if this layer root is a descendant of:
 * - another layer of this overlayfs instance
 * - upper/work dir of any overlayfs instance
 */
1504 1505
static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
			   struct dentry *dentry, const char *name)
A
Amir Goldstein 已提交
1506
{
1507
	struct dentry *next = dentry, *parent;
A
Amir Goldstein 已提交
1508 1509
	int err = 0;

1510
	if (!dentry)
A
Amir Goldstein 已提交
1511 1512
		return 0;

1513 1514 1515 1516
	parent = dget_parent(next);

	/* Walk back ancestors to root (inclusive) looking for traps */
	while (!err && parent != next) {
1517
		if (ovl_lookup_trap_inode(sb, parent)) {
A
Amir Goldstein 已提交
1518
			err = -ELOOP;
L
lijiazi 已提交
1519
			pr_err("overlapping %s path\n", name);
1520 1521
		} else if (ovl_is_inuse(parent)) {
			err = ovl_report_in_use(ofs, name);
A
Amir Goldstein 已提交
1522 1523
		}
		next = parent;
1524 1525
		parent = dget_parent(next);
		dput(next);
A
Amir Goldstein 已提交
1526 1527
	}

1528
	dput(parent);
A
Amir Goldstein 已提交
1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541

	return err;
}

/*
 * Check if any of the layers or work dirs overlap.
 *
 * Runs ovl_check_layer() on the upper root and the work base dir (when there
 * is an upper layer) and then on every lower layer root.  Returns the first
 * error encountered, or 0.
 */
static int ovl_check_overlapping_layers(struct super_block *sb,
					struct ovl_fs *ofs)
{
	int i, err;

	if (ofs->upper_mnt) {
		err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
				      "upperdir");
		/*
		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
		 * this instance and covers overlapping work and index dirs,
		 * unless work or index dir have been moved since created inside
		 * workbasedir.  In that case, we already have their traps in
		 * inode cache and we will catch that case on lookup.
		 */
		if (!err)
			err = ovl_check_layer(sb, ofs, ofs->workbasedir,
					      "workdir");
		if (err)
			return err;
	}

	for (i = 0; i < ofs->numlower; i++) {
		struct dentry *root = ofs->lower_layers[i].mnt->mnt_root;

		err = ovl_check_layer(sb, ofs, root, "lowerdir");
		if (err)
			return err;
	}

	return 0;
}

M
Miklos Szeredi 已提交
1570 1571
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
K
Kees Cook 已提交
1572
	struct path upperpath = { };
M
Miklos Szeredi 已提交
1573
	struct dentry *root_dentry;
1574
	struct ovl_entry *oe;
M
Miklos Szeredi 已提交
1575
	struct ovl_fs *ofs;
1576
	struct cred *cred;
M
Miklos Szeredi 已提交
1577 1578
	int err;

E
Erez Zadok 已提交
1579
	err = -ENOMEM;
M
Miklos Szeredi 已提交
1580 1581
	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ofs)
M
Miklos Szeredi 已提交
1582 1583
		goto out;

M
Miklos Szeredi 已提交
1584
	ofs->creator_cred = cred = prepare_creds();
1585 1586 1587
	if (!cred)
		goto out_err;

M
Miklos Szeredi 已提交
1588
	ofs->config.index = ovl_index_def;
1589
	ofs->config.nfs_export = ovl_nfs_export_def;
1590
	ofs->config.xino = ovl_xino_def();
1591
	ofs->config.metacopy = ovl_metacopy_def;
M
Miklos Szeredi 已提交
1592
	err = ovl_parse_opt((char *) data, &ofs->config);
E
Erez Zadok 已提交
1593
	if (err)
1594
		goto out_err;
E
Erez Zadok 已提交
1595

M
Miklos Szeredi 已提交
1596
	err = -EINVAL;
M
Miklos Szeredi 已提交
1597
	if (!ofs->config.lowerdir) {
1598
		if (!silent)
L
lijiazi 已提交
1599
			pr_err("missing 'lowerdir'\n");
1600
		goto out_err;
M
Miklos Szeredi 已提交
1601 1602
	}

M
Miklos Szeredi 已提交
1603
	sb->s_stack_depth = 0;
1604
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1605
	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
1606 1607 1608
	if (ofs->config.xino != OVL_XINO_OFF)
		ofs->xino_bits = BITS_PER_LONG - 32;

A
Amir Goldstein 已提交
1609 1610 1611
	/* alloc/destroy_inode needed for setting up traps in inode cache */
	sb->s_op = &ovl_super_operations;

M
Miklos Szeredi 已提交
1612 1613
	if (ofs->config.upperdir) {
		if (!ofs->config.workdir) {
L
lijiazi 已提交
1614
			pr_err("missing 'workdir'\n");
1615
			goto out_err;
M
Miklos Szeredi 已提交
1616
		}
M
Miklos Szeredi 已提交
1617

A
Amir Goldstein 已提交
1618
		err = ovl_get_upper(sb, ofs, &upperpath);
M
Miklos Szeredi 已提交
1619
		if (err)
1620
			goto out_err;
1621

A
Amir Goldstein 已提交
1622
		err = ovl_get_workdir(sb, ofs, &upperpath);
1623
		if (err)
1624
			goto out_err;
1625

M
Miklos Szeredi 已提交
1626
		if (!ofs->workdir)
1627
			sb->s_flags |= SB_RDONLY;
1628

M
Miklos Szeredi 已提交
1629 1630
		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1631

M
Miklos Szeredi 已提交
1632
	}
M
Miklos Szeredi 已提交
1633
	oe = ovl_get_lowerstack(sb, ofs);
1634 1635
	err = PTR_ERR(oe);
	if (IS_ERR(oe))
1636
		goto out_err;
M
Miklos Szeredi 已提交
1637

H
hujianyang 已提交
1638
	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
M
Miklos Szeredi 已提交
1639
	if (!ofs->upper_mnt)
1640
		sb->s_flags |= SB_RDONLY;
M
Miklos Szeredi 已提交
1641

M
Miklos Szeredi 已提交
1642
	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
A
Amir Goldstein 已提交
1643
		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
1644
		if (err)
1645
			goto out_free_oe;
1646

1647 1648 1649 1650
		/* Force r/o mount with no index dir */
		if (!ofs->indexdir) {
			dput(ofs->workdir);
			ofs->workdir = NULL;
1651
			sb->s_flags |= SB_RDONLY;
1652 1653
		}

1654 1655
	}

A
Amir Goldstein 已提交
1656 1657 1658 1659
	err = ovl_check_overlapping_layers(sb, ofs);
	if (err)
		goto out_free_oe;

1660
	/* Show index=off in /proc/mounts for forced r/o mount */
1661
	if (!ofs->indexdir) {
M
Miklos Szeredi 已提交
1662
		ofs->config.index = false;
1663
		if (ofs->upper_mnt && ofs->config.nfs_export) {
L
lijiazi 已提交
1664
			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
1665 1666 1667
			ofs->config.nfs_export = false;
		}
	}
1668

1669
	if (ofs->config.metacopy && ofs->config.nfs_export) {
L
lijiazi 已提交
1670
		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
1671 1672 1673
		ofs->config.nfs_export = false;
	}

1674 1675 1676
	if (ofs->config.nfs_export)
		sb->s_export_op = &ovl_export_operations;

1677 1678 1679
	/* Never override disk quota limits or use reserved space */
	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);

1680 1681
	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_xattr = ovl_xattr_handlers;
M
Miklos Szeredi 已提交
1682
	sb->s_fs_info = ofs;
1683
	sb->s_flags |= SB_POSIXACL;
1684

1685
	err = -ENOMEM;
1686
	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
M
Miklos Szeredi 已提交
1687
	if (!root_dentry)
1688
		goto out_free_oe;
M
Miklos Szeredi 已提交
1689

1690 1691
	root_dentry->d_fsdata = oe;

M
Miklos Szeredi 已提交
1692
	mntput(upperpath.mnt);
1693
	if (upperpath.dentry) {
1694
		ovl_dentry_set_upper_alias(root_dentry);
M
Miklos Szeredi 已提交
1695 1696
		if (ovl_is_impuredir(upperpath.dentry))
			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1697
	}
M
Miklos Szeredi 已提交
1698

1699 1700
	/* Root is always merge -> can have whiteouts */
	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1701
	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1702
	ovl_set_upperdata(d_inode(root_dentry));
1703
	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
1704
		       ovl_dentry_lower(root_dentry), NULL);
M
Miklos Szeredi 已提交
1705

M
Miklos Szeredi 已提交
1706 1707 1708 1709
	sb->s_root = root_dentry;

	return 0;

1710 1711
out_free_oe:
	ovl_entry_stack_free(oe);
1712
	kfree(oe);
1713
out_err:
M
Miklos Szeredi 已提交
1714
	path_put(&upperpath);
M
Miklos Szeredi 已提交
1715
	ovl_free_fs(ofs);
M
Miklos Szeredi 已提交
1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727
out:
	return err;
}

/*
 * Mount entry point referenced by ovl_fs_type.mount.  Hands off to
 * mount_nodev() with ovl_fill_super() as the superblock fill callback;
 * @dev_name is not used.
 */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
}

static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
1728
	.name		= "overlay",
M
Miklos Szeredi 已提交
1729 1730 1731
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
1732
MODULE_ALIAS_FS("overlay");
M
Miklos Szeredi 已提交
1733

1734 1735 1736 1737 1738 1739 1740
/*
 * Constructor passed to kmem_cache_create() for the ovl_inode cache.
 * @foo is the struct ovl_inode object; only its embedded VFS inode is
 * initialised here.
 */
static void ovl_inode_init_once(void *foo)
{
	struct ovl_inode *oi = foo;

	inode_init_once(&oi->vfs_inode);
}

M
Miklos Szeredi 已提交
1741 1742
/*
 * Module init: create the ovl_inode slab cache, then register the
 * filesystem type.  If registration fails the cache is destroyed again.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created, or the
 * error returned by register_filesystem().
 */
static int __init ovl_init(void)
{
	int err;

	ovl_inode_cachep = kmem_cache_create("ovl_inode",
					     sizeof(struct ovl_inode), 0,
					     (SLAB_RECLAIM_ACCOUNT|
					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					     ovl_inode_init_once);
	if (!ovl_inode_cachep)
		return -ENOMEM;

	err = register_filesystem(&ovl_fs_type);
	if (!err)
		return 0;

	/* Registration failed: undo the cache creation */
	kmem_cache_destroy(ovl_inode_cachep);
	return err;
}

/*
 * Module exit: unregister the filesystem type first, then tear down the
 * inode cache.
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Inodes may still be queued for delayed rcu freeing.  Wait for all
	 * of them to be flushed before the cache is destroyed.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);
}

/* Hook ovl_init()/ovl_exit() up as the module's init and exit routines. */
module_init(ovl_init);
module_exit(ovl_exit);