super.c 42.1 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
M
Miklos Szeredi 已提交
2 3 4 5 6
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

7
#include <uapi/linux/magic.h>
M
Miklos Szeredi 已提交
8 9 10 11 12 13
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
A
Andy Whitcroft 已提交
14
#include <linux/statfs.h>
E
Erez Zadok 已提交
15
#include <linux/seq_file.h>
M
Miklos Szeredi 已提交
16
#include <linux/posix_acl_xattr.h>
17
#include <linux/exportfs.h>
M
Miklos Szeredi 已提交
18 19 20 21 22 23 24 25 26
#include "overlayfs.h"

/* Module metadata: author, one-line description and license. */
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


/* Forward declaration only; the structure is not defined in the visible part of this file. */
struct ovl_dir_cache;

27 28
/*
 * NOTE(review): presumably the upper bound on the number of stacked lower
 * layers; the use site is not visible here -- confirm before relying on it.
 */
#define OVL_MAX_STACK 500

29 30
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
31
MODULE_PARM_DESC(redirect_dir,
32
		 "Default to on or off for the redirect_dir feature");
M
Miklos Szeredi 已提交
33

34 35 36 37
static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
38
MODULE_PARM_DESC(redirect_always_follow,
39 40
		 "Follow redirects even if redirect_dir feature is turned off");

41 42
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
43
MODULE_PARM_DESC(index,
44 45
		 "Default to on or off for the inodes index feature");

46 47
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
48
MODULE_PARM_DESC(nfs_export,
49 50
		 "Default to on or off for the NFS export feature");

51 52
static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
53
MODULE_PARM_DESC(xino_auto,
54 55
		 "Auto enable xino feature");

56 57 58 59 60 61 62 63
/* Call dput() on the dentry of every one of the @oe->numlower stack entries. */
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
	unsigned int idx = 0;

	while (idx < oe->numlower) {
		dput(oe->lowerstack[idx].dentry);
		idx++;
	}
}

64 65
static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
66
MODULE_PARM_DESC(metacopy,
67 68
		 "Default to on or off for the metadata only copy up feature");

M
Miklos Szeredi 已提交
69 70 71 72 73
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
74
		ovl_entry_stack_free(oe);
M
Miklos Szeredi 已提交
75 76 77 78
		kfree_rcu(oe, rcu);
	}
}

79
static struct dentry *ovl_d_real(struct dentry *dentry,
80
				 const struct inode *inode)
M
Miklos Szeredi 已提交
81 82 83
{
	struct dentry *real;

84 85 86 87
	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

88
	if (!d_is_reg(dentry)) {
M
Miklos Szeredi 已提交
89 90 91 92 93 94
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	real = ovl_dentry_upper(dentry);
95
	if (real && (inode == d_inode(real)))
M
Miklos Szeredi 已提交
96 97
		return real;

98 99 100 101
	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	real = ovl_dentry_lowerdata(dentry);
M
Miklos Szeredi 已提交
102 103 104
	if (!real)
		goto bug;

M
Miklos Szeredi 已提交
105
	/* Handle recursion */
106
	real = d_real(real, inode);
M
Miklos Szeredi 已提交
107

M
Miklos Szeredi 已提交
108 109 110
	if (!inode || inode == d_inode(real))
		return real;
bug:
M
Miklos Szeredi 已提交
111
	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
M
Miklos Szeredi 已提交
112 113 114 115
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
	return dentry;
}

116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/*
 * ->d_revalidate: forward revalidation to every lower dentry that has
 * DCACHE_OP_REVALIDATE set.
 *
 * Returns 1 if no lower dentry objected, a negative value passed through
 * from a lower ->d_revalidate(), or -ESTALE when a lower dentry reported 0
 * (in which case it is also d_invalidate()d unless LOOKUP_RCU is set).
 */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int i;

	for (i = 0; i < oe->numlower; i++) {
		struct dentry *lower = oe->lowerstack[i].dentry;
		int ret;

		if (!(lower->d_flags & DCACHE_OP_REVALIDATE))
			continue;

		ret = lower->d_op->d_revalidate(lower, flags);
		if (ret < 0)
			return ret;
		if (ret == 0) {
			if (!(flags & LOOKUP_RCU))
				d_invalidate(lower);
			return -ESTALE;
		}
	}
	return 1;
}

/*
 * ->d_weak_revalidate: forward to every lower dentry that has
 * DCACHE_OP_WEAK_REVALIDATE set, stopping at the first result <= 0.
 *
 * Returns that first non-positive result, otherwise the result of the last
 * lower call (1 if no lower dentry implements the operation).
 */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int i;
	int ret = 1;

	for (i = 0; i < oe->numlower; i++) {
		struct dentry *lower = oe->lowerstack[i].dentry;

		if (!(lower->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			continue;

		ret = lower->d_op->d_weak_revalidate(lower, flags);
		if (ret <= 0)
			return ret;
	}
	return ret;
}

M
Miklos Szeredi 已提交
157 158
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
159
	.d_real = ovl_d_real,
M
Miklos Szeredi 已提交
160 161
};

162 163
static const struct dentry_operations ovl_reval_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
164
	.d_real = ovl_d_real,
165 166 167 168
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

169 170 171 172 173 174
static struct kmem_cache *ovl_inode_cachep;

static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);

175 176 177
	if (!oi)
		return NULL;

178
	oi->cache = NULL;
M
Miklos Szeredi 已提交
179
	oi->redirect = NULL;
180
	oi->version = 0;
M
Miklos Szeredi 已提交
181
	oi->flags = 0;
182
	oi->__upperdentry = NULL;
183
	oi->lower = NULL;
184
	oi->lowerdata = NULL;
185
	mutex_init(&oi->lock);
186

187 188 189
	return &oi->vfs_inode;
}

A
Al Viro 已提交
190
static void ovl_free_inode(struct inode *inode)
191
{
A
Al Viro 已提交
192
	struct ovl_inode *oi = OVL_I(inode);
193

A
Al Viro 已提交
194 195 196
	kfree(oi->redirect);
	mutex_destroy(&oi->lock);
	kmem_cache_free(ovl_inode_cachep, oi);
197 198 199 200
}

static void ovl_destroy_inode(struct inode *inode)
{
201 202 203
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
204
	iput(oi->lower);
205 206 207 208
	if (S_ISDIR(inode->i_mode))
		ovl_dir_cache_free(inode);
	else
		iput(oi->lowerdata);
209 210
}

M
Miklos Szeredi 已提交
211
static void ovl_free_fs(struct ovl_fs *ofs)
M
Miklos Szeredi 已提交
212
{
213
	unsigned i;
M
Miklos Szeredi 已提交
214

215
	iput(ofs->workbasedir_trap);
A
Amir Goldstein 已提交
216 217 218
	iput(ofs->indexdir_trap);
	iput(ofs->workdir_trap);
	iput(ofs->upperdir_trap);
M
Miklos Szeredi 已提交
219 220 221 222 223 224 225 226
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
	mntput(ofs->upper_mnt);
227 228 229
	for (i = 1; i < ofs->numlayer; i++) {
		iput(ofs->layers[i].trap);
		mntput(ofs->layers[i].mnt);
A
Amir Goldstein 已提交
230
	}
231
	kfree(ofs->layers);
232
	for (i = 0; i < ofs->numfs; i++)
233 234
		free_anon_bdev(ofs->fs[i].pseudo_dev);
	kfree(ofs->fs);
M
Miklos Szeredi 已提交
235 236 237 238

	kfree(ofs->config.lowerdir);
	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
239
	kfree(ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
240 241 242
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
M
Miklos Szeredi 已提交
243 244
}

245 246 247 248 249 250 251
/* ->put_super: free the ovl_fs stored in @sb->s_fs_info. */
static void ovl_put_super(struct super_block *sb)
{
	ovl_free_fs(sb->s_fs_info);
}

252
/* Sync real dirty inodes in upper filesystem (if it exists) */
253 254
static int ovl_sync_fs(struct super_block *sb, int wait)
{
M
Miklos Szeredi 已提交
255
	struct ovl_fs *ofs = sb->s_fs_info;
256 257 258
	struct super_block *upper_sb;
	int ret;

M
Miklos Szeredi 已提交
259
	if (!ofs->upper_mnt)
260
		return 0;
261 262 263 264 265 266 267 268 269 270

	/*
	 * If this is a sync(2) call or an emergency sync, all the super blocks
	 * will be iterated, including upper_sb, so no need to do anything.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
271 272
		return 0;

273 274
	upper_sb = ofs->upper_mnt->mnt_sb;

275
	down_read(&upper_sb->s_umount);
276
	ret = sync_filesystem(upper_sb);
277
	up_read(&upper_sb->s_umount);
278

279 280 281
	return ret;
}

A
Andy Whitcroft 已提交
282 283 284 285 286 287
/**
 * ovl_statfs
 * @sb: The overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
288
 * filesystem pass the statfs to the upper filesystem (if it exists)
A
Andy Whitcroft 已提交
289 290 291 292 293 294 295 296
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

297
	ovl_path_real(root_dentry, &path);
A
Andy Whitcroft 已提交
298 299 300

	err = vfs_statfs(&path, buf);
	if (!err) {
M
Miklos Szeredi 已提交
301
		buf->f_namelen = ofs->namelen;
A
Andy Whitcroft 已提交
302 303 304 305 306 307
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

308
/* Will this overlay be forced to mount/remount ro? */
M
Miklos Szeredi 已提交
309
static bool ovl_force_readonly(struct ovl_fs *ofs)
310
{
M
Miklos Szeredi 已提交
311
	return (!ofs->upper_mnt || !ofs->workdir);
312 313
}

314 315 316 317 318
/* Default redirect mode string, derived from the redirect_dir module param. */
static const char *ovl_redirect_mode_def(void)
{
	if (ovl_redirect_dir_def)
		return "on";

	return "off";
}

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
/* Values of the xino setting; they double as indexes into ovl_xino_str[]. */
enum {
	OVL_XINO_OFF = 0,
	OVL_XINO_AUTO = 1,
	OVL_XINO_ON = 2,
};

/* Option-string spelling of each OVL_XINO_* value. */
static const char * const ovl_xino_str[] = {
	[OVL_XINO_OFF]	= "off",
	[OVL_XINO_AUTO]	= "auto",
	[OVL_XINO_ON]	= "on",
};

/* Default xino setting: AUTO when the xino_auto module param is set, else OFF. */
static inline int ovl_xino_def(void)
{
	if (ovl_xino_auto_def)
		return OVL_XINO_AUTO;

	return OVL_XINO_OFF;
}

E
Erez Zadok 已提交
336 337 338 339 340 341 342 343 344
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
M
Miklos Szeredi 已提交
345
	struct ovl_fs *ofs = sb->s_fs_info;
E
Erez Zadok 已提交
346

M
Miklos Szeredi 已提交
347 348 349 350
	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
M
Miklos Szeredi 已提交
351
	}
M
Miklos Szeredi 已提交
352
	if (ofs->config.default_permissions)
M
Miklos Szeredi 已提交
353
		seq_puts(m, ",default_permissions");
354 355
	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
356
	if (ofs->config.index != ovl_index_def)
357
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
358 359 360
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
361
	if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
362
		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
363 364 365
	if (ofs->config.metacopy != ovl_metacopy_def)
		seq_printf(m, ",metacopy=%s",
			   ofs->config.metacopy ? "on" : "off");
E
Erez Zadok 已提交
366 367 368
	return 0;
}

369 370
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
M
Miklos Szeredi 已提交
371
	struct ovl_fs *ofs = sb->s_fs_info;
372

373
	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
374 375 376 377 378
		return -EROFS;

	return 0;
}

M
Miklos Szeredi 已提交
379
static const struct super_operations ovl_super_operations = {
380
	.alloc_inode	= ovl_alloc_inode,
A
Al Viro 已提交
381
	.free_inode	= ovl_free_inode,
382 383
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
384
	.put_super	= ovl_put_super,
385
	.sync_fs	= ovl_sync_fs,
A
Andy Whitcroft 已提交
386
	.statfs		= ovl_statfs,
E
Erez Zadok 已提交
387
	.show_options	= ovl_show_options,
388
	.remount_fs	= ovl_remount,
M
Miklos Szeredi 已提交
389 390 391 392 393 394
};

/* Token ids produced by match_token() against ovl_tokens[]. */
enum {
	/* options carrying a string argument */
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,

	/* flag without argument */
	OPT_DEFAULT_PERMISSIONS,

	/* string argument, validated by ovl_parse_redirect_mode() */
	OPT_REDIRECT_DIR,

	/* one token per accepted "name=value" spelling */
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,

	/* table terminator; also reached for anything unrecognized */
	OPT_ERR,
};

static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
M
Miklos Szeredi 已提交
413
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
414
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
415 416
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
417 418
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
419 420 421
	{OPT_XINO_ON,			"xino=on"},
	{OPT_XINO_OFF,			"xino=off"},
	{OPT_XINO_AUTO,			"xino=auto"},
422 423
	{OPT_METACOPY_ON,		"metacopy=on"},
	{OPT_METACOPY_OFF,		"metacopy=off"},
M
Miklos Szeredi 已提交
424 425 426
	{OPT_ERR,			NULL}
};

M
Miklos Szeredi 已提交
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/*
 * Cut the next comma separated option off the string at *@s.
 *
 * The first unescaped ',' is overwritten with a NUL and *@s is advanced
 * past it; when no such comma exists *@s becomes NULL.  A backslash makes
 * the following character literal (so "\," does not split); the backslash
 * itself is left in place.
 *
 * Returns the start of the option, or NULL when *@s is already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		if (*cur == '\\') {
			/* step onto the escaped character; stop at a trailing '\' */
			cur++;
			if (*cur == '\0')
				break;
		}
		cur++;
	}

	*s = NULL;
	return start;
}

450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
/*
 * Translate the redirect_dir mode string into config flags.
 *
 *   "on"       - create and follow redirects
 *   "follow"   - follow redirects only
 *   "off"      - follow only if the redirect_always_follow param is set
 *   "nofollow" - nothing is enabled
 *
 * Returns 0, or -EINVAL (after logging) for any other string.
 */
static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
{
	if (!strcmp(mode, "on")) {
		config->redirect_dir = true;
		/*
		 * Does not make sense to have redirect creation without
		 * redirect following.
		 */
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "follow")) {
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "off")) {
		if (ovl_redirect_always_follow)
			config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "nofollow"))
		return 0;

	pr_err("bad mount option \"redirect_dir=%s\"\n",
	       mode);
	return -EINVAL;
}

M
Miklos Szeredi 已提交
473 474 475
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;
476
	int err;
477
	bool metacopy_opt = false, redirect_opt = false;
M
Miklos Szeredi 已提交
478

479 480 481 482
	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

M
Miklos Szeredi 已提交
483
	while ((p = ovl_next_opt(&opt)) != NULL) {
M
Miklos Szeredi 已提交
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
		int token;
		substring_t args[MAX_OPT_ARGS];

		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

M
Miklos Szeredi 已提交
513 514 515 516
		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

517 518 519 520 521
		case OPT_REDIRECT_DIR:
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
522
			redirect_opt = true;
M
Miklos Szeredi 已提交
523 524
			break;

525 526 527 528 529 530 531 532
		case OPT_INDEX_ON:
			config->index = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			break;

533 534 535 536 537 538 539 540
		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			break;

541 542 543 544 545 546 547 548 549 550 551 552
		case OPT_XINO_ON:
			config->xino = OVL_XINO_ON;
			break;

		case OPT_XINO_OFF:
			config->xino = OVL_XINO_OFF;
			break;

		case OPT_XINO_AUTO:
			config->xino = OVL_XINO_AUTO;
			break;

553 554
		case OPT_METACOPY_ON:
			config->metacopy = true;
555
			metacopy_opt = true;
556 557 558 559 560 561
			break;

		case OPT_METACOPY_OFF:
			config->metacopy = false;
			break;

M
Miklos Szeredi 已提交
562
		default:
L
lijiazi 已提交
563 564
			pr_err("unrecognized mount option \"%s\" or missing value\n",
					p);
M
Miklos Szeredi 已提交
565 566 567
			return -EINVAL;
		}
	}
H
hujianyang 已提交
568 569 570

	/* Workdir is useless in non-upper mount */
	if (!config->upperdir && config->workdir) {
L
lijiazi 已提交
571
		pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
H
hujianyang 已提交
572 573 574 575 576
			config->workdir);
		kfree(config->workdir);
		config->workdir = NULL;
	}

577 578 579 580
	err = ovl_parse_redirect_mode(config, config->redirect_mode);
	if (err)
		return err;

581 582 583 584 585 586 587 588 589 590
	/*
	 * This is to make the logic below simpler.  It doesn't make any other
	 * difference, since config->redirect_dir is only used for upper.
	 */
	if (!config->upperdir && config->redirect_follow)
		config->redirect_dir = true;

	/* Resolve metacopy -> redirect_dir dependency */
	if (config->metacopy && !config->redirect_dir) {
		if (metacopy_opt && redirect_opt) {
L
lijiazi 已提交
591
			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
592 593 594 595 596 597 598 599
			       config->redirect_mode);
			return -EINVAL;
		}
		if (redirect_opt) {
			/*
			 * There was an explicit redirect_dir=... that resulted
			 * in this conflict.
			 */
L
lijiazi 已提交
600
			pr_info("disabling metacopy due to redirect_dir=%s\n",
601 602 603 604 605 606
				config->redirect_mode);
			config->metacopy = false;
		} else {
			/* Automatically enable redirect otherwise. */
			config->redirect_follow = config->redirect_dir = true;
		}
607 608 609
	}

	return 0;
M
Miklos Szeredi 已提交
610 611 612
}

/* Name passed to ovl_workdir_create() for the work directory under workbasedir. */
#define OVL_WORKDIR_NAME "work"
613
/* NOTE(review): presumably the name of the index directory; use site not visible here. */
#define OVL_INDEXDIR_NAME "index"
M
Miklos Szeredi 已提交
614

M
Miklos Szeredi 已提交
615
/*
 * Look up or create the directory @name under ofs->workbasedir.
 *
 * @ofs:     overlay fs; workbasedir and upper_mnt are read from it
 * @name:    name of the directory to look up / create
 * @persist: if true, an already existing directory is returned as is;
 *           if false, an existing directory is cleaned up once via
 *           ovl_workdir_cleanup() and the lookup is retried
 *
 * A newly created directory has its POSIX ACL xattrs removed and its mode
 * bits cleared.  The parent inode is locked (I_MUTEX_PARENT class) for the
 * whole operation.
 *
 * Returns the directory dentry, or NULL after logging a warning on failure.
 * NOTE: a failed lookup_one_len() is treated like any other failure here.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir = ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ofs->upper_mnt;
	struct iattr attr = {
		.ia_valid = ATTR_MODE,
		.ia_mode = S_IFDIR | 0,
	};
	struct dentry *work;
	bool retried = false;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

retry:
	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
	if (IS_ERR(work)) {
		err = PTR_ERR(work);
		goto out_err;
	}

	if (work->d_inode) {
		err = -EEXIST;
		/* Still there after one cleanup attempt: give up. */
		if (retried)
			goto out_dput;

		/* Persistent directories are reused as they are. */
		if (persist)
			goto out_unlock;

		retried = true;
		ovl_workdir_cleanup(dir, mnt, work, 0);
		dput(work);
		goto retry;
	}

	work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
	err = PTR_ERR(work);
	if (IS_ERR(work))
		goto out_err;

	/*
	 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
	 *
	 * a) success (there was a POSIX ACL xattr and was removed)
	 * b) -ENODATA (there was no POSIX ACL xattr)
	 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
	 *
	 * There are various other error values that could effectively
	 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
	 * if the xattr name is too long), but the set of filesystems
	 * allowed as upper are limited to "normal" ones, where checking
	 * for the above two errors is sufficient.
	 */
	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto out_dput;

	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto out_dput;

	/* Clear any inherited mode bits */
	inode_lock(work->d_inode);
	err = notify_change(work, &attr, NULL);
	inode_unlock(work->d_inode);
	if (err)
		goto out_dput;

out_unlock:
	/* The parent lock is always held when we get here. */
	inode_unlock(dir);

	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

M
Miklos Szeredi 已提交
702 703 704 705 706 707 708 709 710 711 712 713 714
/*
 * Remove backslash escapes from @s in place: each '\' is dropped and the
 * character after it is copied verbatim.  A trailing lone '\' is dropped.
 */
static void ovl_unescape(char *s)
{
	const char *src = s;
	char *dst = s;

	do {
		if (*src == '\\')
			src++;
		*dst++ = *src;
	} while (*src++);
}

M
Miklos Szeredi 已提交
715 716
static int ovl_mount_dir_noesc(const char *name, struct path *path)
{
717
	int err = -EINVAL;
M
Miklos Szeredi 已提交
718

719
	if (!*name) {
L
lijiazi 已提交
720
		pr_err("empty lowerdir\n");
721 722
		goto out;
	}
M
Miklos Szeredi 已提交
723 724
	err = kern_path(name, LOOKUP_FOLLOW, path);
	if (err) {
L
lijiazi 已提交
725
		pr_err("failed to resolve '%s': %i\n", name, err);
M
Miklos Szeredi 已提交
726 727 728
		goto out;
	}
	err = -EINVAL;
729
	if (ovl_dentry_weird(path->dentry)) {
L
lijiazi 已提交
730
		pr_err("filesystem on '%s' not supported\n", name);
M
Miklos Szeredi 已提交
731 732
		goto out_put;
	}
M
Miklos Szeredi 已提交
733
	if (!d_is_dir(path->dentry)) {
L
lijiazi 已提交
734
		pr_err("'%s' not a directory\n", name);
M
Miklos Szeredi 已提交
735 736 737 738 739
		goto out_put;
	}
	return 0;

out_put:
740
	path_put_init(path);
M
Miklos Szeredi 已提交
741 742 743 744 745 746 747 748 749 750 751 752
out:
	return err;
}

static int ovl_mount_dir(const char *name, struct path *path)
{
	int err = -ENOMEM;
	char *tmp = kstrdup(name, GFP_KERNEL);

	if (tmp) {
		ovl_unescape(tmp);
		err = ovl_mount_dir_noesc(tmp, path);
753 754 755

		if (!err)
			if (ovl_dentry_remote(path->dentry)) {
L
lijiazi 已提交
756
				pr_err("filesystem on '%s' not supported as upperdir\n",
757
				       tmp);
758
				path_put_init(path);
759 760
				err = -EINVAL;
			}
M
Miklos Szeredi 已提交
761 762 763 764 765
		kfree(tmp);
	}
	return err;
}

M
Miklos Szeredi 已提交
766 767
static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
			     const char *name)
M
Miklos Szeredi 已提交
768 769
{
	struct kstatfs statfs;
M
Miklos Szeredi 已提交
770 771 772
	int err = vfs_statfs(path, &statfs);

	if (err)
L
lijiazi 已提交
773
		pr_err("statfs failed on '%s'\n", name);
M
Miklos Szeredi 已提交
774 775 776 777 778 779 780 781 782
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Resolve and vet one lower directory.
 *
 * @name:        already unescaped path of the lower directory
 * @path:        filled in on success (reference held), released on failure
 * @ofs:         namelen, config.index, config.nfs_export and xino_mode may
 *               be updated
 * @stack_depth: raised to the layer's s_stack_depth if that is larger
 * @remote:      set to true if ovl_dentry_remote() holds for the layer
 *
 * Returns 0 or a negative error.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
{
	int fh_type;
	int err;

	err = ovl_mount_dir_noesc(name, path);
	if (err)
		return err;

	err = ovl_check_namelen(path, ofs, name);
	if (err) {
		path_put_init(path);
		return err;
	}

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	if (ovl_dentry_remote(path->dentry))
		*remote = true;

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
	if (!fh_type &&
	    (ofs->config.nfs_export ||
	     (ofs->config.index && ofs->config.upperdir))) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}

	/* Check if lower fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	return 0;
}

M
Miklos Szeredi 已提交
824 825 826 827 828 829 830 831 832 833 834 835
/*
 * Workdir should not be subdir of upperdir and vice versa.
 *
 * Identical dentries are rejected outright; otherwise the pair is ok iff
 * lock_rename() returns NULL for it.  The rename lock is dropped again
 * before returning.
 */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	struct dentry *res;

	if (workdir == upperdir)
		return false;

	res = lock_rename(workdir, upperdir);
	unlock_rename(workdir, upperdir);

	return res == NULL;
}

836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
/*
 * Split a ':' separated list in place into consecutive NUL terminated
 * strings and unescape it on the way: every unescaped ':' becomes a NUL,
 * every '\' is dropped and the character after it is kept verbatim.
 *
 * Returns the number of resulting strings (always at least 1).
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *in = str;
	char *out = str;

	for (;;) {
		char c = *in;

		if (c == ':') {
			*out = '\0';
			count++;
		} else {
			if (c == '\\')
				c = *++in;
			*out = c;
			if (c == '\0')
				break;
		}
		in++;
		out++;
	}

	return count;
}

856 857 858 859 860
/*
 * ->get for the POSIX ACL handlers: read the xattr named by the handler
 * itself (handler->name); the @name argument is not used.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	const char *xattr_name = handler->name;

	return ovl_xattr_get(dentry, inode, xattr_name, buffer, size);
}

864 865 866 867 868
/*
 * ->set for the POSIX ACL handlers.
 *
 * The value is parsed and a series of checks is run before anything is
 * modified:
 *   -EOPNOTSUPP  workdir inode is not IS_POSIXACL, or the real inode has
 *                no ->set_acl
 *   -EACCES / 0  default ACL on a non-directory (0 when removing)
 *   -EPERM       caller is neither owner nor capable
 * After that the sgid bit is cleared if needed, the xattr named by
 * handler->name is set and, on success, attributes are copied from the
 * real inode.  The @name argument is not used.
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	bool checks_passed = false;
	int err = 0;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}

	if (!IS_POSIXACL(d_inode(workdir)) || !realinode->i_op->set_acl)
		err = -EOPNOTSUPP;
	else if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
		err = acl ? -EACCES : 0;
	else if (!inode_owner_or_capable(inode))
		err = -EPERM;
	else
		checks_passed = true;

	/* The parsed ACL was only needed for the checks above. */
	posix_acl_release(acl);
	if (!checks_passed)
		return err;

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr kill_sgid = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &kill_sgid);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (err)
		return err;

	ovl_copyattr(ovl_inode_real(inode), inode);
	return 0;
}

922 923 924 925
/* ->get for xattrs under OVL_XATTR_PREFIX: always fails with -EOPNOTSUPP. */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

M
Miklos Szeredi 已提交
929 930 931 932 933
/* ->set for xattrs under OVL_XATTR_PREFIX: always fails with -EOPNOTSUPP. */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

937 938 939 940
/* ->get of the catch-all handler: pass @name straight to ovl_xattr_get(). */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	int ret = ovl_xattr_get(dentry, inode, name, buffer, size);

	return ret;
}

944 945 946 947 948
/* ->set of the catch-all handler: pass @name straight to ovl_xattr_set(). */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	int ret = ovl_xattr_set(dentry, inode, name, value, size, flags);

	return ret;
}

952 953
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
M
Miklos Szeredi 已提交
954 955
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
	.flags = ACL_TYPE_ACCESS,
956
	.get = ovl_posix_acl_xattr_get,
M
Miklos Szeredi 已提交
957 958 959
	.set = ovl_posix_acl_xattr_set,
};

960 961
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
M
Miklos Szeredi 已提交
962 963
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags = ACL_TYPE_DEFAULT,
964
	.get = ovl_posix_acl_xattr_get,
M
Miklos Szeredi 已提交
965 966 967 968 969
	.set = ovl_posix_acl_xattr_set,
};

static const struct xattr_handler ovl_own_xattr_handler = {
	.prefix	= OVL_XATTR_PREFIX,
970
	.get = ovl_own_xattr_get,
M
Miklos Szeredi 已提交
971 972 973 974 975
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
976
	.get = ovl_other_xattr_get,
M
Miklos Szeredi 已提交
977 978 979 980
	.set = ovl_other_xattr_set,
};

static const struct xattr_handler *ovl_xattr_handlers[] = {
981
#ifdef CONFIG_FS_POSIX_ACL
M
Miklos Szeredi 已提交
982 983
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
984
#endif
M
Miklos Szeredi 已提交
985 986 987 988 989
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

A
Amir Goldstein 已提交
990 991 992 993 994 995 996
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *trap;
	int err;

	trap = ovl_get_trap_inode(sb, dir);
997 998
	err = PTR_ERR_OR_ZERO(trap);
	if (err) {
A
Amir Goldstein 已提交
999
		if (err == -ELOOP)
L
lijiazi 已提交
1000
			pr_err("conflicting %s path\n", name);
A
Amir Goldstein 已提交
1001 1002 1003 1004 1005 1006 1007
		return err;
	}

	*ptrap = trap;
	return 0;
}

1008 1009 1010 1011 1012 1013 1014 1015 1016
/*
 * Determine how we treat concurrent use of upperdir/workdir based on the
 * index feature. This is papering over mount leaks of container runtimes,
 * for example, an old overlay mount is leaked and now its upperdir is
 * attempted to be used as a lower layer in a new overlay mount.
 */
static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
	if (ofs->config.index) {
L
lijiazi 已提交
1017
		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1018 1019 1020
		       name);
		return -EBUSY;
	} else {
L
lijiazi 已提交
1021
		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1022 1023 1024 1025 1026
			name);
		return 0;
	}
}

A
Amir Goldstein 已提交
1027 1028
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
			 struct path *upperpath)
1029
{
M
Miklos Szeredi 已提交
1030
	struct vfsmount *upper_mnt;
1031 1032
	int err;

M
Miklos Szeredi 已提交
1033
	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1034 1035 1036 1037 1038
	if (err)
		goto out;

	/* Upper fs should not be r/o */
	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
L
lijiazi 已提交
1039
		pr_err("upper fs is r/o, try multi-lower layers mount\n");
1040 1041 1042 1043
		err = -EINVAL;
		goto out;
	}

M
Miklos Szeredi 已提交
1044
	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1045 1046 1047
	if (err)
		goto out;

A
Amir Goldstein 已提交
1048 1049 1050 1051 1052
	err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
			     "upperdir");
	if (err)
		goto out;

M
Miklos Szeredi 已提交
1053 1054 1055
	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
L
lijiazi 已提交
1056
		pr_err("failed to clone upperpath\n");
M
Miklos Szeredi 已提交
1057 1058 1059 1060 1061
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
M
Miklos Szeredi 已提交
1062
	ofs->upper_mnt = upper_mnt;
1063 1064 1065 1066

	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
		ofs->upperdir_locked = true;
	} else {
1067 1068 1069
		err = ovl_report_in_use(ofs, "upperdir");
		if (err)
			goto out;
1070 1071
	}

1072 1073 1074 1075 1076
	err = 0;
out:
	return err;
}

A
Amir Goldstein 已提交
1077 1078
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct path *workpath)
1079
{
1080
	struct vfsmount *mnt = ofs->upper_mnt;
1081
	struct dentry *temp;
1082
	int fh_type;
1083 1084
	int err;

1085 1086 1087 1088
	err = mnt_want_write(mnt);
	if (err)
		return err;

M
Miklos Szeredi 已提交
1089 1090
	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	if (!ofs->workdir)
1091
		goto out;
1092

A
Amir Goldstein 已提交
1093 1094 1095 1096
	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
	if (err)
		goto out;

1097 1098 1099 1100 1101 1102 1103 1104
	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
1105
		goto out;
1106 1107 1108 1109 1110 1111

	/*
	 * We allowed this configuration and don't want to break users over
	 * kernel upgrade. So warn instead of erroring out.
	 */
	if (!err)
L
lijiazi 已提交
1112
		pr_warn("upper fs needs to support d_type.\n");
1113 1114

	/* Check if upper/work fs supports O_TMPFILE */
M
Miklos Szeredi 已提交
1115 1116 1117
	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(temp);
	if (ofs->tmpfile)
1118 1119
		dput(temp);
	else
L
lijiazi 已提交
1120
		pr_warn("upper fs does not support tmpfile.\n");
1121 1122 1123 1124

	/*
	 * Check if upper/work fs supports trusted.overlay.* xattr
	 */
M
Miklos Szeredi 已提交
1125
	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
1126
	if (err) {
M
Miklos Szeredi 已提交
1127
		ofs->noxattr = true;
1128
		ofs->config.index = false;
1129
		ofs->config.metacopy = false;
L
lijiazi 已提交
1130
		pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1131
		err = 0;
1132
	} else {
M
Miklos Szeredi 已提交
1133
		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1134 1135 1136
	}

	/* Check if upper/work fs supports file handles */
1137 1138
	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
	if (ofs->config.index && !fh_type) {
M
Miklos Szeredi 已提交
1139
		ofs->config.index = false;
L
lijiazi 已提交
1140
		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1141 1142
	}

1143 1144
	/* Check if upper fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
1145
		ofs->xino_mode = -1;
1146

1147 1148
	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
L
lijiazi 已提交
1149
		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1150 1151
		ofs->config.nfs_export = false;
	}
1152 1153 1154
out:
	mnt_drop_write(mnt);
	return err;
1155 1156
}

A
Amir Goldstein 已提交
1157 1158
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
			   struct path *upperpath)
1159 1160
{
	int err;
M
Miklos Szeredi 已提交
1161
	struct path workpath = { };
1162

M
Miklos Szeredi 已提交
1163
	err = ovl_mount_dir(ofs->config.workdir, &workpath);
1164 1165 1166 1167
	if (err)
		goto out;

	err = -EINVAL;
M
Miklos Szeredi 已提交
1168
	if (upperpath->mnt != workpath.mnt) {
L
lijiazi 已提交
1169
		pr_err("workdir and upperdir must reside under the same mount\n");
1170 1171
		goto out;
	}
M
Miklos Szeredi 已提交
1172
	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
L
lijiazi 已提交
1173
		pr_err("workdir and upperdir must be separate subtrees\n");
1174 1175 1176
		goto out;
	}

1177 1178 1179
	ofs->workbasedir = dget(workpath.dentry);

	if (ovl_inuse_trylock(ofs->workbasedir)) {
M
Miklos Szeredi 已提交
1180
		ofs->workdir_locked = true;
1181
	} else {
1182 1183 1184
		err = ovl_report_in_use(ofs, "workdir");
		if (err)
			goto out;
1185 1186
	}

1187 1188 1189 1190 1191
	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
			     "workdir");
	if (err)
		goto out;

A
Amir Goldstein 已提交
1192
	err = ovl_make_workdir(sb, ofs, &workpath);
M
Miklos Szeredi 已提交
1193

1194
out:
M
Miklos Szeredi 已提交
1195 1196
	path_put(&workpath);

1197 1198 1199
	return err;
}

A
Amir Goldstein 已提交
1200 1201
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct ovl_entry *oe, struct path *upperpath)
1202
{
1203
	struct vfsmount *mnt = ofs->upper_mnt;
1204 1205
	int err;

1206 1207 1208 1209
	err = mnt_want_write(mnt);
	if (err)
		return err;

1210
	/* Verify lower root is upper root origin */
1211
	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1212
				true);
1213
	if (err) {
L
lijiazi 已提交
1214
		pr_err("failed to verify upper root origin\n");
1215 1216 1217
		goto out;
	}

M
Miklos Szeredi 已提交
1218 1219
	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (ofs->indexdir) {
A
Amir Goldstein 已提交
1220 1221 1222 1223 1224
		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
				     "indexdir");
		if (err)
			goto out;

1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236
		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in "trusted.overlay.origin"
		 * xattr. If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * "trusted.overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
L
lijiazi 已提交
1237
				pr_err("failed to verify index dir 'origin' xattr\n");
1238 1239
		}
		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1240
		if (err)
L
lijiazi 已提交
1241
			pr_err("failed to verify index dir 'upper' xattr\n");
1242 1243 1244

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
1245
			err = ovl_indexdir_cleanup(ofs);
1246
	}
M
Miklos Szeredi 已提交
1247
	if (err || !ofs->indexdir)
L
lijiazi 已提交
1248
		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1249 1250

out:
1251
	mnt_drop_write(mnt);
1252 1253 1254
	return err;
}

1255 1256 1257 1258
static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
	unsigned int i;

1259
	if (!ofs->config.nfs_export && !ofs->upper_mnt)
1260 1261
		return true;

1262
	for (i = 0; i < ofs->numfs; i++) {
1263 1264 1265 1266
		/*
		 * We use uuid to associate an overlay lower file handle with a
		 * lower layer, so we can accept lower fs with null uuid as long
		 * as all lower layers with null uuid are on the same fs.
1267 1268
		 * if we detect multiple lower fs with the same uuid, we
		 * disable lower file handle decoding on all of them.
1269
		 */
1270 1271
		if (ofs->fs[i].is_lower &&
		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
1272
			ofs->fs[i].bad_uuid = true;
1273
			return false;
1274
		}
1275 1276 1277 1278
	}
	return true;
}

1279
/* Get a unique fsid for the layer */
1280
static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1281
{
1282
	struct super_block *sb = path->mnt->mnt_sb;
1283 1284 1285
	unsigned int i;
	dev_t dev;
	int err;
1286
	bool bad_uuid = false;
1287

1288 1289 1290
	for (i = 0; i < ofs->numfs; i++) {
		if (ofs->fs[i].sb == sb)
			return i;
1291 1292
	}

1293
	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1294 1295 1296 1297
		bad_uuid = true;
		if (ofs->config.index || ofs->config.nfs_export) {
			ofs->config.index = false;
			ofs->config.nfs_export = false;
L
lijiazi 已提交
1298
			pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
1299 1300 1301 1302
				uuid_is_null(&sb->s_uuid) ? "null" :
							    "conflicting",
				path->dentry);
		}
1303 1304
	}

1305 1306
	err = get_anon_bdev(&dev);
	if (err) {
L
lijiazi 已提交
1307
		pr_err("failed to get anonymous bdev for lowerpath\n");
1308 1309 1310
		return err;
	}

1311 1312 1313
	ofs->fs[ofs->numfs].sb = sb;
	ofs->fs[ofs->numfs].pseudo_dev = dev;
	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
1314

1315
	return ofs->numfs++;
1316 1317
}

1318 1319
static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
			  struct path *stack, unsigned int numlower)
1320 1321 1322
{
	int err;
	unsigned int i;
M
Miklos Szeredi 已提交
1323
	struct ovl_layer *layers;
1324 1325

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1326 1327
	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
	if (!layers)
1328
		goto out;
M
Miklos Szeredi 已提交
1329
	ofs->layers = layers;
1330

1331 1332
	ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
	if (ofs->fs == NULL)
1333 1334
		goto out;

1335 1336 1337
	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
	ofs->numfs++;

M
Miklos Szeredi 已提交
1338 1339 1340
	layers[0].mnt = ofs->upper_mnt;
	layers[0].idx = 0;
	layers[0].fsid = 0;
1341 1342
	ofs->numlayer = 1;

1343
	/*
1344 1345 1346
	 * All lower layers that share the same fs as upper layer, use the same
	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
	 * only overlay to simplify ovl_fs_free().
1347
	 * is_lower will be set if upper fs is shared with a lower layer.
1348
	 */
1349 1350 1351 1352 1353 1354
	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
	if (err) {
		pr_err("failed to get anonymous bdev for upper fs\n");
		goto out;
	}

1355 1356
	if (ofs->upper_mnt) {
		ofs->fs[0].sb = ofs->upper_mnt->mnt_sb;
1357
		ofs->fs[0].is_lower = false;
1358 1359
	}

1360 1361
	for (i = 0; i < numlower; i++) {
		struct vfsmount *mnt;
A
Amir Goldstein 已提交
1362
		struct inode *trap;
1363
		int fsid;
1364

1365
		err = fsid = ovl_get_fsid(ofs, &stack[i]);
1366
		if (err < 0)
1367 1368
			goto out;

A
Amir Goldstein 已提交
1369 1370 1371 1372
		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
		if (err)
			goto out;

1373 1374 1375 1376 1377 1378
		if (ovl_is_inuse(stack[i].dentry)) {
			err = ovl_report_in_use(ofs, "lowerdir");
			if (err)
				goto out;
		}

1379 1380 1381
		mnt = clone_private_mount(&stack[i]);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
L
lijiazi 已提交
1382
			pr_err("failed to clone lowerpath\n");
A
Amir Goldstein 已提交
1383
			iput(trap);
1384 1385
			goto out;
		}
1386

1387 1388 1389 1390 1391 1392
		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

M
Miklos Szeredi 已提交
1393 1394 1395 1396 1397
		layers[ofs->numlayer].trap = trap;
		layers[ofs->numlayer].mnt = mnt;
		layers[ofs->numlayer].idx = ofs->numlayer;
		layers[ofs->numlayer].fsid = fsid;
		layers[ofs->numlayer].fs = &ofs->fs[fsid];
1398
		ofs->numlayer++;
1399
		ofs->fs[fsid].is_lower = true;
1400
	}
1401

1402 1403 1404 1405 1406 1407 1408 1409
	/*
	 * When all layers on same fs, overlay can use real inode numbers.
	 * With mount option "xino=on", mounter declares that there are enough
	 * free high bits in underlying fs to hold the unique fsid.
	 * If overlayfs does encounter underlying inodes using the high xino
	 * bits reserved for fsid, it emits a warning and uses the original
	 * inode number.
	 */
1410
	if (ofs->numfs - !ofs->upper_mnt == 1) {
1411 1412 1413 1414
		if (ofs->config.xino == OVL_XINO_ON)
			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
		ofs->xino_mode = 0;
	} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
1415
		/*
1416 1417 1418
		 * This is a roundup of number of bits needed for encoding
		 * fsid, where fsid 0 is reserved for upper fs even with
		 * lower only overlay.
1419 1420
		 */
		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
1421
		ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
1422 1423
	}

1424
	if (ofs->xino_mode > 0) {
L
lijiazi 已提交
1425
		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1426
			ofs->xino_mode);
1427
	}
1428

1429 1430 1431 1432 1433
	err = 0;
out:
	return err;
}

1434
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
M
Miklos Szeredi 已提交
1435
					    struct ovl_fs *ofs)
1436 1437 1438
{
	int err;
	char *lowertmp, *lower;
1439 1440
	struct path *stack = NULL;
	unsigned int stacklen, numlower = 0, i;
1441
	bool remote = false;
1442
	struct ovl_entry *oe;
1443 1444

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1445
	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1446
	if (!lowertmp)
1447
		goto out_err;
1448 1449 1450 1451

	err = -EINVAL;
	stacklen = ovl_split_lowerdirs(lowertmp);
	if (stacklen > OVL_MAX_STACK) {
L
lijiazi 已提交
1452
		pr_err("too many lower directories, limit is %d\n",
1453
		       OVL_MAX_STACK);
1454
		goto out_err;
M
Miklos Szeredi 已提交
1455
	} else if (!ofs->config.upperdir && stacklen == 1) {
L
lijiazi 已提交
1456
		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1457
		goto out_err;
1458 1459
	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
		   ofs->config.redirect_follow) {
L
lijiazi 已提交
1460
		pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
1461
		ofs->config.nfs_export = false;
1462 1463 1464 1465 1466
	}

	err = -ENOMEM;
	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
	if (!stack)
1467
		goto out_err;
1468 1469 1470 1471

	err = -EINVAL;
	lower = lowertmp;
	for (numlower = 0; numlower < stacklen; numlower++) {
M
Miklos Szeredi 已提交
1472
		err = ovl_lower_dir(lower, &stack[numlower], ofs,
1473 1474
				    &sb->s_stack_depth, &remote);
		if (err)
1475
			goto out_err;
1476 1477 1478 1479 1480 1481 1482

		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
L
lijiazi 已提交
1483
		pr_err("maximum fs stacking depth exceeded\n");
1484
		goto out_err;
1485 1486
	}

1487
	err = ovl_get_layers(sb, ofs, stack, numlower);
1488 1489 1490 1491 1492 1493 1494 1495 1496 1497
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
1498
		oe->lowerstack[i].layer = &ofs->layers[i+1];
1499
	}
1500 1501 1502 1503 1504 1505 1506 1507 1508 1509

	if (remote)
		sb->s_d_op = &ovl_reval_dentry_operations;
	else
		sb->s_d_op = &ovl_dentry_operations;

out:
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);
1510 1511 1512 1513 1514 1515
	kfree(lowertmp);

	return oe;

out_err:
	oe = ERR_PTR(err);
1516 1517 1518
	goto out;
}

A
Amir Goldstein 已提交
1519 1520 1521 1522 1523
/*
 * Check if this layer root is a descendant of:
 * - another layer of this overlayfs instance
 * - upper/work dir of any overlayfs instance
 */
1524 1525
static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
			   struct dentry *dentry, const char *name)
A
Amir Goldstein 已提交
1526
{
1527
	struct dentry *next = dentry, *parent;
A
Amir Goldstein 已提交
1528 1529
	int err = 0;

1530
	if (!dentry)
A
Amir Goldstein 已提交
1531 1532
		return 0;

1533 1534 1535 1536
	parent = dget_parent(next);

	/* Walk back ancestors to root (inclusive) looking for traps */
	while (!err && parent != next) {
1537
		if (ovl_lookup_trap_inode(sb, parent)) {
A
Amir Goldstein 已提交
1538
			err = -ELOOP;
L
lijiazi 已提交
1539
			pr_err("overlapping %s path\n", name);
1540 1541
		} else if (ovl_is_inuse(parent)) {
			err = ovl_report_in_use(ofs, name);
A
Amir Goldstein 已提交
1542 1543
		}
		next = parent;
1544 1545
		parent = dget_parent(next);
		dput(next);
A
Amir Goldstein 已提交
1546 1547
	}

1548
	dput(parent);
A
Amir Goldstein 已提交
1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561

	return err;
}

/*
 * Check if any of the layers or work dirs overlap.
 *
 * Runs ovl_check_layer() on the upper root and workbasedir (if there is an
 * upper mount) and then on every lower layer root, stopping at the first
 * error.  Returns 0 or that error.
 */
static int ovl_check_overlapping_layers(struct super_block *sb,
					struct ovl_fs *ofs)
{
	int err = 0;
	int idx;

	if (ofs->upper_mnt) {
		err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
				      "upperdir");
		/*
		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
		 * this instance and covers overlapping work and index dirs,
		 * unless work or index dir have been moved since created inside
		 * workbasedir.  In that case, we already have their traps in
		 * inode cache and we will catch that case on lookup.
		 */
		if (!err)
			err = ovl_check_layer(sb, ofs, ofs->workbasedir,
					      "workdir");
	}

	/* layers[0] is the upper layer, lower layers start at index 1 */
	for (idx = 1; !err && idx < ofs->numlayer; idx++)
		err = ovl_check_layer(sb, ofs,
				      ofs->layers[idx].mnt->mnt_root,
				      "lowerdir");

	return err;
}

M
Miklos Szeredi 已提交
1590 1591
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
K
Kees Cook 已提交
1592
	struct path upperpath = { };
M
Miklos Szeredi 已提交
1593
	struct dentry *root_dentry;
1594
	struct ovl_entry *oe;
M
Miklos Szeredi 已提交
1595
	struct ovl_fs *ofs;
1596
	struct cred *cred;
M
Miklos Szeredi 已提交
1597 1598
	int err;

E
Erez Zadok 已提交
1599
	err = -ENOMEM;
M
Miklos Szeredi 已提交
1600 1601
	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ofs)
M
Miklos Szeredi 已提交
1602 1603
		goto out;

M
Miklos Szeredi 已提交
1604
	ofs->creator_cred = cred = prepare_creds();
1605 1606 1607
	if (!cred)
		goto out_err;

M
Miklos Szeredi 已提交
1608
	ofs->config.index = ovl_index_def;
1609
	ofs->config.nfs_export = ovl_nfs_export_def;
1610
	ofs->config.xino = ovl_xino_def();
1611
	ofs->config.metacopy = ovl_metacopy_def;
M
Miklos Szeredi 已提交
1612
	err = ovl_parse_opt((char *) data, &ofs->config);
E
Erez Zadok 已提交
1613
	if (err)
1614
		goto out_err;
E
Erez Zadok 已提交
1615

M
Miklos Szeredi 已提交
1616
	err = -EINVAL;
M
Miklos Szeredi 已提交
1617
	if (!ofs->config.lowerdir) {
1618
		if (!silent)
L
lijiazi 已提交
1619
			pr_err("missing 'lowerdir'\n");
1620
		goto out_err;
M
Miklos Szeredi 已提交
1621 1622
	}

M
Miklos Szeredi 已提交
1623
	sb->s_stack_depth = 0;
1624
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1625
	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
1626
	if (ofs->config.xino != OVL_XINO_OFF)
1627
		ofs->xino_mode = BITS_PER_LONG - 32;
1628

A
Amir Goldstein 已提交
1629 1630 1631
	/* alloc/destroy_inode needed for setting up traps in inode cache */
	sb->s_op = &ovl_super_operations;

M
Miklos Szeredi 已提交
1632 1633
	if (ofs->config.upperdir) {
		if (!ofs->config.workdir) {
L
lijiazi 已提交
1634
			pr_err("missing 'workdir'\n");
1635
			goto out_err;
M
Miklos Szeredi 已提交
1636
		}
M
Miklos Szeredi 已提交
1637

A
Amir Goldstein 已提交
1638
		err = ovl_get_upper(sb, ofs, &upperpath);
M
Miklos Szeredi 已提交
1639
		if (err)
1640
			goto out_err;
1641

A
Amir Goldstein 已提交
1642
		err = ovl_get_workdir(sb, ofs, &upperpath);
1643
		if (err)
1644
			goto out_err;
1645

M
Miklos Szeredi 已提交
1646
		if (!ofs->workdir)
1647
			sb->s_flags |= SB_RDONLY;
1648

M
Miklos Szeredi 已提交
1649 1650
		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1651

M
Miklos Szeredi 已提交
1652
	}
M
Miklos Szeredi 已提交
1653
	oe = ovl_get_lowerstack(sb, ofs);
1654 1655
	err = PTR_ERR(oe);
	if (IS_ERR(oe))
1656
		goto out_err;
M
Miklos Szeredi 已提交
1657

H
hujianyang 已提交
1658
	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
M
Miklos Szeredi 已提交
1659
	if (!ofs->upper_mnt)
1660
		sb->s_flags |= SB_RDONLY;
M
Miklos Szeredi 已提交
1661

M
Miklos Szeredi 已提交
1662
	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
A
Amir Goldstein 已提交
1663
		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
1664
		if (err)
1665
			goto out_free_oe;
1666

1667 1668 1669 1670
		/* Force r/o mount with no index dir */
		if (!ofs->indexdir) {
			dput(ofs->workdir);
			ofs->workdir = NULL;
1671
			sb->s_flags |= SB_RDONLY;
1672 1673
		}

1674 1675
	}

A
Amir Goldstein 已提交
1676 1677 1678 1679
	err = ovl_check_overlapping_layers(sb, ofs);
	if (err)
		goto out_free_oe;

1680
	/* Show index=off in /proc/mounts for forced r/o mount */
1681
	if (!ofs->indexdir) {
M
Miklos Szeredi 已提交
1682
		ofs->config.index = false;
1683
		if (ofs->upper_mnt && ofs->config.nfs_export) {
L
lijiazi 已提交
1684
			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
1685 1686 1687
			ofs->config.nfs_export = false;
		}
	}
1688

1689
	if (ofs->config.metacopy && ofs->config.nfs_export) {
L
lijiazi 已提交
1690
		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
1691 1692 1693
		ofs->config.nfs_export = false;
	}

1694 1695 1696
	if (ofs->config.nfs_export)
		sb->s_export_op = &ovl_export_operations;

1697 1698 1699
	/* Never override disk quota limits or use reserved space */
	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);

1700 1701
	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_xattr = ovl_xattr_handlers;
M
Miklos Szeredi 已提交
1702
	sb->s_fs_info = ofs;
1703
	sb->s_flags |= SB_POSIXACL;
1704

1705
	err = -ENOMEM;
1706
	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
M
Miklos Szeredi 已提交
1707
	if (!root_dentry)
1708
		goto out_free_oe;
M
Miklos Szeredi 已提交
1709

1710 1711
	root_dentry->d_fsdata = oe;

M
Miklos Szeredi 已提交
1712
	mntput(upperpath.mnt);
1713
	if (upperpath.dentry) {
1714
		ovl_dentry_set_upper_alias(root_dentry);
M
Miklos Szeredi 已提交
1715 1716
		if (ovl_is_impuredir(upperpath.dentry))
			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1717
	}
M
Miklos Szeredi 已提交
1718

1719 1720
	/* Root is always merge -> can have whiteouts */
	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1721
	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1722
	ovl_set_upperdata(d_inode(root_dentry));
1723
	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
1724
		       ovl_dentry_lower(root_dentry), NULL);
M
Miklos Szeredi 已提交
1725

M
Miklos Szeredi 已提交
1726 1727 1728 1729
	sb->s_root = root_dentry;

	return 0;

1730 1731
out_free_oe:
	ovl_entry_stack_free(oe);
1732
	kfree(oe);
1733
out_err:
M
Miklos Szeredi 已提交
1734
	path_put(&upperpath);
M
Miklos Szeredi 已提交
1735
	ovl_free_fs(ofs);
M
Miklos Szeredi 已提交
1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747
out:
	return err;
}

/*
 * ->mount() callback: overlayfs has no backing device, so @dev_name is
 * ignored and the work is delegated to mount_nodev() with ovl_fill_super().
 */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	struct dentry *root;

	root = mount_nodev(fs_type, flags, raw_data, ovl_fill_super);

	return root;
}

static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
1748
	.name		= "overlay",
M
Miklos Szeredi 已提交
1749 1750 1751
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
1752
MODULE_ALIAS_FS("overlay");
M
Miklos Szeredi 已提交
1753

1754 1755 1756 1757 1758 1759 1760
/*
 * Slab constructor for the ovl_inode cache: @foo is the new object, whose
 * embedded VFS inode gets its one-time initialization here.
 */
static void ovl_inode_init_once(void *foo)
{
	struct inode *vfs_inode = &((struct ovl_inode *)foo)->vfs_inode;

	inode_init_once(vfs_inode);
}

M
Miklos Szeredi 已提交
1761 1762
/*
 * Module init: create the ovl_inode slab cache, then register the
 * filesystem type.  The cache is destroyed again if registration fails.
 * Returns 0 or a negative errno.
 */
static int __init ovl_init(void)
{
	int ret;

	ovl_inode_cachep = kmem_cache_create("ovl_inode",
					     sizeof(struct ovl_inode), 0,
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_MEM_SPREAD | SLAB_ACCOUNT,
					     ovl_inode_init_once);
	if (!ovl_inode_cachep)
		return -ENOMEM;

	ret = register_filesystem(&ovl_fs_type);
	if (!ret)
		return 0;

	kmem_cache_destroy(ovl_inode_cachep);
	return ret;
}

/*
 * Module exit: unregister the filesystem type and tear down the inode
 * cache created by ovl_init().
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Inodes are freed via delayed RCU callbacks; wait for all of them
	 * to be flushed before the cache they return to is destroyed.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);
}

/* Module entry and exit points */
module_init(ovl_init);
module_exit(ovl_exit);