super.c 34.3 KB
Newer Older
M
Miklos Szeredi 已提交
1 2 3 4 5 6 7 8 9
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

10
#include <uapi/linux/magic.h>
M
Miklos Szeredi 已提交
11 12 13 14 15 16
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
A
Andy Whitcroft 已提交
17
#include <linux/statfs.h>
E
Erez Zadok 已提交
18
#include <linux/seq_file.h>
M
Miklos Szeredi 已提交
19
#include <linux/posix_acl_xattr.h>
M
Miklos Szeredi 已提交
20 21 22 23 24 25 26 27 28
#include "overlayfs.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


struct ovl_dir_cache;

29 30
#define OVL_MAX_STACK 500

31 32 33 34
/*
 * Module parameter "redirect_dir": default for the redirect_dir feature,
 * seeded from CONFIG_OVERLAY_FS_REDIRECT_DIR.
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
/* The description is keyed by the exported parameter name, not the variable. */
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");
M
Miklos Szeredi 已提交
35

36 37 38 39 40 41 42
/*
 * Module parameter "redirect_always_follow", seeded from
 * CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW.
 */
static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
/* The description is keyed by the exported parameter name, not the variable. */
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

43 44 45 46 47
/*
 * Module parameter "index": default for the inodes index feature,
 * seeded from CONFIG_OVERLAY_FS_INDEX.
 */
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
/* The description is keyed by the exported parameter name, not the variable. */
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

48 49 50 51 52
/*
 * Module parameter "nfs_export": default for the NFS export feature,
 * seeded from CONFIG_OVERLAY_FS_NFS_EXPORT.
 */
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
/* The description is keyed by the exported parameter name, not the variable. */
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

53 54 55 56 57 58 59 60
/* Drop the dentry reference held in each of the oe->numlower stack slots. */
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
	unsigned int idx = 0;

	while (idx < oe->numlower) {
		dput(oe->lowerstack[idx].dentry);
		idx++;
	}
}

M
Miklos Szeredi 已提交
61 62 63 64 65
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
66
		ovl_entry_stack_free(oe);
M
Miklos Szeredi 已提交
67 68 69 70
		kfree_rcu(oe, rcu);
	}
}

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/*
 * Enforce append-only semantics for an open with @flag on @inode.
 * Returns 0 when the open is allowed, -EPERM otherwise.
 */
static int ovl_check_append_only(struct inode *inode, int flag)
{
	/*
	 * This test was moot in vfs may_open() because overlay inode does
	 * not have the S_APPEND flag, so re-check on real upper inode
	 */
	if (!IS_APPEND(inode))
		return 0;

	/* Any non-read-only access must carry O_APPEND. */
	if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
		return -EPERM;

	/* Truncation is never allowed on an append-only inode. */
	if (flag & O_TRUNC)
		return -EPERM;

	return 0;
}

87 88
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode,
M
Miklos Szeredi 已提交
89
				 unsigned int open_flags, unsigned int flags)
M
Miklos Szeredi 已提交
90 91
{
	struct dentry *real;
92
	int err;
M
Miklos Szeredi 已提交
93

94 95 96
	if (flags & D_REAL_UPPER)
		return ovl_dentry_upper(dentry);

97
	if (!d_is_reg(dentry)) {
M
Miklos Szeredi 已提交
98 99 100 101 102
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

103
	if (open_flags) {
104
		err = ovl_open_maybe_copy_up(dentry, open_flags);
105 106 107 108
		if (err)
			return ERR_PTR(err);
	}

M
Miklos Szeredi 已提交
109
	real = ovl_dentry_upper(dentry);
110 111 112 113 114 115
	if (real && (!inode || inode == d_inode(real))) {
		if (!inode) {
			err = ovl_check_append_only(d_inode(real), open_flags);
			if (err)
				return ERR_PTR(err);
		}
M
Miklos Szeredi 已提交
116
		return real;
117
	}
M
Miklos Szeredi 已提交
118 119 120 121 122

	real = ovl_dentry_lower(dentry);
	if (!real)
		goto bug;

M
Miklos Szeredi 已提交
123
	/* Handle recursion */
M
Miklos Szeredi 已提交
124
	real = d_real(real, inode, open_flags, 0);
M
Miklos Szeredi 已提交
125

M
Miklos Szeredi 已提交
126 127 128
	if (!inode || inode == d_inode(real))
		return real;
bug:
M
Miklos Szeredi 已提交
129
	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
M
Miklos Szeredi 已提交
130 131 132 133
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
	return dentry;
}

134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/*
 * ->d_revalidate: ask every lower dentry that has DCACHE_OP_REVALIDATE set
 * to revalidate itself.
 *
 * Returns 1 if all of them are still valid, a negative error returned by a
 * lower ->d_revalidate(), or -ESTALE when a lower dentry reports itself
 * invalid (it is also d_invalidate()d unless LOOKUP_RCU is set).
 */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int idx;

	for (idx = 0; idx < oe->numlower; idx++) {
		struct dentry *lower = oe->lowerstack[idx].dentry;
		int res;

		if (!(lower->d_flags & DCACHE_OP_REVALIDATE))
			continue;

		res = lower->d_op->d_revalidate(lower, flags);
		if (res < 0)
			return res;
		if (res)
			continue;

		if (!(flags & LOOKUP_RCU))
			d_invalidate(lower);
		return -ESTALE;
	}

	return 1;
}

/*
 * ->d_weak_revalidate: forward to every lower dentry that has
 * DCACHE_OP_WEAK_REVALIDATE set, stopping at the first result <= 0.
 *
 * Returns that first non-positive result, otherwise the result of the last
 * lower call, or 1 when no lower dentry needed revalidation.
 */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	int verdict = 1;
	unsigned int idx;

	for (idx = 0; idx < oe->numlower; idx++) {
		struct dentry *lower = oe->lowerstack[idx].dentry;

		if (!(lower->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			continue;

		verdict = lower->d_op->d_weak_revalidate(lower, flags);
		if (verdict <= 0)
			return verdict;
	}

	return verdict;
}

M
Miklos Szeredi 已提交
175 176
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
177
	.d_real = ovl_d_real,
M
Miklos Szeredi 已提交
178 179
};

180 181
static const struct dentry_operations ovl_reval_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
182
	.d_real = ovl_d_real,
183 184 185 186
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

187 188 189 190 191 192
static struct kmem_cache *ovl_inode_cachep;

static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);

193 194 195
	if (!oi)
		return NULL;

196
	oi->cache = NULL;
M
Miklos Szeredi 已提交
197
	oi->redirect = NULL;
198
	oi->version = 0;
M
Miklos Szeredi 已提交
199
	oi->flags = 0;
200
	oi->__upperdentry = NULL;
201
	oi->lower = NULL;
202
	mutex_init(&oi->lock);
203

204 205 206 207 208 209 210 211 212 213 214 215
	return &oi->vfs_inode;
}

/* RCU callback: hand the ovl_inode containing @head's inode back to the cache. */
static void ovl_i_callback(struct rcu_head *head)
{
	struct inode *vfs_inode = container_of(head, struct inode, i_rcu);
	struct ovl_inode *oi = OVL_I(vfs_inode);

	kmem_cache_free(ovl_inode_cachep, oi);
}

static void ovl_destroy_inode(struct inode *inode)
{
216 217 218
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
219
	iput(oi->lower);
M
Miklos Szeredi 已提交
220
	kfree(oi->redirect);
221
	ovl_dir_cache_free(inode);
222
	mutex_destroy(&oi->lock);
223

224 225 226
	call_rcu(&inode->i_rcu, ovl_i_callback);
}

M
Miklos Szeredi 已提交
227
static void ovl_free_fs(struct ovl_fs *ofs)
M
Miklos Szeredi 已提交
228
{
229
	unsigned i;
M
Miklos Szeredi 已提交
230

M
Miklos Szeredi 已提交
231 232 233 234 235 236 237 238
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
	mntput(ofs->upper_mnt);
239
	for (i = 0; i < ofs->numlower; i++)
M
Miklos Szeredi 已提交
240
		mntput(ofs->lower_layers[i].mnt);
241 242
	for (i = 0; i < ofs->numlowerfs; i++)
		free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
M
Miklos Szeredi 已提交
243
	kfree(ofs->lower_layers);
244
	kfree(ofs->lower_fs);
M
Miklos Szeredi 已提交
245 246 247 248

	kfree(ofs->config.lowerdir);
	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
249
	kfree(ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
250 251 252
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
M
Miklos Szeredi 已提交
253 254
}

255 256 257 258 259 260 261
/* ->put_super: free the struct ovl_fs stored in sb->s_fs_info. */
static void ovl_put_super(struct super_block *sb)
{
	ovl_free_fs(sb->s_fs_info);
}

262
/* Sync real dirty inodes in upper filesystem (if it exists) */
263 264
static int ovl_sync_fs(struct super_block *sb, int wait)
{
M
Miklos Szeredi 已提交
265
	struct ovl_fs *ofs = sb->s_fs_info;
266 267 268
	struct super_block *upper_sb;
	int ret;

M
Miklos Szeredi 已提交
269
	if (!ofs->upper_mnt)
270
		return 0;
271 272 273 274 275 276 277 278 279 280

	/*
	 * If this is a sync(2) call or an emergency sync, all the super blocks
	 * will be iterated, including upper_sb, so no need to do anything.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
281 282
		return 0;

283 284
	upper_sb = ofs->upper_mnt->mnt_sb;

285
	down_read(&upper_sb->s_umount);
286
	ret = sync_filesystem(upper_sb);
287
	up_read(&upper_sb->s_umount);
288

289 290 291
	return ret;
}

A
Andy Whitcroft 已提交
292 293 294 295 296 297
/**
 * ovl_statfs
 * @sb: The overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
298
 * filesystem pass the statfs to the upper filesystem (if it exists)
A
Andy Whitcroft 已提交
299 300 301 302 303 304 305 306
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

307
	ovl_path_real(root_dentry, &path);
A
Andy Whitcroft 已提交
308 309 310

	err = vfs_statfs(&path, buf);
	if (!err) {
M
Miklos Szeredi 已提交
311
		buf->f_namelen = ofs->namelen;
A
Andy Whitcroft 已提交
312 313 314 315 316 317
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

318
/* Will this overlay be forced to mount/remount ro? */
M
Miklos Szeredi 已提交
319
static bool ovl_force_readonly(struct ovl_fs *ofs)
320
{
M
Miklos Szeredi 已提交
321
	return (!ofs->upper_mnt || !ofs->workdir);
322 323
}

324 325 326 327 328
/* Default redirect mode string, derived from the redirect_dir module default. */
static const char *ovl_redirect_mode_def(void)
{
	if (ovl_redirect_dir_def)
		return "on";

	return "off";
}

E
Erez Zadok 已提交
329 330 331 332 333 334 335 336 337
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
M
Miklos Szeredi 已提交
338
	struct ovl_fs *ofs = sb->s_fs_info;
E
Erez Zadok 已提交
339

M
Miklos Szeredi 已提交
340 341 342 343
	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
M
Miklos Szeredi 已提交
344
	}
M
Miklos Szeredi 已提交
345
	if (ofs->config.default_permissions)
M
Miklos Szeredi 已提交
346
		seq_puts(m, ",default_permissions");
347 348
	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
349
	if (ofs->config.index != ovl_index_def)
350
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
351 352 353
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
E
Erez Zadok 已提交
354 355 356
	return 0;
}

357 358
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
M
Miklos Szeredi 已提交
359
	struct ovl_fs *ofs = sb->s_fs_info;
360

361
	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
362 363 364 365 366
		return -EROFS;

	return 0;
}

M
Miklos Szeredi 已提交
367
static const struct super_operations ovl_super_operations = {
368 369 370
	.alloc_inode	= ovl_alloc_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
371
	.put_super	= ovl_put_super,
372
	.sync_fs	= ovl_sync_fs,
A
Andy Whitcroft 已提交
373
	.statfs		= ovl_statfs,
E
Erez Zadok 已提交
374
	.show_options	= ovl_show_options,
375
	.remount_fs	= ovl_remount,
M
Miklos Szeredi 已提交
376 377 378 379 380 381
};

/* Mount option tokens; each maps to one pattern in ovl_tokens. */
enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
	OPT_ERR,
};

static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
M
Miklos Szeredi 已提交
395
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
396
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
397 398
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
399 400
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
M
Miklos Szeredi 已提交
401 402 403
	{OPT_ERR,			NULL}
};

M
Miklos Szeredi 已提交
404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
/*
 * Split the next comma-separated option off the string at *s.
 *
 * A backslash escapes the following character, so "\," does not end an
 * option; escapes are left in place.  The separating comma is overwritten
 * with NUL and *s is advanced past it, or set to NULL when the returned
 * option was the last one.
 *
 * Returns the start of the option, or NULL when *s is NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur = start;

	if (!start)
		return NULL;

	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Skip the escaped character; stop on a trailing backslash. */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/*
 * Translate the redirect_dir mode string into config flags.
 *
 * "on" sets redirect_dir and redirect_follow, "follow" sets only
 * redirect_follow, "off" sets redirect_follow only when the
 * redirect_always_follow module parameter is set, and "nofollow" leaves
 * both flags untouched.
 *
 * Returns 0 on success, -EINVAL for any other string.
 */
static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
{
	if (!strcmp(mode, "on")) {
		config->redirect_dir = true;
		/*
		 * Does not make sense to have redirect creation without
		 * redirect following.
		 */
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "follow")) {
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "off")) {
		if (ovl_redirect_always_follow)
			config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "nofollow"))
		return 0;

	pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
	       mode);
	return -EINVAL;
}

M
Miklos Szeredi 已提交
450 451 452 453
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;

454 455 456 457
	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

M
Miklos Szeredi 已提交
458
	while ((p = ovl_next_opt(&opt)) != NULL) {
M
Miklos Szeredi 已提交
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487
		int token;
		substring_t args[MAX_OPT_ARGS];

		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

M
Miklos Szeredi 已提交
488 489 490 491
		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

492 493 494 495 496
		case OPT_REDIRECT_DIR:
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
M
Miklos Szeredi 已提交
497 498
			break;

499 500 501 502 503 504 505 506
		case OPT_INDEX_ON:
			config->index = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			break;

507 508 509 510 511 512 513 514
		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			break;

M
Miklos Szeredi 已提交
515
		default:
516
			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
M
Miklos Szeredi 已提交
517 518 519
			return -EINVAL;
		}
	}
H
hujianyang 已提交
520 521 522 523 524 525 526 527 528

	/* Workdir is useless in non-upper mount */
	if (!config->upperdir && config->workdir) {
		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
			config->workdir);
		kfree(config->workdir);
		config->workdir = NULL;
	}

529
	return ovl_parse_redirect_mode(config, config->redirect_mode);
M
Miklos Szeredi 已提交
530 531 532
}

/*
 * Directory names passed to ovl_workdir_create(), which looks them up under
 * ofs->workbasedir.
 */
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
M
Miklos Szeredi 已提交
534

M
Miklos Szeredi 已提交
535
/*
 * Look up, and create if needed, the directory @name under ofs->workbasedir.
 *
 * @ofs:     overlay fs; workbasedir and upper_mnt are read
 * @name:    name of the directory to create
 * @persist: if true, an already existing directory is returned as is;
 *           otherwise it is cleaned up once and the lookup is retried
 *
 * A newly created directory has its POSIX ACL xattrs removed and its mode
 * reset to S_IFDIR | 0.
 *
 * Returns the directory dentry, or NULL after logging a warning when the
 * lookup or any creation step fails.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir =  ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ofs->upper_mnt;
	struct dentry *work;
	int err;
	bool retried = false;
	bool locked = false;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	locked = true;

retry:
	work = lookup_one_len(name, ofs->workbasedir, strlen(name));

	if (!IS_ERR(work)) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = S_IFDIR | 0,
		};

		if (work->d_inode) {
			err = -EEXIST;
			/* Still present after one cleanup attempt: give up. */
			if (retried)
				goto out_dput;

			if (persist)
				goto out_unlock;

			retried = true;
			ovl_workdir_cleanup(dir, mnt, work, 0);
			dput(work);
			goto retry;
		}

		err = ovl_create_real(dir, work,
				      &(struct cattr){.mode = S_IFDIR | 0},
				      NULL, true);
		if (err)
			goto out_dput;

		/*
		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
		 *
		 * a) success (there was a POSIX ACL xattr and was removed)
		 * b) -ENODATA (there was no POSIX ACL xattr)
		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
		 *
		 * There are various other error values that could effectively
		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
		 * if the xattr name is too long), but the set of filesystems
		 * allowed as upper are limited to "normal" ones, where checking
		 * for the above two errors is sufficient.
		 */
		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		/* Clear any inherited mode bits */
		inode_lock(work->d_inode);
		err = notify_change(work, &attr, NULL);
		inode_unlock(work->d_inode);
		if (err)
			goto out_dput;
	} else {
		err = PTR_ERR(work);
		goto out_err;
	}
out_unlock:
	if (locked)
		inode_unlock(dir);

	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

M
Miklos Szeredi 已提交
623 624 625 626 627 628 629 630 631 632 633 634 635
/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character after it is copied literally.  A trailing lone backslash is
 * simply removed.
 */
static void ovl_unescape(char *s)
{
	const char *in = s;
	char *out = s;

	do {
		if (*in == '\\')
			in++;
		*out++ = *in;
	} while (*in++);
}

M
Miklos Szeredi 已提交
636 637
static int ovl_mount_dir_noesc(const char *name, struct path *path)
{
638
	int err = -EINVAL;
M
Miklos Szeredi 已提交
639

640 641 642 643
	if (!*name) {
		pr_err("overlayfs: empty lowerdir\n");
		goto out;
	}
M
Miklos Szeredi 已提交
644 645 646 647 648 649
	err = kern_path(name, LOOKUP_FOLLOW, path);
	if (err) {
		pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
		goto out;
	}
	err = -EINVAL;
650
	if (ovl_dentry_weird(path->dentry)) {
M
Miklos Szeredi 已提交
651 652 653
		pr_err("overlayfs: filesystem on '%s' not supported\n", name);
		goto out_put;
	}
M
Miklos Szeredi 已提交
654
	if (!d_is_dir(path->dentry)) {
M
Miklos Szeredi 已提交
655 656 657 658 659 660
		pr_err("overlayfs: '%s' not a directory\n", name);
		goto out_put;
	}
	return 0;

out_put:
661
	path_put_init(path);
M
Miklos Szeredi 已提交
662 663 664 665 666 667 668 669 670 671 672 673
out:
	return err;
}

static int ovl_mount_dir(const char *name, struct path *path)
{
	int err = -ENOMEM;
	char *tmp = kstrdup(name, GFP_KERNEL);

	if (tmp) {
		ovl_unescape(tmp);
		err = ovl_mount_dir_noesc(tmp, path);
674 675 676 677 678

		if (!err)
			if (ovl_dentry_remote(path->dentry)) {
				pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
				       tmp);
679
				path_put_init(path);
680 681
				err = -EINVAL;
			}
M
Miklos Szeredi 已提交
682 683 684 685 686
		kfree(tmp);
	}
	return err;
}

M
Miklos Szeredi 已提交
687 688
static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
			     const char *name)
M
Miklos Szeredi 已提交
689 690
{
	struct kstatfs statfs;
M
Miklos Szeredi 已提交
691 692 693 694 695 696 697 698 699 700 701 702 703 704
	int err = vfs_statfs(path, &statfs);

	if (err)
		pr_err("overlayfs: statfs failed on '%s'\n", name);
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Resolve one lower directory and fold its properties into the mount state.
 *
 * @name:        already unescaped lower directory name
 * @path:        filled in with the resolved path on success
 * @ofs:         namelen is updated; index/nfs_export may be turned off
 * @stack_depth: raised to the layer's s_stack_depth if that is larger
 * @remote:      set to true if the layer's dentry is remote
 *
 * Returns 0 on success or the error from the lookup or statfs; on a statfs
 * failure @path is released again.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
{
	int err;

	err = ovl_mount_dir_noesc(name, path);
	if (err)
		goto out;

	err = ovl_check_namelen(path, ofs, name);
	if (err)
		goto out_put;

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	if (ovl_dentry_remote(path->dentry))
		*remote = true;

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	if ((ofs->config.nfs_export ||
	     (ofs->config.index && ofs->config.upperdir)) &&
	    !ovl_can_decode_fh(path->dentry->d_sb)) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}

	return 0;

out_put:
	path_put_init(path);
out:
	return err;
}

M
Miklos Szeredi 已提交
740 741 742 743 744 745 746 747 748 749 750 751
/*
 * Workdir should not be subdir of upperdir and vice versa
 *
 * Returns false when both are the same dentry or when lock_rename() returns
 * a non-NULL dentry for the pair; true otherwise.
 */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	struct dentry *trap;

	if (workdir == upperdir)
		return false;

	trap = lock_rename(workdir, upperdir);
	unlock_rename(workdir, upperdir);

	return trap == NULL;
}

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
/*
 * Split a colon-separated, backslash-escaped list of lower directories in
 * place.  Each unescaped ':' becomes a NUL terminator, and each backslash is
 * dropped with the following character kept literally, so the result is a
 * run of consecutive NUL-terminated, already unescaped names.
 *
 * Returns the number of names (always at least 1).
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	while (1) {
		char c = *src;

		if (c == ':') {
			/* Separator: terminate the current name. */
			c = '\0';
			count++;
		} else {
			if (c == '\\')
				c = *++src;
			if (!c) {
				*dst = '\0';
				break;
			}
		}
		*dst++ = c;
		src++;
	}

	return count;
}

772 773 774 775 776
/*
 * POSIX ACL xattr ->get: forward to ovl_xattr_get() using the handler's
 * name; the @name argument is not used.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}

780 781 782 783 784
/*
 * POSIX ACL xattr ->set.
 *
 * Validates the request before anything is changed: @value must parse as an
 * ACL, the workdir inode must have IS_POSIXACL, the real inode must
 * implement ->set_acl, a default ACL is only accepted on directories, and
 * the caller must be owner or capable.  The sgid bit is cleared first when
 * required, then the xattr is set via ovl_xattr_set() and the attributes of
 * the real inode are copied to the overlay inode.
 *
 * Returns 0 on success or a negative errno (-EOPNOTSUPP, -EACCES, -EPERM,
 * an ACL parse error, or an error from ovl_setattr()/ovl_xattr_set()).
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}
	err = -EOPNOTSUPP;
	if (!IS_POSIXACL(d_inode(workdir)))
		goto out_acl_release;
	if (!realinode->i_op->set_acl)
		goto out_acl_release;
	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		/* Removing a default ACL from a non-directory is a no-op. */
		err = acl ? -EACCES : 0;
		goto out_acl_release;
	}
	err = -EPERM;
	if (!inode_owner_or_capable(inode))
		goto out_acl_release;

	/* The parsed ACL was only needed for validation. */
	posix_acl_release(acl);

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &iattr);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (!err)
		ovl_copyattr(ovl_inode_real(inode), inode);

	return err;

out_acl_release:
	posix_acl_release(acl);
	return err;
}

838 839 840 841
/* ->get for xattrs under OVL_XATTR_PREFIX: always refused with -EOPNOTSUPP. */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

M
Miklos Szeredi 已提交
845 846 847 848 849
/* ->set for xattrs under OVL_XATTR_PREFIX: always refused with -EOPNOTSUPP. */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

853 854 855 856
/* Catch-all xattr ->get: forward to ovl_xattr_get() with the given name. */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

860 861 862 863 864
/* Catch-all xattr ->set: forward to ovl_xattr_set() with the given name. */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}

868 869
/* Handler for the POSIX access ACL xattr. */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
	.flags = ACL_TYPE_ACCESS,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};

876 877
/* Handler for the POSIX default ACL xattr. */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags = ACL_TYPE_DEFAULT,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};

static const struct xattr_handler ovl_own_xattr_handler = {
	.prefix	= OVL_XATTR_PREFIX,
886
	.get = ovl_own_xattr_get,
M
Miklos Szeredi 已提交
887 888 889 890 891
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
892
	.get = ovl_other_xattr_get,
M
Miklos Szeredi 已提交
893 894 895 896
	.set = ovl_other_xattr_set,
};

static const struct xattr_handler *ovl_xattr_handlers[] = {
897
#ifdef CONFIG_FS_POSIX_ACL
M
Miklos Szeredi 已提交
898 899
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
900
#endif
M
Miklos Szeredi 已提交
901 902 903 904 905
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

M
Miklos Szeredi 已提交
906
static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
907
{
M
Miklos Szeredi 已提交
908
	struct vfsmount *upper_mnt;
909 910
	int err;

M
Miklos Szeredi 已提交
911
	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
912 913 914 915 916 917 918 919 920 921
	if (err)
		goto out;

	/* Upper fs should not be r/o */
	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
		pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
		err = -EINVAL;
		goto out;
	}

M
Miklos Szeredi 已提交
922
	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
923 924 925 926 927
	if (err)
		goto out;

	err = -EBUSY;
	if (ovl_inuse_trylock(upperpath->dentry)) {
M
Miklos Szeredi 已提交
928 929
		ofs->upperdir_locked = true;
	} else if (ofs->config.index) {
930 931 932 933 934
		pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}
M
Miklos Szeredi 已提交
935 936 937 938 939 940 941 942 943 944

	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
		pr_err("overlayfs: failed to clone upperpath\n");
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
M
Miklos Szeredi 已提交
945
	ofs->upper_mnt = upper_mnt;
946 947 948 949 950
	err = 0;
out:
	return err;
}

M
Miklos Szeredi 已提交
951
static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
952
{
953
	struct vfsmount *mnt = ofs->upper_mnt;
954 955 956
	struct dentry *temp;
	int err;

957 958 959 960
	err = mnt_want_write(mnt);
	if (err)
		return err;

M
Miklos Szeredi 已提交
961 962
	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	if (!ofs->workdir)
963
		goto out;
964 965 966 967 968 969 970 971 972

	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
973
		goto out;
974 975 976 977 978 979 980 981 982

	/*
	 * We allowed this configuration and don't want to break users over
	 * kernel upgrade. So warn instead of erroring out.
	 */
	if (!err)
		pr_warn("overlayfs: upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
M
Miklos Szeredi 已提交
983 984 985
	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(temp);
	if (ofs->tmpfile)
986 987 988 989 990 991 992
		dput(temp);
	else
		pr_warn("overlayfs: upper fs does not support tmpfile.\n");

	/*
	 * Check if upper/work fs supports trusted.overlay.* xattr
	 */
M
Miklos Szeredi 已提交
993
	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
994
	if (err) {
M
Miklos Szeredi 已提交
995
		ofs->noxattr = true;
996 997
		ofs->config.index = false;
		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
998
		err = 0;
999
	} else {
M
Miklos Szeredi 已提交
1000
		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1001 1002 1003
	}

	/* Check if upper/work fs supports file handles */
M
Miklos Szeredi 已提交
1004 1005 1006
	if (ofs->config.index &&
	    !ovl_can_decode_fh(ofs->workdir->d_sb)) {
		ofs->config.index = false;
1007 1008 1009
		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
	}

1010 1011 1012 1013 1014 1015
	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}

1016 1017 1018
out:
	mnt_drop_write(mnt);
	return err;
1019 1020
}

M
Miklos Szeredi 已提交
1021
static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
1022 1023
{
	int err;
M
Miklos Szeredi 已提交
1024
	struct path workpath = { };
1025

M
Miklos Szeredi 已提交
1026
	err = ovl_mount_dir(ofs->config.workdir, &workpath);
1027 1028 1029 1030
	if (err)
		goto out;

	err = -EINVAL;
M
Miklos Szeredi 已提交
1031
	if (upperpath->mnt != workpath.mnt) {
1032 1033 1034
		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
		goto out;
	}
M
Miklos Szeredi 已提交
1035
	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1036 1037 1038 1039 1040
		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
		goto out;
	}

	err = -EBUSY;
M
Miklos Szeredi 已提交
1041
	if (ovl_inuse_trylock(workpath.dentry)) {
M
Miklos Szeredi 已提交
1042 1043
		ofs->workdir_locked = true;
	} else if (ofs->config.index) {
1044 1045 1046 1047 1048 1049
		pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

M
Miklos Szeredi 已提交
1050 1051
	ofs->workbasedir = dget(workpath.dentry);
	err = ovl_make_workdir(ofs, &workpath);
M
Miklos Szeredi 已提交
1052 1053 1054
	if (err)
		goto out;

1055 1056
	err = 0;
out:
M
Miklos Szeredi 已提交
1057 1058
	path_put(&workpath);

1059 1060 1061
	return err;
}

M
Miklos Szeredi 已提交
1062
static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
1063
			    struct path *upperpath)
1064
{
1065
	struct vfsmount *mnt = ofs->upper_mnt;
1066 1067
	int err;

1068 1069 1070 1071
	err = mnt_want_write(mnt);
	if (err)
		return err;

1072
	/* Verify lower root is upper root origin */
1073
	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1074
				true);
1075 1076 1077 1078 1079
	if (err) {
		pr_err("overlayfs: failed to verify upper root origin\n");
		goto out;
	}

M
Miklos Szeredi 已提交
1080 1081
	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (ofs->indexdir) {
1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in "trusted.overlay.origin"
		 * xattr. If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * "trusted.overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
		}
		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1097
		if (err)
1098
			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
1099 1100 1101

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
1102
			err = ovl_indexdir_cleanup(ofs);
1103
	}
M
Miklos Szeredi 已提交
1104
	if (err || !ofs->indexdir)
1105 1106 1107
		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");

out:
1108
	mnt_drop_write(mnt);
1109 1110 1111
	return err;
}

1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140
/* Get a unique fsid for the layer */
static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
{
	unsigned int i;
	dev_t dev;
	int err;

	/* fsid 0 is reserved for upper fs even with non upper overlay */
	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
		return 0;

	for (i = 0; i < ofs->numlowerfs; i++) {
		if (ofs->lower_fs[i].sb == sb)
			return i + 1;
	}

	err = get_anon_bdev(&dev);
	if (err) {
		pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
		return err;
	}

	ofs->lower_fs[ofs->numlowerfs].sb = sb;
	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
	ofs->numlowerfs++;

	return ofs->numlowerfs;
}

M
Miklos Szeredi 已提交
1141
static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
1142 1143 1144 1145 1146 1147
				unsigned int numlower)
{
	int err;
	unsigned int i;

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1148
	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
1149
				    GFP_KERNEL);
M
Miklos Szeredi 已提交
1150
	if (ofs->lower_layers == NULL)
1151
		goto out;
1152 1153 1154 1155 1156 1157

	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
				GFP_KERNEL);
	if (ofs->lower_fs == NULL)
		goto out;

1158 1159
	for (i = 0; i < numlower; i++) {
		struct vfsmount *mnt;
1160
		int fsid;
1161

1162 1163
		err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
		if (err < 0)
1164 1165 1166 1167 1168 1169 1170 1171
			goto out;

		mnt = clone_private_mount(&stack[i]);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
			pr_err("overlayfs: failed to clone lowerpath\n");
			goto out;
		}
1172

1173 1174 1175 1176 1177 1178
		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

M
Miklos Szeredi 已提交
1179
		ofs->lower_layers[ofs->numlower].mnt = mnt;
1180
		ofs->lower_layers[ofs->numlower].idx = i + 1;
1181 1182 1183 1184 1185
		ofs->lower_layers[ofs->numlower].fsid = fsid;
		if (fsid) {
			ofs->lower_layers[ofs->numlower].fs =
				&ofs->lower_fs[fsid - 1];
		}
M
Miklos Szeredi 已提交
1186
		ofs->numlower++;
1187 1188 1189 1190 1191 1192
	}
	err = 0;
out:
	return err;
}

1193
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
M
Miklos Szeredi 已提交
1194
					    struct ovl_fs *ofs)
1195 1196 1197
{
	int err;
	char *lowertmp, *lower;
1198 1199
	struct path *stack = NULL;
	unsigned int stacklen, numlower = 0, i;
1200
	bool remote = false;
1201
	struct ovl_entry *oe;
1202 1203

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1204
	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1205
	if (!lowertmp)
1206
		goto out_err;
1207 1208 1209 1210 1211 1212

	err = -EINVAL;
	stacklen = ovl_split_lowerdirs(lowertmp);
	if (stacklen > OVL_MAX_STACK) {
		pr_err("overlayfs: too many lower directories, limit is %d\n",
		       OVL_MAX_STACK);
1213
		goto out_err;
M
Miklos Szeredi 已提交
1214
	} else if (!ofs->config.upperdir && stacklen == 1) {
1215
		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1216
		goto out_err;
1217 1218 1219 1220
	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
		   ofs->config.redirect_follow) {
		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
1221 1222 1223 1224 1225
	}

	err = -ENOMEM;
	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
	if (!stack)
1226
		goto out_err;
1227 1228 1229 1230

	err = -EINVAL;
	lower = lowertmp;
	for (numlower = 0; numlower < stacklen; numlower++) {
M
Miklos Szeredi 已提交
1231
		err = ovl_lower_dir(lower, &stack[numlower], ofs,
1232 1233
				    &sb->s_stack_depth, &remote);
		if (err)
1234
			goto out_err;
1235 1236 1237 1238 1239 1240 1241 1242

		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1243
		goto out_err;
1244 1245
	}

M
Miklos Szeredi 已提交
1246
	err = ovl_get_lower_layers(ofs, stack, numlower);
1247 1248 1249 1250 1251 1252 1253 1254 1255 1256
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
M
Miklos Szeredi 已提交
1257
		oe->lowerstack[i].layer = &ofs->lower_layers[i];
1258
	}
1259 1260 1261 1262 1263 1264 1265 1266 1267 1268

	if (remote)
		sb->s_d_op = &ovl_reval_dentry_operations;
	else
		sb->s_d_op = &ovl_dentry_operations;

out:
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);
1269 1270 1271 1272 1273 1274
	kfree(lowertmp);

	return oe;

out_err:
	oe = ERR_PTR(err);
1275 1276 1277
	goto out;
}

M
Miklos Szeredi 已提交
1278 1279
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
K
Kees Cook 已提交
1280
	struct path upperpath = { };
M
Miklos Szeredi 已提交
1281
	struct dentry *root_dentry;
1282
	struct ovl_entry *oe;
M
Miklos Szeredi 已提交
1283
	struct ovl_fs *ofs;
1284
	struct cred *cred;
M
Miklos Szeredi 已提交
1285 1286
	int err;

E
Erez Zadok 已提交
1287
	err = -ENOMEM;
M
Miklos Szeredi 已提交
1288 1289
	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ofs)
M
Miklos Szeredi 已提交
1290 1291
		goto out;

M
Miklos Szeredi 已提交
1292
	ofs->creator_cred = cred = prepare_creds();
1293 1294 1295
	if (!cred)
		goto out_err;

M
Miklos Szeredi 已提交
1296
	ofs->config.index = ovl_index_def;
1297
	ofs->config.nfs_export = ovl_nfs_export_def;
M
Miklos Szeredi 已提交
1298
	err = ovl_parse_opt((char *) data, &ofs->config);
E
Erez Zadok 已提交
1299
	if (err)
1300
		goto out_err;
E
Erez Zadok 已提交
1301

M
Miklos Szeredi 已提交
1302
	err = -EINVAL;
M
Miklos Szeredi 已提交
1303
	if (!ofs->config.lowerdir) {
1304 1305
		if (!silent)
			pr_err("overlayfs: missing 'lowerdir'\n");
1306
		goto out_err;
M
Miklos Szeredi 已提交
1307 1308
	}

M
Miklos Szeredi 已提交
1309
	sb->s_stack_depth = 0;
1310
	sb->s_maxbytes = MAX_LFS_FILESIZE;
M
Miklos Szeredi 已提交
1311 1312
	if (ofs->config.upperdir) {
		if (!ofs->config.workdir) {
M
Miklos Szeredi 已提交
1313
			pr_err("overlayfs: missing 'workdir'\n");
1314
			goto out_err;
M
Miklos Szeredi 已提交
1315
		}
M
Miklos Szeredi 已提交
1316

M
Miklos Szeredi 已提交
1317
		err = ovl_get_upper(ofs, &upperpath);
M
Miklos Szeredi 已提交
1318
		if (err)
1319
			goto out_err;
1320

M
Miklos Szeredi 已提交
1321
		err = ovl_get_workdir(ofs, &upperpath);
1322
		if (err)
1323
			goto out_err;
1324

M
Miklos Szeredi 已提交
1325
		if (!ofs->workdir)
1326
			sb->s_flags |= SB_RDONLY;
1327

M
Miklos Szeredi 已提交
1328 1329
		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1330

M
Miklos Szeredi 已提交
1331
	}
M
Miklos Szeredi 已提交
1332
	oe = ovl_get_lowerstack(sb, ofs);
1333 1334
	err = PTR_ERR(oe);
	if (IS_ERR(oe))
1335
		goto out_err;
M
Miklos Szeredi 已提交
1336

H
hujianyang 已提交
1337
	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
M
Miklos Szeredi 已提交
1338
	if (!ofs->upper_mnt)
1339
		sb->s_flags |= SB_RDONLY;
M
Miklos Szeredi 已提交
1340

M
Miklos Szeredi 已提交
1341 1342
	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
		err = ovl_get_indexdir(ofs, oe, &upperpath);
1343
		if (err)
1344
			goto out_free_oe;
1345

1346 1347 1348 1349
		/* Force r/o mount with no index dir */
		if (!ofs->indexdir) {
			dput(ofs->workdir);
			ofs->workdir = NULL;
1350
			sb->s_flags |= SB_RDONLY;
1351 1352
		}

1353 1354
	}

1355
	/* Show index=off in /proc/mounts for forced r/o mount */
1356
	if (!ofs->indexdir) {
M
Miklos Szeredi 已提交
1357
		ofs->config.index = false;
1358 1359 1360 1361 1362
		if (ofs->upper_mnt && ofs->config.nfs_export) {
			pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
			ofs->config.nfs_export = false;
		}
	}
1363

1364 1365 1366
	if (ofs->config.nfs_export)
		sb->s_export_op = &ovl_export_operations;

1367 1368 1369
	/* Never override disk quota limits or use reserved space */
	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);

1370 1371 1372
	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_op = &ovl_super_operations;
	sb->s_xattr = ovl_xattr_handlers;
M
Miklos Szeredi 已提交
1373
	sb->s_fs_info = ofs;
1374
	sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
1375

1376
	err = -ENOMEM;
1377
	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
M
Miklos Szeredi 已提交
1378
	if (!root_dentry)
1379
		goto out_free_oe;
M
Miklos Szeredi 已提交
1380

1381 1382
	root_dentry->d_fsdata = oe;

M
Miklos Szeredi 已提交
1383
	mntput(upperpath.mnt);
1384
	if (upperpath.dentry) {
1385
		ovl_dentry_set_upper_alias(root_dentry);
M
Miklos Szeredi 已提交
1386 1387
		if (ovl_is_impuredir(upperpath.dentry))
			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1388
	}
M
Miklos Szeredi 已提交
1389

1390 1391
	/* Root is always merge -> can have whiteouts */
	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1392
	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1393 1394
	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
		       ovl_dentry_lower(root_dentry));
M
Miklos Szeredi 已提交
1395

M
Miklos Szeredi 已提交
1396 1397 1398 1399
	sb->s_root = root_dentry;

	return 0;

1400 1401
out_free_oe:
	ovl_entry_stack_free(oe);
1402
	kfree(oe);
1403
out_err:
M
Miklos Szeredi 已提交
1404
	path_put(&upperpath);
M
Miklos Szeredi 已提交
1405
	ovl_free_fs(ofs);
M
Miklos Szeredi 已提交
1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417
out:
	return err;
}

/*
 * ->mount() callback of ovl_fs_type.  @dev_name is not used; the work is
 * delegated to mount_nodev() with ovl_fill_super() as the fill callback and
 * its result is returned unchanged.
 */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	struct dentry *root;

	root = mount_nodev(fs_type, flags, raw_data, ovl_fill_super);

	return root;
}

static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
1418
	.name		= "overlay",
M
Miklos Szeredi 已提交
1419 1420 1421
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
1422
MODULE_ALIAS_FS("overlay");
M
Miklos Szeredi 已提交
1423

1424 1425 1426 1427 1428 1429 1430
static void ovl_inode_init_once(void *foo)
{
	struct ovl_inode *oi = foo;

	inode_init_once(&oi->vfs_inode);
}

M
Miklos Szeredi 已提交
1431 1432
static int __init ovl_init(void)
{
1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447
	int err;

	ovl_inode_cachep = kmem_cache_create("ovl_inode",
					     sizeof(struct ovl_inode), 0,
					     (SLAB_RECLAIM_ACCOUNT|
					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					     ovl_inode_init_once);
	if (ovl_inode_cachep == NULL)
		return -ENOMEM;

	err = register_filesystem(&ovl_fs_type);
	if (err)
		kmem_cache_destroy(ovl_inode_cachep);

	return err;
M
Miklos Szeredi 已提交
1448 1449 1450 1451 1452
}

/*
 * Module exit: unregister the filesystem type and tear down the inode
 * cache created by ovl_init().
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Inodes are freed through delayed RCU callbacks; wait for all of
	 * them to finish before the cache they return to is destroyed.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);
}

/* Module entry and exit points */
module_init(ovl_init);
module_exit(ovl_exit);