super.c 33.6 KB
Newer Older
M
Miklos Szeredi 已提交
1 2 3 4 5 6 7 8 9
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

10
#include <uapi/linux/magic.h>
M
Miklos Szeredi 已提交
11 12 13 14 15 16
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
A
Andy Whitcroft 已提交
17
#include <linux/statfs.h>
E
Erez Zadok 已提交
18
#include <linux/seq_file.h>
M
Miklos Szeredi 已提交
19
#include <linux/posix_acl_xattr.h>
M
Miklos Szeredi 已提交
20 21 22 23 24 25 26 27 28
#include "overlayfs.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


struct ovl_dir_cache;

29 30
#define OVL_MAX_STACK 500

31 32 33 34
/*
 * Default for the redirect_dir feature, exposed as module parameter
 * "redirect_dir".  The description must be keyed on the parameter name
 * registered by module_param_named(), not on the backing variable.
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");
M
Miklos Szeredi 已提交
35

36 37 38 39 40 41 42
/*
 * Module parameter "redirect_always_follow".  The description is keyed on
 * the parameter name registered by module_param_named().
 */
static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

43 44 45 46 47
/*
 * Default for the inodes index feature, exposed as module parameter "index".
 * The description is keyed on the parameter name registered by
 * module_param_named(), not on the backing variable.
 */
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

48 49 50 51 52
/*
 * Default for the NFS export feature, exposed as module parameter
 * "nfs_export".  The description is keyed on the parameter name registered
 * by module_param_named(), not on the backing variable.
 */
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

53 54 55 56 57 58 59 60
/* Call dput() on the dentry of every lower stack entry of @oe. */
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
	unsigned int idx = 0;

	while (idx < oe->numlower) {
		dput(oe->lowerstack[idx].dentry);
		idx++;
	}
}

M
Miklos Szeredi 已提交
61 62 63 64 65
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
66
		ovl_entry_stack_free(oe);
M
Miklos Szeredi 已提交
67 68 69 70
		kfree_rcu(oe, rcu);
	}
}

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/*
 * Re-check append-only restrictions against a real inode.
 *
 * This test was moot in vfs may_open() because the overlay inode does not
 * have the S_APPEND flag, so it is repeated here on the real upper inode.
 *
 * Returns -EPERM if @inode is append-only and @flag either opens for
 * writing without O_APPEND or requests O_TRUNC; 0 otherwise.
 */
static int ovl_check_append_only(struct inode *inode, int flag)
{
	if (!IS_APPEND(inode))
		return 0;

	if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
		return -EPERM;

	return (flag & O_TRUNC) ? -EPERM : 0;
}

87 88
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode,
M
Miklos Szeredi 已提交
89
				 unsigned int open_flags, unsigned int flags)
M
Miklos Szeredi 已提交
90 91
{
	struct dentry *real;
92
	int err;
M
Miklos Szeredi 已提交
93

94 95 96
	if (flags & D_REAL_UPPER)
		return ovl_dentry_upper(dentry);

97
	if (!d_is_reg(dentry)) {
M
Miklos Szeredi 已提交
98 99 100 101 102
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

103
	if (open_flags) {
104
		err = ovl_open_maybe_copy_up(dentry, open_flags);
105 106 107 108
		if (err)
			return ERR_PTR(err);
	}

M
Miklos Szeredi 已提交
109
	real = ovl_dentry_upper(dentry);
110 111 112 113 114 115
	if (real && (!inode || inode == d_inode(real))) {
		if (!inode) {
			err = ovl_check_append_only(d_inode(real), open_flags);
			if (err)
				return ERR_PTR(err);
		}
M
Miklos Szeredi 已提交
116
		return real;
117
	}
M
Miklos Szeredi 已提交
118 119 120 121 122

	real = ovl_dentry_lower(dentry);
	if (!real)
		goto bug;

M
Miklos Szeredi 已提交
123
	/* Handle recursion */
M
Miklos Szeredi 已提交
124
	real = d_real(real, inode, open_flags, 0);
M
Miklos Szeredi 已提交
125

M
Miklos Szeredi 已提交
126 127 128
	if (!inode || inode == d_inode(real))
		return real;
bug:
M
Miklos Szeredi 已提交
129
	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
M
Miklos Szeredi 已提交
130 131 132 133
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
	return dentry;
}

134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/*
 * ->d_revalidate: call ->d_revalidate on every lower stack dentry that has
 * DCACHE_OP_REVALIDATE set.
 *
 * Returns 1 if all of them are still valid, a negative error from a lower
 * ->d_revalidate, or -ESTALE if one reports 0.  In the latter case the lower
 * dentry is invalidated unless LOOKUP_RCU is set in @flags.
 */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int i;

	for (i = 0; i < oe->numlower; i++) {
		struct dentry *lower = oe->lowerstack[i].dentry;
		int ret;

		if (!(lower->d_flags & DCACHE_OP_REVALIDATE))
			continue;

		ret = lower->d_op->d_revalidate(lower, flags);
		if (ret < 0)
			return ret;
		if (ret)
			continue;

		if (!(flags & LOOKUP_RCU))
			d_invalidate(lower);
		return -ESTALE;
	}
	return 1;
}

/*
 * ->d_weak_revalidate: call ->d_weak_revalidate on every lower stack dentry
 * that has DCACHE_OP_WEAK_REVALIDATE set, stopping at the first result <= 0.
 *
 * Returns the last result obtained, or 1 if no lower dentry was checked.
 */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int i;
	int ret = 1;

	for (i = 0; i < oe->numlower; i++) {
		struct dentry *lower = oe->lowerstack[i].dentry;

		if (!(lower->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			continue;

		ret = lower->d_op->d_weak_revalidate(lower, flags);
		if (ret <= 0)
			return ret;
	}
	return ret;
}

M
Miklos Szeredi 已提交
175 176
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
177
	.d_real = ovl_d_real,
M
Miklos Szeredi 已提交
178 179
};

180 181
static const struct dentry_operations ovl_reval_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
182
	.d_real = ovl_d_real,
183 184 185 186
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

187 188 189 190 191 192
static struct kmem_cache *ovl_inode_cachep;

static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);

193 194 195
	if (!oi)
		return NULL;

196
	oi->cache = NULL;
M
Miklos Szeredi 已提交
197
	oi->redirect = NULL;
198
	oi->version = 0;
M
Miklos Szeredi 已提交
199
	oi->flags = 0;
200
	oi->__upperdentry = NULL;
201
	oi->lower = NULL;
202
	mutex_init(&oi->lock);
203

204 205 206 207 208 209 210 211 212 213 214 215
	return &oi->vfs_inode;
}

/* RCU callback: return the ovl_inode containing @head to the slab cache. */
static void ovl_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct ovl_inode *oi = OVL_I(inode);

	kmem_cache_free(ovl_inode_cachep, oi);
}

static void ovl_destroy_inode(struct inode *inode)
{
216 217 218
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
219
	iput(oi->lower);
M
Miklos Szeredi 已提交
220
	kfree(oi->redirect);
221
	ovl_dir_cache_free(inode);
222
	mutex_destroy(&oi->lock);
223

224 225 226
	call_rcu(&inode->i_rcu, ovl_i_callback);
}

M
Miklos Szeredi 已提交
227
static void ovl_free_fs(struct ovl_fs *ofs)
M
Miklos Szeredi 已提交
228
{
229
	unsigned i;
M
Miklos Szeredi 已提交
230

M
Miklos Szeredi 已提交
231 232 233 234 235 236 237 238 239 240 241
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
	mntput(ofs->upper_mnt);
	for (i = 0; i < ofs->numlower; i++) {
		mntput(ofs->lower_layers[i].mnt);
		free_anon_bdev(ofs->lower_layers[i].pseudo_dev);
242
	}
M
Miklos Szeredi 已提交
243 244 245 246 247
	kfree(ofs->lower_layers);

	kfree(ofs->config.lowerdir);
	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
248
	kfree(ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
249 250 251
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
M
Miklos Szeredi 已提交
252 253
}

254 255 256 257 258 259 260
/* ->put_super: free the ovl_fs stored in the superblock's s_fs_info. */
static void ovl_put_super(struct super_block *sb)
{
	ovl_free_fs(sb->s_fs_info);
}

261
/* Sync real dirty inodes in upper filesystem (if it exists) */
262 263
static int ovl_sync_fs(struct super_block *sb, int wait)
{
M
Miklos Szeredi 已提交
264
	struct ovl_fs *ofs = sb->s_fs_info;
265 266 267
	struct super_block *upper_sb;
	int ret;

M
Miklos Szeredi 已提交
268
	if (!ofs->upper_mnt)
269
		return 0;
270 271 272 273 274 275 276 277 278 279

	/*
	 * If this is a sync(2) call or an emergency sync, all the super blocks
	 * will be iterated, including upper_sb, so no need to do anything.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
280 281
		return 0;

282 283
	upper_sb = ofs->upper_mnt->mnt_sb;

284
	down_read(&upper_sb->s_umount);
285
	ret = sync_filesystem(upper_sb);
286
	up_read(&upper_sb->s_umount);
287

288 289 290
	return ret;
}

A
Andy Whitcroft 已提交
291 292 293 294 295 296
/**
 * ovl_statfs
 * @sb: The overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
297
 * filesystem pass the statfs to the upper filesystem (if it exists)
A
Andy Whitcroft 已提交
298 299 300 301 302 303 304 305
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

306
	ovl_path_real(root_dentry, &path);
A
Andy Whitcroft 已提交
307 308 309

	err = vfs_statfs(&path, buf);
	if (!err) {
M
Miklos Szeredi 已提交
310
		buf->f_namelen = ofs->namelen;
A
Andy Whitcroft 已提交
311 312 313 314 315 316
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

317
/* Will this overlay be forced to mount/remount ro? */
M
Miklos Szeredi 已提交
318
static bool ovl_force_readonly(struct ovl_fs *ofs)
319
{
M
Miklos Szeredi 已提交
320
	return (!ofs->upper_mnt || !ofs->workdir);
321 322
}

323 324 325 326 327
/* Default redirect_dir mode string, derived from ovl_redirect_dir_def. */
static const char *ovl_redirect_mode_def(void)
{
	if (ovl_redirect_dir_def)
		return "on";

	return "off";
}

E
Erez Zadok 已提交
328 329 330 331 332 333 334 335 336
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
M
Miklos Szeredi 已提交
337
	struct ovl_fs *ofs = sb->s_fs_info;
E
Erez Zadok 已提交
338

M
Miklos Szeredi 已提交
339 340 341 342
	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
M
Miklos Szeredi 已提交
343
	}
M
Miklos Szeredi 已提交
344
	if (ofs->config.default_permissions)
M
Miklos Szeredi 已提交
345
		seq_puts(m, ",default_permissions");
346 347
	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
348
	if (ofs->config.index != ovl_index_def)
349
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
350 351 352
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
E
Erez Zadok 已提交
353 354 355
	return 0;
}

356 357
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
M
Miklos Szeredi 已提交
358
	struct ovl_fs *ofs = sb->s_fs_info;
359

360
	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
361 362 363 364 365
		return -EROFS;

	return 0;
}

M
Miklos Szeredi 已提交
366
static const struct super_operations ovl_super_operations = {
367 368 369
	.alloc_inode	= ovl_alloc_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
370
	.put_super	= ovl_put_super,
371
	.sync_fs	= ovl_sync_fs,
A
Andy Whitcroft 已提交
372
	.statfs		= ovl_statfs,
E
Erez Zadok 已提交
373
	.show_options	= ovl_show_options,
374
	.remount_fs	= ovl_remount,
M
Miklos Szeredi 已提交
375 376 377 378 379 380
};

enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
M
Miklos Szeredi 已提交
381
	OPT_DEFAULT_PERMISSIONS,
382
	OPT_REDIRECT_DIR,
383 384
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
385 386
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
M
Miklos Szeredi 已提交
387 388 389 390 391 392 393
	OPT_ERR,
};

static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
M
Miklos Szeredi 已提交
394
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
395
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
396 397
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
398 399
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
M
Miklos Szeredi 已提交
400 401 402
	{OPT_ERR,			NULL}
};

M
Miklos Szeredi 已提交
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
/*
 * Split off the next comma-separated option from *@s.
 *
 * A backslash escapes the following character, so "\," does not end an
 * option; the backslash itself is left in place.  The separating comma is
 * overwritten with NUL and *@s is advanced past it, or set to NULL when the
 * string is exhausted.
 *
 * Returns the start of the option, or NULL if *@s is NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Skip the escaped character; stop on a trailing backslash */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}

426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
/*
 * Translate the redirect_dir mode string into config flags.
 *
 *   "on"       - set redirect_dir and redirect_follow
 *   "follow"   - set redirect_follow
 *   "off"      - set redirect_follow only if ovl_redirect_always_follow
 *   "nofollow" - leave both flags untouched
 *
 * Returns 0 on success, -EINVAL for any other @mode.
 */
static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
{
	if (!strcmp(mode, "on")) {
		config->redirect_dir = true;
		/*
		 * Does not make sense to have redirect creation without
		 * redirect following.
		 */
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "follow")) {
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "off")) {
		if (ovl_redirect_always_follow)
			config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "nofollow"))
		return 0;

	pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
	       mode);
	return -EINVAL;
}

M
Miklos Szeredi 已提交
449 450 451 452
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;

453 454 455 456
	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

M
Miklos Szeredi 已提交
457
	while ((p = ovl_next_opt(&opt)) != NULL) {
M
Miklos Szeredi 已提交
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
		int token;
		substring_t args[MAX_OPT_ARGS];

		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

M
Miklos Szeredi 已提交
487 488 489 490
		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

491 492 493 494 495
		case OPT_REDIRECT_DIR:
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
M
Miklos Szeredi 已提交
496 497
			break;

498 499 500 501 502 503 504 505
		case OPT_INDEX_ON:
			config->index = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			break;

506 507 508 509 510 511 512 513
		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			break;

M
Miklos Szeredi 已提交
514
		default:
515
			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
M
Miklos Szeredi 已提交
516 517 518
			return -EINVAL;
		}
	}
H
hujianyang 已提交
519 520 521 522 523 524 525 526 527

	/* Workdir is useless in non-upper mount */
	if (!config->upperdir && config->workdir) {
		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
			config->workdir);
		kfree(config->workdir);
		config->workdir = NULL;
	}

528
	return ovl_parse_redirect_mode(config, config->redirect_mode);
M
Miklos Szeredi 已提交
529 530 531
}

/* Directory names used for the work and index directories. */
#define OVL_WORKDIR_NAME	"work"
#define OVL_INDEXDIR_NAME	"index"
M
Miklos Szeredi 已提交
533

M
Miklos Szeredi 已提交
534
/*
 * Look up, and if necessary create, directory @name under ofs->workbasedir.
 *
 * If the entry already exists it is returned as-is when @persist is true;
 * otherwise it is cleaned up with ovl_workdir_cleanup() and the lookup is
 * retried once.  A newly created directory has its POSIX ACL xattrs removed
 * and its mode reset.
 *
 * Runs with the workbasedir inode locked.  Returns the dentry on success, or
 * NULL (after a warning) on any failure.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir = ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ofs->upper_mnt;
	struct iattr attr = {
		.ia_valid = ATTR_MODE,
		.ia_mode = S_IFDIR | 0,
	};
	struct dentry *work;
	bool retried = false;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

retry:
	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
	if (IS_ERR(work)) {
		err = PTR_ERR(work);
		goto out_err;
	}

	if (work->d_inode) {
		err = -EEXIST;
		if (retried)
			goto out_dput;

		/* A persistent directory is reused as found */
		if (persist)
			goto out_unlock;

		retried = true;
		ovl_workdir_cleanup(dir, mnt, work, 0);
		dput(work);
		goto retry;
	}

	err = ovl_create_real(dir, work,
			      &(struct cattr){.mode = S_IFDIR | 0},
			      NULL, true);
	if (err)
		goto out_dput;

	/*
	 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
	 *
	 * a) success (there was a POSIX ACL xattr and was removed)
	 * b) -ENODATA (there was no POSIX ACL xattr)
	 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
	 *
	 * There are various other error values that could effectively
	 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
	 * if the xattr name is too long), but the set of filesystems
	 * allowed as upper are limited to "normal" ones, where checking
	 * for the above two errors is sufficient.
	 */
	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto out_dput;

	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto out_dput;

	/* Clear any inherited mode bits */
	inode_lock(work->d_inode);
	err = notify_change(work, &attr, NULL);
	inode_unlock(work->d_inode);
	if (err)
		goto out_dput;

out_unlock:
	inode_unlock(dir);

	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

M
Miklos Szeredi 已提交
622 623 624 625 626 627 628 629 630 631 632 633 634
/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character following it is copied literally.  A trailing backslash is
 * dropped.
 */
static void ovl_unescape(char *s)
{
	const char *in = s;
	char *out = s;
	char c;

	do {
		if (*in == '\\')
			in++;
		c = *in++;
		*out++ = c;
	} while (c);
}

M
Miklos Szeredi 已提交
635 636
static int ovl_mount_dir_noesc(const char *name, struct path *path)
{
637
	int err = -EINVAL;
M
Miklos Szeredi 已提交
638

639 640 641 642
	if (!*name) {
		pr_err("overlayfs: empty lowerdir\n");
		goto out;
	}
M
Miklos Szeredi 已提交
643 644 645 646 647 648
	err = kern_path(name, LOOKUP_FOLLOW, path);
	if (err) {
		pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
		goto out;
	}
	err = -EINVAL;
649
	if (ovl_dentry_weird(path->dentry)) {
M
Miklos Szeredi 已提交
650 651 652
		pr_err("overlayfs: filesystem on '%s' not supported\n", name);
		goto out_put;
	}
M
Miklos Szeredi 已提交
653
	if (!d_is_dir(path->dentry)) {
M
Miklos Szeredi 已提交
654 655 656 657 658 659
		pr_err("overlayfs: '%s' not a directory\n", name);
		goto out_put;
	}
	return 0;

out_put:
660
	path_put_init(path);
M
Miklos Szeredi 已提交
661 662 663 664 665 666 667 668 669 670 671 672
out:
	return err;
}

static int ovl_mount_dir(const char *name, struct path *path)
{
	int err = -ENOMEM;
	char *tmp = kstrdup(name, GFP_KERNEL);

	if (tmp) {
		ovl_unescape(tmp);
		err = ovl_mount_dir_noesc(tmp, path);
673 674 675 676 677

		if (!err)
			if (ovl_dentry_remote(path->dentry)) {
				pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
				       tmp);
678
				path_put_init(path);
679 680
				err = -EINVAL;
			}
M
Miklos Szeredi 已提交
681 682 683 684 685
		kfree(tmp);
	}
	return err;
}

M
Miklos Szeredi 已提交
686 687
static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
			     const char *name)
M
Miklos Szeredi 已提交
688 689
{
	struct kstatfs statfs;
M
Miklos Szeredi 已提交
690 691 692 693 694 695 696 697 698 699 700 701 702 703
	int err = vfs_statfs(path, &statfs);

	if (err)
		pr_err("overlayfs: statfs failed on '%s'\n", name);
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Resolve one lower directory @name into @path and fold its properties into
 * the mount-wide state: name length limit, maximum stacking depth
 * (*@stack_depth) and whether any lower layer is remote (*@remote).  index
 * and nfs_export are switched off, with a warning, if the layer's
 * filesystem cannot decode file handles.
 *
 * Returns 0 with @path holding a reference, or a negative error with no
 * reference held.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
{
	bool need_fh;
	int err;

	err = ovl_mount_dir_noesc(name, path);
	if (err)
		return err;

	err = ovl_check_namelen(path, ofs, name);
	if (err) {
		path_put_init(path);
		return err;
	}

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	if (ovl_dentry_remote(path->dentry))
		*remote = true;

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	need_fh = ofs->config.nfs_export ||
		  (ofs->config.index && ofs->config.upperdir);
	if (need_fh && !ovl_can_decode_fh(path->dentry->d_sb)) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}

	return 0;
}

M
Miklos Szeredi 已提交
739 740 741 742 743 744 745 746 747 748 749 750
/*
 * Workdir should not be subdir of upperdir and vice versa.
 *
 * Returns false if the two dentries are the same or lock_rename() reports
 * a trap dentry, true otherwise.
 */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	struct dentry *trap;

	if (workdir == upperdir)
		return false;

	trap = lock_rename(workdir, upperdir);
	unlock_rename(workdir, upperdir);

	return trap == NULL;
}

751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
/*
 * Split the colon-separated list @str in place into consecutive
 * NUL-terminated strings, removing backslash escapes on the way (so "\:" is
 * a literal colon, not a separator).
 *
 * Returns the number of resulting strings, which is at least 1.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		char c = *src;

		if (c == ':') {
			/* Unescaped separator: terminate the current entry */
			*dst++ = '\0';
			src++;
			count++;
			continue;
		}
		if (c == '\\')
			c = *++src;

		*dst = c;
		if (!c)
			break;
		src++;
		dst++;
	}

	return count;
}

771 772 773 774 775
/*
 * xattr ->get for the POSIX ACL handlers: forward to ovl_xattr_get() using
 * the handler's full attribute name.  @name is not used.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	const char *acl_name = handler->name;

	return ovl_xattr_get(dentry, inode, acl_name, buffer, size);
}

779 780 781 782 783
/*
 * xattr ->set for the POSIX ACL handlers.
 *
 * Validates the request before any copy-up: the value must parse as a POSIX
 * ACL, the workdir inode must have POSIX ACLs enabled, the real inode must
 * implement ->set_acl, a default ACL is only accepted on directories, and
 * the caller must own the inode or be capable.  The sgid bit is cleared via
 * ovl_setattr() when required, then the xattr is set with ovl_xattr_set()
 * and the attributes are copied from the real inode.
 *
 * Returns 0 on success or a negative error.
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}

	err = -EOPNOTSUPP;
	if (!IS_POSIXACL(d_inode(workdir)) || !realinode->i_op->set_acl)
		goto out_release;

	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		/* Removing a default ACL from a non-directory is a no-op */
		err = acl ? -EACCES : 0;
		goto out_release;
	}

	err = -EPERM;
	if (!inode_owner_or_capable(inode))
		goto out_release;

	/* The parsed ACL was only needed for validation */
	posix_acl_release(acl);

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr kill_sgid = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &kill_sgid);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (err)
		return err;

	/* Look the real inode up again rather than reusing realinode */
	ovl_copyattr(ovl_inode_real(inode), inode);
	return 0;

out_release:
	posix_acl_release(acl);
	return err;
}

837 838 839 840
/* xattr ->get for the overlay's own prefix: always fails with -EOPNOTSUPP. */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	int err = -EOPNOTSUPP;

	return err;
}

M
Miklos Szeredi 已提交
844 845 846 847 848
/* xattr ->set for the overlay's own prefix: always fails with -EOPNOTSUPP. */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	int err = -EOPNOTSUPP;

	return err;
}

852 853 854 855
/* Catch-all xattr ->get: forward @name unchanged to ovl_xattr_get(). */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	int ret;

	ret = ovl_xattr_get(dentry, inode, name, buffer, size);
	return ret;
}

859 860 861 862 863
/* Catch-all xattr ->set: forward @name unchanged to ovl_xattr_set(). */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	int ret;

	ret = ovl_xattr_set(dentry, inode, name, value, size, flags);
	return ret;
}

867 868
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
M
Miklos Szeredi 已提交
869 870
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
	.flags = ACL_TYPE_ACCESS,
871
	.get = ovl_posix_acl_xattr_get,
M
Miklos Szeredi 已提交
872 873 874
	.set = ovl_posix_acl_xattr_set,
};

875 876
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
M
Miklos Szeredi 已提交
877 878
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags = ACL_TYPE_DEFAULT,
879
	.get = ovl_posix_acl_xattr_get,
M
Miklos Szeredi 已提交
880 881 882 883 884
	.set = ovl_posix_acl_xattr_set,
};

static const struct xattr_handler ovl_own_xattr_handler = {
	.prefix	= OVL_XATTR_PREFIX,
885
	.get = ovl_own_xattr_get,
M
Miklos Szeredi 已提交
886 887 888 889 890
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
891
	.get = ovl_other_xattr_get,
M
Miklos Szeredi 已提交
892 893 894 895
	.set = ovl_other_xattr_set,
};

static const struct xattr_handler *ovl_xattr_handlers[] = {
896
#ifdef CONFIG_FS_POSIX_ACL
M
Miklos Szeredi 已提交
897 898
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
899
#endif
M
Miklos Szeredi 已提交
900 901 902 903 904
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

M
Miklos Szeredi 已提交
905
static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
906
{
M
Miklos Szeredi 已提交
907
	struct vfsmount *upper_mnt;
908 909
	int err;

M
Miklos Szeredi 已提交
910
	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
911 912 913 914 915 916 917 918 919 920
	if (err)
		goto out;

	/* Upper fs should not be r/o */
	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
		pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
		err = -EINVAL;
		goto out;
	}

M
Miklos Szeredi 已提交
921
	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
922 923 924 925 926
	if (err)
		goto out;

	err = -EBUSY;
	if (ovl_inuse_trylock(upperpath->dentry)) {
M
Miklos Szeredi 已提交
927 928
		ofs->upperdir_locked = true;
	} else if (ofs->config.index) {
929 930 931 932 933
		pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}
M
Miklos Szeredi 已提交
934 935 936 937 938 939 940 941 942 943

	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
		pr_err("overlayfs: failed to clone upperpath\n");
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
M
Miklos Szeredi 已提交
944
	ofs->upper_mnt = upper_mnt;
945 946 947 948 949
	err = 0;
out:
	return err;
}

M
Miklos Szeredi 已提交
950
static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
951
{
952
	struct vfsmount *mnt = ofs->upper_mnt;
953 954 955
	struct dentry *temp;
	int err;

956 957 958 959
	err = mnt_want_write(mnt);
	if (err)
		return err;

M
Miklos Szeredi 已提交
960 961
	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	if (!ofs->workdir)
962
		goto out;
963 964 965 966 967 968 969 970 971

	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
972
		goto out;
973 974 975 976 977 978 979 980 981

	/*
	 * We allowed this configuration and don't want to break users over
	 * kernel upgrade. So warn instead of erroring out.
	 */
	if (!err)
		pr_warn("overlayfs: upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
M
Miklos Szeredi 已提交
982 983 984
	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(temp);
	if (ofs->tmpfile)
985 986 987 988 989 990 991
		dput(temp);
	else
		pr_warn("overlayfs: upper fs does not support tmpfile.\n");

	/*
	 * Check if upper/work fs supports trusted.overlay.* xattr
	 */
M
Miklos Szeredi 已提交
992
	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
993
	if (err) {
M
Miklos Szeredi 已提交
994
		ofs->noxattr = true;
995 996
		ofs->config.index = false;
		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
997
		err = 0;
998
	} else {
M
Miklos Szeredi 已提交
999
		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1000 1001 1002
	}

	/* Check if upper/work fs supports file handles */
M
Miklos Szeredi 已提交
1003 1004 1005
	if (ofs->config.index &&
	    !ovl_can_decode_fh(ofs->workdir->d_sb)) {
		ofs->config.index = false;
1006 1007 1008
		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
	}

1009 1010 1011 1012 1013 1014
	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}

1015 1016 1017
out:
	mnt_drop_write(mnt);
	return err;
1018 1019
}

M
Miklos Szeredi 已提交
1020
static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
1021 1022
{
	int err;
M
Miklos Szeredi 已提交
1023
	struct path workpath = { };
1024

M
Miklos Szeredi 已提交
1025
	err = ovl_mount_dir(ofs->config.workdir, &workpath);
1026 1027 1028 1029
	if (err)
		goto out;

	err = -EINVAL;
M
Miklos Szeredi 已提交
1030
	if (upperpath->mnt != workpath.mnt) {
1031 1032 1033
		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
		goto out;
	}
M
Miklos Szeredi 已提交
1034
	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1035 1036 1037 1038 1039
		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
		goto out;
	}

	err = -EBUSY;
M
Miklos Szeredi 已提交
1040
	if (ovl_inuse_trylock(workpath.dentry)) {
M
Miklos Szeredi 已提交
1041 1042
		ofs->workdir_locked = true;
	} else if (ofs->config.index) {
1043 1044 1045 1046 1047 1048
		pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

M
Miklos Szeredi 已提交
1049 1050
	ofs->workbasedir = dget(workpath.dentry);
	err = ovl_make_workdir(ofs, &workpath);
M
Miklos Szeredi 已提交
1051 1052 1053
	if (err)
		goto out;

1054 1055
	err = 0;
out:
M
Miklos Szeredi 已提交
1056 1057
	path_put(&workpath);

1058 1059 1060
	return err;
}

M
Miklos Szeredi 已提交
1061
static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
1062
			    struct path *upperpath)
1063
{
1064
	struct vfsmount *mnt = ofs->upper_mnt;
1065 1066
	int err;

1067 1068 1069 1070
	err = mnt_want_write(mnt);
	if (err)
		return err;

1071
	/* Verify lower root is upper root origin */
1072
	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1073
				true);
1074 1075 1076 1077 1078
	if (err) {
		pr_err("overlayfs: failed to verify upper root origin\n");
		goto out;
	}

M
Miklos Szeredi 已提交
1079 1080
	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (ofs->indexdir) {
1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in "trusted.overlay.origin"
		 * xattr. If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * "trusted.overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
		}
		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1096
		if (err)
1097
			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
1098 1099 1100

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
1101
			err = ovl_indexdir_cleanup(ofs);
1102
	}
M
Miklos Szeredi 已提交
1103
	if (err || !ofs->indexdir)
1104 1105 1106
		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");

out:
1107
	mnt_drop_write(mnt);
1108 1109 1110
	return err;
}

M
Miklos Szeredi 已提交
1111
static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
1112 1113 1114 1115 1116 1117
				unsigned int numlower)
{
	int err;
	unsigned int i;

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1118
	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
1119
				    GFP_KERNEL);
M
Miklos Szeredi 已提交
1120
	if (ofs->lower_layers == NULL)
1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
		goto out;
	for (i = 0; i < numlower; i++) {
		struct vfsmount *mnt;
		dev_t dev;

		err = get_anon_bdev(&dev);
		if (err) {
			pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
			goto out;
		}

		mnt = clone_private_mount(&stack[i]);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
			pr_err("overlayfs: failed to clone lowerpath\n");
			free_anon_bdev(dev);
			goto out;
		}
		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

M
Miklos Szeredi 已提交
1145 1146
		ofs->lower_layers[ofs->numlower].mnt = mnt;
		ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
1147
		ofs->lower_layers[ofs->numlower].idx = i + 1;
M
Miklos Szeredi 已提交
1148
		ofs->numlower++;
1149 1150 1151

		/* Check if all lower layers are on same sb */
		if (i == 0)
M
Miklos Szeredi 已提交
1152 1153 1154
			ofs->same_sb = mnt->mnt_sb;
		else if (ofs->same_sb != mnt->mnt_sb)
			ofs->same_sb = NULL;
1155 1156 1157 1158 1159 1160
	}
	err = 0;
out:
	return err;
}

1161
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
M
Miklos Szeredi 已提交
1162
					    struct ovl_fs *ofs)
1163 1164 1165
{
	int err;
	char *lowertmp, *lower;
1166 1167
	struct path *stack = NULL;
	unsigned int stacklen, numlower = 0, i;
1168
	bool remote = false;
1169
	struct ovl_entry *oe;
1170 1171

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1172
	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1173
	if (!lowertmp)
1174
		goto out_err;
1175 1176 1177 1178 1179 1180

	err = -EINVAL;
	stacklen = ovl_split_lowerdirs(lowertmp);
	if (stacklen > OVL_MAX_STACK) {
		pr_err("overlayfs: too many lower directories, limit is %d\n",
		       OVL_MAX_STACK);
1181
		goto out_err;
M
Miklos Szeredi 已提交
1182
	} else if (!ofs->config.upperdir && stacklen == 1) {
1183
		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1184
		goto out_err;
1185 1186 1187 1188
	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
		   ofs->config.redirect_follow) {
		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
1189 1190 1191 1192 1193
	}

	err = -ENOMEM;
	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
	if (!stack)
1194
		goto out_err;
1195 1196 1197 1198

	err = -EINVAL;
	lower = lowertmp;
	for (numlower = 0; numlower < stacklen; numlower++) {
M
Miklos Szeredi 已提交
1199
		err = ovl_lower_dir(lower, &stack[numlower], ofs,
1200 1201
				    &sb->s_stack_depth, &remote);
		if (err)
1202
			goto out_err;
1203 1204 1205 1206 1207 1208 1209 1210

		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1211
		goto out_err;
1212 1213
	}

M
Miklos Szeredi 已提交
1214
	err = ovl_get_lower_layers(ofs, stack, numlower);
1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
M
Miklos Szeredi 已提交
1225
		oe->lowerstack[i].layer = &ofs->lower_layers[i];
1226
	}
1227 1228 1229 1230 1231 1232 1233 1234 1235 1236

	if (remote)
		sb->s_d_op = &ovl_reval_dentry_operations;
	else
		sb->s_d_op = &ovl_dentry_operations;

out:
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);
1237 1238 1239 1240 1241 1242
	kfree(lowertmp);

	return oe;

out_err:
	oe = ERR_PTR(err);
1243 1244 1245
	goto out;
}

M
Miklos Szeredi 已提交
1246 1247
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
K
Kees Cook 已提交
1248
	struct path upperpath = { };
M
Miklos Szeredi 已提交
1249
	struct dentry *root_dentry;
1250
	struct ovl_entry *oe;
M
Miklos Szeredi 已提交
1251
	struct ovl_fs *ofs;
1252
	struct cred *cred;
M
Miklos Szeredi 已提交
1253 1254
	int err;

E
Erez Zadok 已提交
1255
	err = -ENOMEM;
M
Miklos Szeredi 已提交
1256 1257
	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ofs)
M
Miklos Szeredi 已提交
1258 1259
		goto out;

M
Miklos Szeredi 已提交
1260
	ofs->creator_cred = cred = prepare_creds();
1261 1262 1263
	if (!cred)
		goto out_err;

M
Miklos Szeredi 已提交
1264
	ofs->config.index = ovl_index_def;
1265
	ofs->config.nfs_export = ovl_nfs_export_def;
M
Miklos Szeredi 已提交
1266
	err = ovl_parse_opt((char *) data, &ofs->config);
E
Erez Zadok 已提交
1267
	if (err)
1268
		goto out_err;
E
Erez Zadok 已提交
1269

M
Miklos Szeredi 已提交
1270
	err = -EINVAL;
M
Miklos Szeredi 已提交
1271
	if (!ofs->config.lowerdir) {
1272 1273
		if (!silent)
			pr_err("overlayfs: missing 'lowerdir'\n");
1274
		goto out_err;
M
Miklos Szeredi 已提交
1275 1276
	}

M
Miklos Szeredi 已提交
1277
	sb->s_stack_depth = 0;
1278
	sb->s_maxbytes = MAX_LFS_FILESIZE;
M
Miklos Szeredi 已提交
1279 1280
	if (ofs->config.upperdir) {
		if (!ofs->config.workdir) {
M
Miklos Szeredi 已提交
1281
			pr_err("overlayfs: missing 'workdir'\n");
1282
			goto out_err;
M
Miklos Szeredi 已提交
1283
		}
M
Miklos Szeredi 已提交
1284

M
Miklos Szeredi 已提交
1285
		err = ovl_get_upper(ofs, &upperpath);
M
Miklos Szeredi 已提交
1286
		if (err)
1287
			goto out_err;
1288

M
Miklos Szeredi 已提交
1289
		err = ovl_get_workdir(ofs, &upperpath);
1290
		if (err)
1291
			goto out_err;
1292

M
Miklos Szeredi 已提交
1293
		if (!ofs->workdir)
1294
			sb->s_flags |= SB_RDONLY;
1295

M
Miklos Szeredi 已提交
1296 1297
		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1298

M
Miklos Szeredi 已提交
1299
	}
M
Miklos Szeredi 已提交
1300
	oe = ovl_get_lowerstack(sb, ofs);
1301 1302
	err = PTR_ERR(oe);
	if (IS_ERR(oe))
1303
		goto out_err;
M
Miklos Szeredi 已提交
1304

H
hujianyang 已提交
1305
	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
M
Miklos Szeredi 已提交
1306
	if (!ofs->upper_mnt)
1307
		sb->s_flags |= SB_RDONLY;
M
Miklos Szeredi 已提交
1308 1309
	else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
		ofs->same_sb = NULL;
M
Miklos Szeredi 已提交
1310

M
Miklos Szeredi 已提交
1311 1312
	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
		err = ovl_get_indexdir(ofs, oe, &upperpath);
1313
		if (err)
1314
			goto out_free_oe;
1315

1316 1317 1318 1319
		/* Force r/o mount with no index dir */
		if (!ofs->indexdir) {
			dput(ofs->workdir);
			ofs->workdir = NULL;
1320
			sb->s_flags |= SB_RDONLY;
1321 1322
		}

1323 1324
	}

1325
	/* Show index=off in /proc/mounts for forced r/o mount */
1326
	if (!ofs->indexdir) {
M
Miklos Szeredi 已提交
1327
		ofs->config.index = false;
1328 1329 1330 1331 1332
		if (ofs->upper_mnt && ofs->config.nfs_export) {
			pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
			ofs->config.nfs_export = false;
		}
	}
1333

1334 1335 1336
	/* Never override disk quota limits or use reserved space */
	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);

1337 1338 1339
	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_op = &ovl_super_operations;
	sb->s_xattr = ovl_xattr_handlers;
M
Miklos Szeredi 已提交
1340
	sb->s_fs_info = ofs;
1341
	sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
1342

1343
	err = -ENOMEM;
1344
	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
M
Miklos Szeredi 已提交
1345
	if (!root_dentry)
1346
		goto out_free_oe;
M
Miklos Szeredi 已提交
1347 1348

	mntput(upperpath.mnt);
1349
	if (upperpath.dentry) {
1350
		oe->has_upper = true;
M
Miklos Szeredi 已提交
1351 1352
		if (ovl_is_impuredir(upperpath.dentry))
			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1353
	}
M
Miklos Szeredi 已提交
1354 1355 1356

	root_dentry->d_fsdata = oe;

1357 1358
	/* Root is always merge -> can have whiteouts */
	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1359 1360
	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
		       ovl_dentry_lower(root_dentry));
M
Miklos Szeredi 已提交
1361

M
Miklos Szeredi 已提交
1362 1363 1364 1365
	sb->s_root = root_dentry;

	return 0;

1366 1367
out_free_oe:
	ovl_entry_stack_free(oe);
1368
	kfree(oe);
1369
out_err:
M
Miklos Szeredi 已提交
1370
	path_put(&upperpath);
M
Miklos Szeredi 已提交
1371
	ovl_free_fs(ofs);
M
Miklos Szeredi 已提交
1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383
out:
	return err;
}

/*
 * ->mount callback for the "overlay" filesystem type.
 *
 * Delegates to mount_nodev() with ovl_fill_super() as the fill callback.
 * @dev_name is not used.
 */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	struct dentry *root;

	root = mount_nodev(fs_type, flags, raw_data, ovl_fill_super);

	return root;
}

static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
1384
	.name		= "overlay",
M
Miklos Szeredi 已提交
1385 1386 1387
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
1388
MODULE_ALIAS_FS("overlay");
M
Miklos Szeredi 已提交
1389

1390 1391 1392 1393 1394 1395 1396
/*
 * Constructor passed to kmem_cache_create() for the ovl_inode cache:
 * runs inode_init_once() on the embedded VFS inode.
 */
static void ovl_inode_init_once(void *foo)
{
	inode_init_once(&((struct ovl_inode *)foo)->vfs_inode);
}

M
Miklos Szeredi 已提交
1397 1398
/*
 * Module init: create the ovl_inode slab cache, then register the
 * filesystem type.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
 * error from register_filesystem() (the cache is destroyed again in that
 * case).
 */
static int __init ovl_init(void)
{
	int err;

	ovl_inode_cachep = kmem_cache_create("ovl_inode",
					     sizeof(struct ovl_inode), 0,
					     (SLAB_RECLAIM_ACCOUNT|
					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					     ovl_inode_init_once);
	if (ovl_inode_cachep == NULL)
		return -ENOMEM;

	err = register_filesystem(&ovl_fs_type);
	if (err)
		kmem_cache_destroy(ovl_inode_cachep);

	return err;
}

/*
 * Module exit: unregister the filesystem type, then destroy the
 * ovl_inode slab cache.
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);

}

/* Hook ovl_init()/ovl_exit() up as the module's entry and exit points. */
module_init(ovl_init);
module_exit(ovl_exit);