super.c 41.2 KB
Newer Older
M
Miklos Szeredi 已提交
1 2 3 4 5 6 7 8 9
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

10
#include <uapi/linux/magic.h>
M
Miklos Szeredi 已提交
11 12 13 14 15 16
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
A
Andy Whitcroft 已提交
17
#include <linux/statfs.h>
E
Erez Zadok 已提交
18
#include <linux/seq_file.h>
M
Miklos Szeredi 已提交
19
#include <linux/posix_acl_xattr.h>
20
#include <linux/exportfs.h>
M
Miklos Szeredi 已提交
21 22 23 24 25 26 27 28 29
#include "overlayfs.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


/* Forward declaration only; the structure body is not visible in this part of the file. */
struct ovl_dir_cache;

30 31
/* NOTE(review): presumably the cap on the number of stacked lower layers; no use is visible in this part of the file - confirm. */
#define OVL_MAX_STACK 500

32 33 34 35
/*
 * Module-wide default for the redirect_dir feature, exposed as the
 * "redirect_dir" module parameter.  The description is keyed by the
 * parameter name registered with module_param_named() (not by the backing
 * variable) so that modinfo attaches it to the right parameter.
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");
M
Miklos Szeredi 已提交
36

37 38 39 40 41 42 43
/*
 * When set, redirects are followed even with redirect_dir turned off.
 * Exposed as the "redirect_always_follow" module parameter; the description
 * is keyed by that parameter name so modinfo reports it correctly.
 */
static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

44 45 46 47 48
/*
 * Module-wide default for the inodes index feature, exposed as the "index"
 * module parameter.  The description is keyed by the parameter name (not the
 * backing variable) so that modinfo attaches it to the right parameter.
 */
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

49 50 51 52 53
/*
 * Module-wide default for the NFS export feature, exposed as the
 * "nfs_export" module parameter.  The description is keyed by the parameter
 * name (not the backing variable) so that modinfo attaches it correctly.
 */
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

54 55 56 57 58
/*
 * Whether xino defaults to "auto" rather than "off", exposed as the
 * "xino_auto" module parameter.  The description is keyed by the parameter
 * name (not the backing variable) so that modinfo attaches it correctly.
 */
static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
MODULE_PARM_DESC(xino_auto,
		 "Auto enable xino feature");

59 60 61 62 63 64 65 66
/* Drop the dentry reference held by every lower stack entry of @oe. */
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
	unsigned int idx = 0;

	while (idx < oe->numlower) {
		dput(oe->lowerstack[idx].dentry);
		idx++;
	}
}

67 68 69 70 71
/*
 * Module-wide default for the metadata only copy up feature, exposed as the
 * "metacopy" module parameter.  The description is keyed by the parameter
 * name (not the backing variable) so that modinfo attaches it correctly.
 */
static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
		 "Default to on or off for the metadata only copy up feature");

M
Miklos Szeredi 已提交
72 73 74 75 76
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
77
		ovl_entry_stack_free(oe);
M
Miklos Szeredi 已提交
78 79 80 81
		kfree_rcu(oe, rcu);
	}
}

82
static struct dentry *ovl_d_real(struct dentry *dentry,
83
				 const struct inode *inode)
M
Miklos Szeredi 已提交
84 85 86
{
	struct dentry *real;

87 88 89 90
	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

91
	if (!d_is_reg(dentry)) {
M
Miklos Szeredi 已提交
92 93 94 95 96 97
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	real = ovl_dentry_upper(dentry);
98
	if (real && (inode == d_inode(real)))
M
Miklos Szeredi 已提交
99 100
		return real;

101 102 103 104
	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	real = ovl_dentry_lowerdata(dentry);
M
Miklos Szeredi 已提交
105 106 107
	if (!real)
		goto bug;

M
Miklos Szeredi 已提交
108
	/* Handle recursion */
109
	real = d_real(real, inode);
M
Miklos Szeredi 已提交
110

M
Miklos Szeredi 已提交
111 112 113
	if (!inode || inode == d_inode(real))
		return real;
bug:
M
Miklos Szeredi 已提交
114
	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
M
Miklos Szeredi 已提交
115 116 117 118
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
	return dentry;
}

119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
/*
 * ->d_revalidate callback: revalidate every lower dentry that has its own
 * ->d_revalidate.
 *
 * Returns 1 if all of them are still valid, the negative error of the first
 * lower revalidation that fails, or -ESTALE if a lower dentry reports itself
 * invalid (that dentry is invalidated first unless in RCU lookup mode).
 */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int idx;

	for (idx = 0; idx < oe->numlower; idx++) {
		struct dentry *lower = oe->lowerstack[idx].dentry;
		int res;

		if (!(lower->d_flags & DCACHE_OP_REVALIDATE))
			continue;

		res = lower->d_op->d_revalidate(lower, flags);
		if (res < 0)
			return res;
		if (res == 0) {
			if (!(flags & LOOKUP_RCU))
				d_invalidate(lower);
			return -ESTALE;
		}
	}

	return 1;
}

/*
 * ->d_weak_revalidate callback: run ->d_weak_revalidate on each lower dentry
 * that provides one, stopping at the first result that is <= 0.
 *
 * Returns the result of the last lower call made, or 1 if none was made.
 */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	unsigned int idx;
	int res = 1;

	for (idx = 0; idx < oe->numlower; idx++) {
		struct dentry *lower = oe->lowerstack[idx].dentry;

		if (!(lower->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			continue;

		res = lower->d_op->d_weak_revalidate(lower, flags);
		if (res <= 0)
			return res;
	}

	return res;
}

M
Miklos Szeredi 已提交
160 161
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
162
	.d_real = ovl_d_real,
M
Miklos Szeredi 已提交
163 164
};

165 166
static const struct dentry_operations ovl_reval_dentry_operations = {
	.d_release = ovl_dentry_release,
M
Miklos Szeredi 已提交
167
	.d_real = ovl_d_real,
168 169 170 171
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

172 173 174 175 176 177
/* Slab cache that struct ovl_inode objects are allocated from and freed to. */
static struct kmem_cache *ovl_inode_cachep;

static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);

178 179 180
	if (!oi)
		return NULL;

181
	oi->cache = NULL;
M
Miklos Szeredi 已提交
182
	oi->redirect = NULL;
183
	oi->version = 0;
M
Miklos Szeredi 已提交
184
	oi->flags = 0;
185
	oi->__upperdentry = NULL;
186
	oi->lower = NULL;
187
	oi->lowerdata = NULL;
188
	mutex_init(&oi->lock);
189

190 191 192 193 194 195 196 197 198 199 200 201
	return &oi->vfs_inode;
}

/* RCU callback: return the ovl_inode containing @head's inode to the slab cache. */
static void ovl_i_callback(struct rcu_head *head)
{
	struct inode *vfs_inode = container_of(head, struct inode, i_rcu);
	struct ovl_inode *oi = OVL_I(vfs_inode);

	kmem_cache_free(ovl_inode_cachep, oi);
}

static void ovl_destroy_inode(struct inode *inode)
{
202 203 204
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
205
	iput(oi->lower);
206 207 208 209
	if (S_ISDIR(inode->i_mode))
		ovl_dir_cache_free(inode);
	else
		iput(oi->lowerdata);
M
Miklos Szeredi 已提交
210
	kfree(oi->redirect);
211
	mutex_destroy(&oi->lock);
212

213 214 215
	call_rcu(&inode->i_rcu, ovl_i_callback);
}

M
Miklos Szeredi 已提交
216
static void ovl_free_fs(struct ovl_fs *ofs)
M
Miklos Szeredi 已提交
217
{
218
	unsigned i;
M
Miklos Szeredi 已提交
219

A
Amir Goldstein 已提交
220 221 222
	iput(ofs->indexdir_trap);
	iput(ofs->workdir_trap);
	iput(ofs->upperdir_trap);
M
Miklos Szeredi 已提交
223 224 225 226 227 228 229 230
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
	mntput(ofs->upper_mnt);
A
Amir Goldstein 已提交
231 232
	for (i = 0; i < ofs->numlower; i++) {
		iput(ofs->lower_layers[i].trap);
M
Miklos Szeredi 已提交
233
		mntput(ofs->lower_layers[i].mnt);
A
Amir Goldstein 已提交
234
	}
235 236
	for (i = 0; i < ofs->numlowerfs; i++)
		free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
M
Miklos Szeredi 已提交
237
	kfree(ofs->lower_layers);
238
	kfree(ofs->lower_fs);
M
Miklos Szeredi 已提交
239 240 241 242

	kfree(ofs->config.lowerdir);
	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
243
	kfree(ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
244 245 246
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
M
Miklos Szeredi 已提交
247 248
}

249 250 251 252 253 254 255
/* ->put_super callback: free the overlay private data attached to @sb. */
static void ovl_put_super(struct super_block *sb)
{
	ovl_free_fs(sb->s_fs_info);
}

256
/* Sync real dirty inodes in upper filesystem (if it exists) */
257 258
static int ovl_sync_fs(struct super_block *sb, int wait)
{
M
Miklos Szeredi 已提交
259
	struct ovl_fs *ofs = sb->s_fs_info;
260 261 262
	struct super_block *upper_sb;
	int ret;

M
Miklos Szeredi 已提交
263
	if (!ofs->upper_mnt)
264
		return 0;
265 266 267 268 269 270 271 272 273 274

	/*
	 * If this is a sync(2) call or an emergency sync, all the super blocks
	 * will be iterated, including upper_sb, so no need to do anything.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
275 276
		return 0;

277 278
	upper_sb = ofs->upper_mnt->mnt_sb;

279
	down_read(&upper_sb->s_umount);
280
	ret = sync_filesystem(upper_sb);
281
	up_read(&upper_sb->s_umount);
282

283 284 285
	return ret;
}

A
Andy Whitcroft 已提交
286 287 288 289 290 291
/**
 * ovl_statfs
 * @sb: The overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
292
 * filesystem pass the statfs to the upper filesystem (if it exists)
A
Andy Whitcroft 已提交
293 294 295 296 297 298 299 300
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

301
	ovl_path_real(root_dentry, &path);
A
Andy Whitcroft 已提交
302 303 304

	err = vfs_statfs(&path, buf);
	if (!err) {
M
Miklos Szeredi 已提交
305
		buf->f_namelen = ofs->namelen;
A
Andy Whitcroft 已提交
306 307 308 309 310 311
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

312
/* Will this overlay be forced to mount/remount ro? */
M
Miklos Szeredi 已提交
313
static bool ovl_force_readonly(struct ovl_fs *ofs)
314
{
M
Miklos Szeredi 已提交
315
	return (!ofs->upper_mnt || !ofs->workdir);
316 317
}

318 319 320 321 322
/* Default redirect_dir mode string, derived from the redirect_dir module default. */
static const char *ovl_redirect_mode_def(void)
{
	if (ovl_redirect_dir_def)
		return "on";

	return "off";
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
/* Values of the xino mount option. */
enum {
	OVL_XINO_OFF,
	OVL_XINO_AUTO,
	OVL_XINO_ON,
};

/* Option strings for the xino modes, indexed by the OVL_XINO_* value. */
static const char * const ovl_xino_str[] = {
	[OVL_XINO_OFF]	= "off",
	[OVL_XINO_AUTO]	= "auto",
	[OVL_XINO_ON]	= "on",
};

/* Default xino mode: "auto" when the xino_auto module default is set, else "off". */
static inline int ovl_xino_def(void)
{
	if (ovl_xino_auto_def)
		return OVL_XINO_AUTO;

	return OVL_XINO_OFF;
}

E
Erez Zadok 已提交
340 341 342 343 344 345 346 347 348
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
M
Miklos Szeredi 已提交
349
	struct ovl_fs *ofs = sb->s_fs_info;
E
Erez Zadok 已提交
350

M
Miklos Szeredi 已提交
351 352 353 354
	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
M
Miklos Szeredi 已提交
355
	}
M
Miklos Szeredi 已提交
356
	if (ofs->config.default_permissions)
M
Miklos Szeredi 已提交
357
		seq_puts(m, ",default_permissions");
358 359
	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
M
Miklos Szeredi 已提交
360
	if (ofs->config.index != ovl_index_def)
361
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
362 363 364
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
365 366
	if (ofs->config.xino != ovl_xino_def())
		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
367 368 369
	if (ofs->config.metacopy != ovl_metacopy_def)
		seq_printf(m, ",metacopy=%s",
			   ofs->config.metacopy ? "on" : "off");
E
Erez Zadok 已提交
370 371 372
	return 0;
}

373 374
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
M
Miklos Szeredi 已提交
375
	struct ovl_fs *ofs = sb->s_fs_info;
376

377
	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
378 379 380 381 382
		return -EROFS;

	return 0;
}

M
Miklos Szeredi 已提交
383
static const struct super_operations ovl_super_operations = {
384 385 386
	.alloc_inode	= ovl_alloc_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
387
	.put_super	= ovl_put_super,
388
	.sync_fs	= ovl_sync_fs,
A
Andy Whitcroft 已提交
389
	.statfs		= ovl_statfs,
E
Erez Zadok 已提交
390
	.show_options	= ovl_show_options,
391
	.remount_fs	= ovl_remount,
M
Miklos Szeredi 已提交
392 393 394 395 396 397
};

/* Mount option tokens; OPT_ERR terminates the token table. */
enum {
	/* directories */
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	/* flags and features */
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,
	/* no match */
	OPT_ERR,
};

static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
M
Miklos Szeredi 已提交
416
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
417
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
418 419
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
420 421
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
422 423 424
	{OPT_XINO_ON,			"xino=on"},
	{OPT_XINO_OFF,			"xino=off"},
	{OPT_XINO_AUTO,			"xino=auto"},
425 426
	{OPT_METACOPY_ON,		"metacopy=on"},
	{OPT_METACOPY_OFF,		"metacopy=off"},
M
Miklos Szeredi 已提交
427 428 429
	{OPT_ERR,			NULL}
};

M
Miklos Szeredi 已提交
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452
/*
 * Cut the next option off the front of the option string *@s.
 *
 * Options are separated by ','; a character preceded by a backslash is
 * skipped and never treated as a separator.  The separator is overwritten
 * with NUL and *@s is advanced past it, or set to NULL once the last option
 * has been returned.  Returns the start of the option, or NULL if *@s is
 * already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		if (*cur == '\\') {
			/* Step over the escaped character, if there is one */
			cur++;
			if (*cur == '\0')
				break;
		}
		cur++;
	}

	*s = NULL;
	return start;
}

453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
/*
 * Translate the redirect_dir mode string @mode into the redirect_dir and
 * redirect_follow flags of @config.  Flags are only ever set here, never
 * cleared.  Returns 0, or -EINVAL for an unknown mode.
 */
static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
{
	if (!strcmp(mode, "on")) {
		config->redirect_dir = true;
		/*
		 * Does not make sense to have redirect creation without
		 * redirect following.
		 */
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "follow")) {
		config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "off")) {
		if (ovl_redirect_always_follow)
			config->redirect_follow = true;
		return 0;
	}

	if (!strcmp(mode, "nofollow"))
		return 0;

	pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
	       mode);
	return -EINVAL;
}

M
Miklos Szeredi 已提交
476 477 478
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;
479
	int err;
480
	bool metacopy_opt = false, redirect_opt = false;
M
Miklos Szeredi 已提交
481

482 483 484 485
	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

M
Miklos Szeredi 已提交
486
	while ((p = ovl_next_opt(&opt)) != NULL) {
M
Miklos Szeredi 已提交
487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
		int token;
		substring_t args[MAX_OPT_ARGS];

		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

M
Miklos Szeredi 已提交
516 517 518 519
		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

520 521 522 523 524
		case OPT_REDIRECT_DIR:
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
525
			redirect_opt = true;
M
Miklos Szeredi 已提交
526 527
			break;

528 529 530 531 532 533 534 535
		case OPT_INDEX_ON:
			config->index = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			break;

536 537 538 539 540 541 542 543
		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			break;

544 545 546 547 548 549 550 551 552 553 554 555
		case OPT_XINO_ON:
			config->xino = OVL_XINO_ON;
			break;

		case OPT_XINO_OFF:
			config->xino = OVL_XINO_OFF;
			break;

		case OPT_XINO_AUTO:
			config->xino = OVL_XINO_AUTO;
			break;

556 557
		case OPT_METACOPY_ON:
			config->metacopy = true;
558
			metacopy_opt = true;
559 560 561 562 563 564
			break;

		case OPT_METACOPY_OFF:
			config->metacopy = false;
			break;

M
Miklos Szeredi 已提交
565
		default:
566
			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
M
Miklos Szeredi 已提交
567 568 569
			return -EINVAL;
		}
	}
H
hujianyang 已提交
570 571 572 573 574 575 576 577 578

	/* Workdir is useless in non-upper mount */
	if (!config->upperdir && config->workdir) {
		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
			config->workdir);
		kfree(config->workdir);
		config->workdir = NULL;
	}

579 580 581 582
	err = ovl_parse_redirect_mode(config, config->redirect_mode);
	if (err)
		return err;

583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
	/*
	 * This is to make the logic below simpler.  It doesn't make any other
	 * difference, since config->redirect_dir is only used for upper.
	 */
	if (!config->upperdir && config->redirect_follow)
		config->redirect_dir = true;

	/* Resolve metacopy -> redirect_dir dependency */
	if (config->metacopy && !config->redirect_dir) {
		if (metacopy_opt && redirect_opt) {
			pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
			       config->redirect_mode);
			return -EINVAL;
		}
		if (redirect_opt) {
			/*
			 * There was an explicit redirect_dir=... that resulted
			 * in this conflict.
			 */
			pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
				config->redirect_mode);
			config->metacopy = false;
		} else {
			/* Automatically enable redirect otherwise. */
			config->redirect_follow = config->redirect_dir = true;
		}
609 610 611
	}

	return 0;
M
Miklos Szeredi 已提交
612 613 614
}

/* Name under which the work directory is looked up/created inside the work base directory. */
#define OVL_WORKDIR_NAME "work"
615
/* NOTE(review): presumably the name of the index directory created next to the work directory; its use is not visible in this part of the file - confirm. */
#define OVL_INDEXDIR_NAME "index"
M
Miklos Szeredi 已提交
616

M
Miklos Szeredi 已提交
617
/*
 * Look up or create the directory @name inside the work base directory.
 *
 * If the directory already exists it is returned as is when @persist is
 * set; otherwise it is cleaned up once and the lookup is retried.  A newly
 * created directory has its POSIX ACL xattrs removed and its mode bits
 * cleared.
 *
 * Returns the directory dentry, or NULL on failure (after logging a
 * warning).  The work base directory inode is locked for the duration.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir = ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ofs->upper_mnt;
	struct iattr attr = {
		.ia_valid = ATTR_MODE,
		.ia_mode = S_IFDIR | 0,
	};
	struct dentry *work;
	bool retried = false;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

retry:
	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
	if (IS_ERR(work)) {
		err = PTR_ERR(work);
		goto out_err;
	}

	if (work->d_inode) {
		err = -EEXIST;
		if (retried)
			goto out_dput;

		/* A persistent directory is reused as found */
		if (persist)
			goto out_unlock;

		retried = true;
		ovl_workdir_cleanup(dir, mnt, work, 0);
		dput(work);
		goto retry;
	}

	work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
	if (IS_ERR(work)) {
		err = PTR_ERR(work);
		goto out_err;
	}

	/*
	 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
	 *
	 * a) success (there was a POSIX ACL xattr and was removed)
	 * b) -ENODATA (there was no POSIX ACL xattr)
	 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
	 *
	 * There are various other error values that could effectively
	 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
	 * if the xattr name is too long), but the set of filesystems
	 * allowed as upper are limited to "normal" ones, where checking
	 * for the above two errors is sufficient.
	 */
	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto out_dput;

	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
	if (err && err != -ENODATA && err != -EOPNOTSUPP)
		goto out_dput;

	/* Clear any inherited mode bits */
	inode_lock(work->d_inode);
	err = notify_change(work, &attr, NULL);
	inode_unlock(work->d_inode);
	if (err)
		goto out_dput;

out_unlock:
	inode_unlock(dir);

	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

M
Miklos Szeredi 已提交
704 705 706 707 708 709 710 711 712 713 714 715 716
/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character following it is kept literally.
 */
static void ovl_unescape(char *s)
{
	char *out = s;
	char ch;

	do {
		if (*s == '\\')
			s++;
		ch = *s++;
		*out++ = ch;
	} while (ch != '\0');
}

M
Miklos Szeredi 已提交
717 718
static int ovl_mount_dir_noesc(const char *name, struct path *path)
{
719
	int err = -EINVAL;
M
Miklos Szeredi 已提交
720

721 722 723 724
	if (!*name) {
		pr_err("overlayfs: empty lowerdir\n");
		goto out;
	}
M
Miklos Szeredi 已提交
725 726 727 728 729 730
	err = kern_path(name, LOOKUP_FOLLOW, path);
	if (err) {
		pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
		goto out;
	}
	err = -EINVAL;
731
	if (ovl_dentry_weird(path->dentry)) {
M
Miklos Szeredi 已提交
732 733 734
		pr_err("overlayfs: filesystem on '%s' not supported\n", name);
		goto out_put;
	}
M
Miklos Szeredi 已提交
735
	if (!d_is_dir(path->dentry)) {
M
Miklos Szeredi 已提交
736 737 738 739 740 741
		pr_err("overlayfs: '%s' not a directory\n", name);
		goto out_put;
	}
	return 0;

out_put:
742
	path_put_init(path);
M
Miklos Szeredi 已提交
743 744 745 746 747 748 749 750 751 752 753 754
out:
	return err;
}

static int ovl_mount_dir(const char *name, struct path *path)
{
	int err = -ENOMEM;
	char *tmp = kstrdup(name, GFP_KERNEL);

	if (tmp) {
		ovl_unescape(tmp);
		err = ovl_mount_dir_noesc(tmp, path);
755 756 757 758 759

		if (!err)
			if (ovl_dentry_remote(path->dentry)) {
				pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
				       tmp);
760
				path_put_init(path);
761 762
				err = -EINVAL;
			}
M
Miklos Szeredi 已提交
763 764 765 766 767
		kfree(tmp);
	}
	return err;
}

M
Miklos Szeredi 已提交
768 769
static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
			     const char *name)
M
Miklos Szeredi 已提交
770 771
{
	struct kstatfs statfs;
M
Miklos Szeredi 已提交
772 773 774 775 776 777 778 779 780 781 782 783 784
	int err = vfs_statfs(path, &statfs);

	if (err)
		pr_err("overlayfs: statfs failed on '%s'\n", name);
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Resolve one lower directory @name into @path and fold its properties
 * into the mount state: the maximum name length, the maximum stacking depth
 * (*@stack_depth), whether any lower dentry is remote (*@remote), and the
 * index/nfs_export/xino settings that depend on file handle support.
 *
 * Returns 0 with @path holding a reference, or a negative error with no
 * reference left in @path.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
{
	int fh_type;
	int err;

	err = ovl_mount_dir_noesc(name, path);
	if (err)
		return err;

	err = ovl_check_namelen(path, ofs, name);
	if (err) {
		path_put_init(path);
		return err;
	}

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	if (ovl_dentry_remote(path->dentry))
		*remote = true;

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
	if (!fh_type &&
	    (ofs->config.nfs_export ||
	     (ofs->config.index && ofs->config.upperdir))) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}

	/* Check if lower fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_bits = 0;

	return 0;
}

M
Miklos Szeredi 已提交
826 827 828 829 830 831 832 833 834 835 836 837
/*
 * Workdir should not be subdir of upperdir and vice versa.
 *
 * Returns true when the two dentries differ and lock_rename() on them
 * returns NULL.
 */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	struct dentry *trap;

	if (workdir == upperdir)
		return false;

	trap = lock_rename(workdir, upperdir);
	unlock_rename(workdir, upperdir);

	return !trap;
}

838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
/*
 * Split the ':' separated list @str in place into consecutive NUL
 * terminated strings and return how many there are (at least 1).
 *
 * A backslash is dropped and the character after it is kept literally, so
 * an escaped ':' does not split.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		char ch = *src;

		if (ch == ':') {
			*dst++ = '\0';
			src++;
			count++;
			continue;
		}

		if (ch == '\\')
			ch = *++src;

		*dst++ = ch;
		if (ch == '\0')
			break;
		src++;
	}

	return count;
}

858 859 860 861 862
/*
 * xattr ->get for the POSIX ACL handlers: read the xattr named by the
 * handler itself (the @name argument is not used).
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	const char *acl_name = handler->name;

	return ovl_xattr_get(dentry, inode, acl_name, buffer, size);
}

866 867 868 869 870
/*
 * xattr ->set for the POSIX ACL handlers.
 *
 * The value is validated and the permission checks are made before
 * anything is modified.  Returns -EOPNOTSUPP when ACLs are unavailable,
 * -EACCES (or 0 when removing) for a default ACL on a non-directory,
 * -EPERM when the caller may not change the inode, otherwise the result of
 * setting the xattr named by the handler.
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	bool proceed = false;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}

	if (!IS_POSIXACL(d_inode(workdir)) || !realinode->i_op->set_acl)
		err = -EOPNOTSUPP;
	else if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
		err = acl ? -EACCES : 0;
	else if (!inode_owner_or_capable(inode))
		err = -EPERM;
	else
		proceed = true;

	/* The parsed ACL was only needed for validation */
	posix_acl_release(acl);
	if (!proceed)
		return err;

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr kill_sgid = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &kill_sgid);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (!err)
		ovl_copyattr(ovl_inode_real(inode), inode);

	return err;
}

924 925 926 927
/* xattr ->get for overlayfs' own xattr prefix: always refused with -EOPNOTSUPP. */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

M
Miklos Szeredi 已提交
931 932 933 934 935
/* xattr ->set for overlayfs' own xattr prefix: always refused with -EOPNOTSUPP. */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

939 940 941 942
/* xattr ->get for the catch-all handler: pass the request on under the given @name. */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

946 947 948 949 950
/* xattr ->set for the catch-all handler: pass the request on under the given @name. */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}

954 955
/* Handler for the POSIX access ACL xattr. */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
	.flags	= ACL_TYPE_ACCESS,
	.get	= ovl_posix_acl_xattr_get,
	.set	= ovl_posix_acl_xattr_set,
};

962 963
/* Handler for the POSIX default ACL xattr. */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags	= ACL_TYPE_DEFAULT,
	.get	= ovl_posix_acl_xattr_get,
	.set	= ovl_posix_acl_xattr_set,
};

static const struct xattr_handler ovl_own_xattr_handler = {
	.prefix	= OVL_XATTR_PREFIX,
972
	.get = ovl_own_xattr_get,
M
Miklos Szeredi 已提交
973 974 975 976 977
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
978
	.get = ovl_other_xattr_get,
M
Miklos Szeredi 已提交
979 980 981 982
	.set = ovl_other_xattr_set,
};

static const struct xattr_handler *ovl_xattr_handlers[] = {
983
#ifdef CONFIG_FS_POSIX_ACL
M
Miklos Szeredi 已提交
984 985
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
986
#endif
M
Miklos Szeredi 已提交
987 988 989 990 991
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

A
Amir Goldstein 已提交
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
/*
 * Obtain a trap inode for the directory @dir and store it in *@ptrap.
 * @name is only used for the error message.
 *
 * Returns 0 on success or the error from ovl_get_trap_inode(); -ELOOP is
 * additionally logged as a conflicting path.
 */
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *trap = ovl_get_trap_inode(sb, dir);
	int err;

	if (!IS_ERR(trap)) {
		*ptrap = trap;
		return 0;
	}

	err = PTR_ERR(trap);
	if (err == -ELOOP)
		pr_err("overlayfs: conflicting %s path\n", name);

	return err;
}

static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
			 struct path *upperpath)
1012
{
M
Miklos Szeredi 已提交
1013
	struct vfsmount *upper_mnt;
1014 1015
	int err;

M
Miklos Szeredi 已提交
1016
	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
	if (err)
		goto out;

	/* Upper fs should not be r/o */
	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
		pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
		err = -EINVAL;
		goto out;
	}

M
Miklos Szeredi 已提交
1027
	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1028 1029 1030
	if (err)
		goto out;

A
Amir Goldstein 已提交
1031 1032 1033 1034 1035
	err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
			     "upperdir");
	if (err)
		goto out;

M
Miklos Szeredi 已提交
1036 1037 1038 1039 1040 1041 1042 1043 1044
	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
		pr_err("overlayfs: failed to clone upperpath\n");
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
M
Miklos Szeredi 已提交
1045
	ofs->upper_mnt = upper_mnt;
1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056

	err = -EBUSY;
	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
		ofs->upperdir_locked = true;
	} else if (ofs->config.index) {
		pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

1057 1058 1059 1060 1061
	err = 0;
out:
	return err;
}

A
Amir Goldstein 已提交
1062 1063
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct path *workpath)
1064
{
1065
	struct vfsmount *mnt = ofs->upper_mnt;
1066
	struct dentry *temp;
1067
	int fh_type;
1068 1069
	int err;

1070 1071 1072 1073
	err = mnt_want_write(mnt);
	if (err)
		return err;

M
Miklos Szeredi 已提交
1074 1075
	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	if (!ofs->workdir)
1076
		goto out;
1077

A
Amir Goldstein 已提交
1078 1079 1080 1081
	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
	if (err)
		goto out;

1082 1083 1084 1085 1086 1087 1088 1089
	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
1090
		goto out;
1091 1092 1093 1094 1095 1096 1097 1098 1099

	/*
	 * We allowed this configuration and don't want to break users over
	 * kernel upgrade. So warn instead of erroring out.
	 */
	if (!err)
		pr_warn("overlayfs: upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
M
Miklos Szeredi 已提交
1100 1101 1102
	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(temp);
	if (ofs->tmpfile)
1103 1104 1105 1106 1107 1108 1109
		dput(temp);
	else
		pr_warn("overlayfs: upper fs does not support tmpfile.\n");

	/*
	 * Check if upper/work fs supports trusted.overlay.* xattr
	 */
M
Miklos Szeredi 已提交
1110
	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
1111
	if (err) {
M
Miklos Szeredi 已提交
1112
		ofs->noxattr = true;
1113
		ofs->config.index = false;
1114 1115
		ofs->config.metacopy = false;
		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1116
		err = 0;
1117
	} else {
M
Miklos Szeredi 已提交
1118
		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1119 1120 1121
	}

	/* Check if upper/work fs supports file handles */
1122 1123
	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
	if (ofs->config.index && !fh_type) {
M
Miklos Szeredi 已提交
1124
		ofs->config.index = false;
1125 1126 1127
		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
	}

1128 1129 1130 1131
	/* Check if upper fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_bits = 0;

1132 1133 1134 1135 1136
	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}
1137 1138 1139
out:
	mnt_drop_write(mnt);
	return err;
1140 1141
}

A
Amir Goldstein 已提交
1142 1143
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
			   struct path *upperpath)
1144 1145
{
	int err;
M
Miklos Szeredi 已提交
1146
	struct path workpath = { };
1147

M
Miklos Szeredi 已提交
1148
	err = ovl_mount_dir(ofs->config.workdir, &workpath);
1149 1150 1151 1152
	if (err)
		goto out;

	err = -EINVAL;
M
Miklos Szeredi 已提交
1153
	if (upperpath->mnt != workpath.mnt) {
1154 1155 1156
		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
		goto out;
	}
M
Miklos Szeredi 已提交
1157
	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1158 1159 1160 1161
		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
		goto out;
	}

1162 1163
	ofs->workbasedir = dget(workpath.dentry);

1164
	err = -EBUSY;
1165
	if (ovl_inuse_trylock(ofs->workbasedir)) {
M
Miklos Szeredi 已提交
1166 1167
		ofs->workdir_locked = true;
	} else if (ofs->config.index) {
1168 1169 1170 1171 1172 1173
		pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

A
Amir Goldstein 已提交
1174
	err = ovl_make_workdir(sb, ofs, &workpath);
M
Miklos Szeredi 已提交
1175

1176
out:
M
Miklos Szeredi 已提交
1177 1178
	path_put(&workpath);

1179 1180 1181
	return err;
}

A
Amir Goldstein 已提交
1182 1183
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct ovl_entry *oe, struct path *upperpath)
1184
{
1185
	struct vfsmount *mnt = ofs->upper_mnt;
1186 1187
	int err;

1188 1189 1190 1191
	err = mnt_want_write(mnt);
	if (err)
		return err;

1192
	/* Verify lower root is upper root origin */
1193
	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1194
				true);
1195 1196 1197 1198 1199
	if (err) {
		pr_err("overlayfs: failed to verify upper root origin\n");
		goto out;
	}

M
Miklos Szeredi 已提交
1200 1201
	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (ofs->indexdir) {
A
Amir Goldstein 已提交
1202 1203 1204 1205 1206
		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
				     "indexdir");
		if (err)
			goto out;

1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221
		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in "trusted.overlay.origin"
		 * xattr. If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * "trusted.overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
		}
		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1222
		if (err)
1223
			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
1224 1225 1226

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
1227
			err = ovl_indexdir_cleanup(ofs);
1228
	}
M
Miklos Szeredi 已提交
1229
	if (err || !ofs->indexdir)
1230 1231 1232
		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");

out:
1233
	mnt_drop_write(mnt);
1234 1235 1236
	return err;
}

1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
/*
 * Get a unique fsid for the layer living on super block @sb.
 *
 * Returns 0 when @sb is the upper fs, the 1-based slot in ofs->lower_fs[]
 * when @sb is (or has just been added as) a lower fs, or a negative errno
 * when no anonymous device number could be obtained for a new lower fs.
 */
static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
{
	unsigned int idx = 0;
	dev_t anon_dev;
	int ret;

	/* fsid 0 is reserved for upper fs even with non upper overlay */
	if (ofs->upper_mnt && sb == ofs->upper_mnt->mnt_sb)
		return 0;

	/* A lower fs that was seen before keeps the fsid it already has */
	while (idx < ofs->numlowerfs) {
		if (sb == ofs->lower_fs[idx].sb)
			return idx + 1;
		idx++;
	}

	ret = get_anon_bdev(&anon_dev);
	if (ret) {
		pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
		return ret;
	}

	/* idx equals ofs->numlowerfs here: record the new fs in the next slot */
	ofs->lower_fs[idx].sb = sb;
	ofs->lower_fs[idx].pseudo_dev = anon_dev;
	ofs->numlowerfs = idx + 1;

	return ofs->numlowerfs;
}

A
Amir Goldstein 已提交
1266 1267
static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
				struct path *stack, unsigned int numlower)
1268 1269 1270 1271 1272
{
	int err;
	unsigned int i;

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1273
	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
1274
				    GFP_KERNEL);
M
Miklos Szeredi 已提交
1275
	if (ofs->lower_layers == NULL)
1276
		goto out;
1277 1278 1279 1280 1281 1282

	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
				GFP_KERNEL);
	if (ofs->lower_fs == NULL)
		goto out;

1283 1284
	for (i = 0; i < numlower; i++) {
		struct vfsmount *mnt;
A
Amir Goldstein 已提交
1285
		struct inode *trap;
1286
		int fsid;
1287

1288 1289
		err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
		if (err < 0)
1290 1291
			goto out;

A
Amir Goldstein 已提交
1292 1293 1294 1295 1296 1297 1298 1299 1300 1301
		err = -EBUSY;
		if (ovl_is_inuse(stack[i].dentry)) {
			pr_err("overlayfs: lowerdir is in-use as upperdir/workdir\n");
			goto out;
		}

		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
		if (err)
			goto out;

1302 1303 1304 1305
		mnt = clone_private_mount(&stack[i]);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
			pr_err("overlayfs: failed to clone lowerpath\n");
A
Amir Goldstein 已提交
1306
			iput(trap);
1307 1308
			goto out;
		}
1309

1310 1311 1312 1313 1314 1315
		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

A
Amir Goldstein 已提交
1316
		ofs->lower_layers[ofs->numlower].trap = trap;
M
Miklos Szeredi 已提交
1317
		ofs->lower_layers[ofs->numlower].mnt = mnt;
1318
		ofs->lower_layers[ofs->numlower].idx = i + 1;
1319 1320 1321 1322 1323
		ofs->lower_layers[ofs->numlower].fsid = fsid;
		if (fsid) {
			ofs->lower_layers[ofs->numlower].fs =
				&ofs->lower_fs[fsid - 1];
		}
M
Miklos Szeredi 已提交
1324
		ofs->numlower++;
1325
	}
1326

1327 1328 1329 1330 1331 1332 1333 1334 1335
	/*
	 * When all layers on same fs, overlay can use real inode numbers.
	 * With mount option "xino=on", mounter declares that there are enough
	 * free high bits in underlying fs to hold the unique fsid.
	 * If overlayfs does encounter underlying inodes using the high xino
	 * bits reserved for fsid, it emits a warning and uses the original
	 * inode number.
	 */
	if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
1336
		ofs->xino_bits = 0;
1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
		ofs->config.xino = OVL_XINO_OFF;
	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
		/*
		 * This is a roundup of number of bits needed for numlowerfs+1
		 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
		 * upper fs even with non upper overlay.
		 */
		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
		ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
	}

	if (ofs->xino_bits) {
		pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
			ofs->xino_bits);
	}
1352

1353 1354 1355 1356 1357
	err = 0;
out:
	return err;
}

1358
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
M
Miklos Szeredi 已提交
1359
					    struct ovl_fs *ofs)
1360 1361 1362
{
	int err;
	char *lowertmp, *lower;
1363 1364
	struct path *stack = NULL;
	unsigned int stacklen, numlower = 0, i;
1365
	bool remote = false;
1366
	struct ovl_entry *oe;
1367 1368

	err = -ENOMEM;
M
Miklos Szeredi 已提交
1369
	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1370
	if (!lowertmp)
1371
		goto out_err;
1372 1373 1374 1375 1376 1377

	err = -EINVAL;
	stacklen = ovl_split_lowerdirs(lowertmp);
	if (stacklen > OVL_MAX_STACK) {
		pr_err("overlayfs: too many lower directories, limit is %d\n",
		       OVL_MAX_STACK);
1378
		goto out_err;
M
Miklos Szeredi 已提交
1379
	} else if (!ofs->config.upperdir && stacklen == 1) {
1380
		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1381
		goto out_err;
1382 1383 1384 1385
	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
		   ofs->config.redirect_follow) {
		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
1386 1387 1388 1389 1390
	}

	err = -ENOMEM;
	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
	if (!stack)
1391
		goto out_err;
1392 1393 1394 1395

	err = -EINVAL;
	lower = lowertmp;
	for (numlower = 0; numlower < stacklen; numlower++) {
M
Miklos Szeredi 已提交
1396
		err = ovl_lower_dir(lower, &stack[numlower], ofs,
1397 1398
				    &sb->s_stack_depth, &remote);
		if (err)
1399
			goto out_err;
1400 1401 1402 1403 1404 1405 1406 1407

		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1408
		goto out_err;
1409 1410
	}

A
Amir Goldstein 已提交
1411
	err = ovl_get_lower_layers(sb, ofs, stack, numlower);
1412 1413 1414 1415 1416 1417 1418 1419 1420 1421
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
M
Miklos Szeredi 已提交
1422
		oe->lowerstack[i].layer = &ofs->lower_layers[i];
1423
	}
1424 1425 1426 1427 1428 1429 1430 1431 1432 1433

	if (remote)
		sb->s_d_op = &ovl_reval_dentry_operations;
	else
		sb->s_d_op = &ovl_dentry_operations;

out:
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);
1434 1435 1436 1437 1438 1439
	kfree(lowertmp);

	return oe;

out_err:
	oe = ERR_PTR(err);
1440 1441 1442
	goto out;
}

A
Amir Goldstein 已提交
1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521
/*
 * Check if this layer root is a descendant of:
 * - another layer of this overlayfs instance
 * - upper/work dir of any overlayfs instance
 * - a disconnected dentry (detached root)
 *
 * Returns 0 if none of the above applies (or if @dentry is NULL or is the
 * root of its filesystem), -EBUSY for an in-use ancestor, -ELOOP for an
 * ancestor that has a trap inode and -ESTALE for a disconnected path.
 */
static int ovl_check_layer(struct super_block *sb, struct dentry *dentry,
			   const char *name)
{
	struct dentry *cur, *up;
	bool at_root;
	int err = 0;

	if (!dentry || dentry == dentry->d_sb->s_root)
		return 0;

	cur = dget(dentry);
	/* Walk back ancestors to fs root (inclusive) looking for traps */
	for (;;) {
		up = dget_parent(cur);
		/* A dentry that is its own parent ends the walk */
		at_root = (up == cur);

		if (ovl_is_inuse(up)) {
			err = -EBUSY;
			pr_err("overlayfs: %s path overlapping in-use upperdir/workdir\n",
			       name);
		} else if (ovl_lookup_trap_inode(sb, up)) {
			err = -ELOOP;
			pr_err("overlayfs: overlapping %s path\n", name);
		}

		/* Trade the reference on the child for the one on its parent */
		dput(cur);
		cur = up;

		if (err || at_root)
			break;
	}

	/* Did we really walk to fs root or found a detached root? */
	if (!err && cur != dentry->d_sb->s_root) {
		err = -ESTALE;
		pr_err("overlayfs: disconnected %s path\n", name);
	}

	dput(cur);

	return err;
}

/*
 * Check if any of the layers or work dirs overlap.
 *
 * Runs ovl_check_layer() on the upper root and the work base dir (only when
 * there is an upper mount) and then on every lower layer root, returning
 * the first error or 0 if all checks pass.
 */
static int ovl_check_overlapping_layers(struct super_block *sb,
					struct ovl_fs *ofs)
{
	int idx, err;

	if (ofs->upper_mnt) {
		err = ovl_check_layer(sb, ofs->upper_mnt->mnt_root, "upperdir");

		/*
		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
		 * this instance and covers overlapping work and index dirs,
		 * unless work or index dir have been moved since created inside
		 * workbasedir.  In that case, we already have their traps in
		 * inode cache and we will catch that case on lookup.
		 */
		if (!err)
			err = ovl_check_layer(sb, ofs->workbasedir, "workdir");

		if (err)
			return err;
	}

	for (idx = 0; idx < ofs->numlower; idx++) {
		struct dentry *lower_root = ofs->lower_layers[idx].mnt->mnt_root;

		err = ovl_check_layer(sb, lower_root, "lowerdir");
		if (err)
			return err;
	}

	return 0;
}

M
Miklos Szeredi 已提交
1522 1523
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
K
Kees Cook 已提交
1524
	struct path upperpath = { };
M
Miklos Szeredi 已提交
1525
	struct dentry *root_dentry;
1526
	struct ovl_entry *oe;
M
Miklos Szeredi 已提交
1527
	struct ovl_fs *ofs;
1528
	struct cred *cred;
M
Miklos Szeredi 已提交
1529 1530
	int err;

E
Erez Zadok 已提交
1531
	err = -ENOMEM;
M
Miklos Szeredi 已提交
1532 1533
	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ofs)
M
Miklos Szeredi 已提交
1534 1535
		goto out;

M
Miklos Szeredi 已提交
1536
	ofs->creator_cred = cred = prepare_creds();
1537 1538 1539
	if (!cred)
		goto out_err;

M
Miklos Szeredi 已提交
1540
	ofs->config.index = ovl_index_def;
1541
	ofs->config.nfs_export = ovl_nfs_export_def;
1542
	ofs->config.xino = ovl_xino_def();
1543
	ofs->config.metacopy = ovl_metacopy_def;
M
Miklos Szeredi 已提交
1544
	err = ovl_parse_opt((char *) data, &ofs->config);
E
Erez Zadok 已提交
1545
	if (err)
1546
		goto out_err;
E
Erez Zadok 已提交
1547

M
Miklos Szeredi 已提交
1548
	err = -EINVAL;
M
Miklos Szeredi 已提交
1549
	if (!ofs->config.lowerdir) {
1550 1551
		if (!silent)
			pr_err("overlayfs: missing 'lowerdir'\n");
1552
		goto out_err;
M
Miklos Szeredi 已提交
1553 1554
	}

M
Miklos Szeredi 已提交
1555
	sb->s_stack_depth = 0;
1556
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1557
	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
1558 1559 1560
	if (ofs->config.xino != OVL_XINO_OFF)
		ofs->xino_bits = BITS_PER_LONG - 32;

A
Amir Goldstein 已提交
1561 1562 1563
	/* alloc/destroy_inode needed for setting up traps in inode cache */
	sb->s_op = &ovl_super_operations;

M
Miklos Szeredi 已提交
1564 1565
	if (ofs->config.upperdir) {
		if (!ofs->config.workdir) {
M
Miklos Szeredi 已提交
1566
			pr_err("overlayfs: missing 'workdir'\n");
1567
			goto out_err;
M
Miklos Szeredi 已提交
1568
		}
M
Miklos Szeredi 已提交
1569

A
Amir Goldstein 已提交
1570
		err = ovl_get_upper(sb, ofs, &upperpath);
M
Miklos Szeredi 已提交
1571
		if (err)
1572
			goto out_err;
1573

A
Amir Goldstein 已提交
1574
		err = ovl_get_workdir(sb, ofs, &upperpath);
1575
		if (err)
1576
			goto out_err;
1577

M
Miklos Szeredi 已提交
1578
		if (!ofs->workdir)
1579
			sb->s_flags |= SB_RDONLY;
1580

M
Miklos Szeredi 已提交
1581 1582
		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1583

M
Miklos Szeredi 已提交
1584
	}
M
Miklos Szeredi 已提交
1585
	oe = ovl_get_lowerstack(sb, ofs);
1586 1587
	err = PTR_ERR(oe);
	if (IS_ERR(oe))
1588
		goto out_err;
M
Miklos Szeredi 已提交
1589

H
hujianyang 已提交
1590
	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
M
Miklos Szeredi 已提交
1591
	if (!ofs->upper_mnt)
1592
		sb->s_flags |= SB_RDONLY;
M
Miklos Szeredi 已提交
1593

M
Miklos Szeredi 已提交
1594
	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
A
Amir Goldstein 已提交
1595
		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
1596
		if (err)
1597
			goto out_free_oe;
1598

1599 1600 1601 1602
		/* Force r/o mount with no index dir */
		if (!ofs->indexdir) {
			dput(ofs->workdir);
			ofs->workdir = NULL;
1603
			sb->s_flags |= SB_RDONLY;
1604 1605
		}

1606 1607
	}

A
Amir Goldstein 已提交
1608 1609 1610 1611
	err = ovl_check_overlapping_layers(sb, ofs);
	if (err)
		goto out_free_oe;

1612
	/* Show index=off in /proc/mounts for forced r/o mount */
1613
	if (!ofs->indexdir) {
M
Miklos Szeredi 已提交
1614
		ofs->config.index = false;
1615 1616 1617 1618 1619
		if (ofs->upper_mnt && ofs->config.nfs_export) {
			pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
			ofs->config.nfs_export = false;
		}
	}
1620

1621 1622 1623 1624 1625
	if (ofs->config.metacopy && ofs->config.nfs_export) {
		pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}

1626 1627 1628
	if (ofs->config.nfs_export)
		sb->s_export_op = &ovl_export_operations;

1629 1630 1631
	/* Never override disk quota limits or use reserved space */
	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);

1632 1633
	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_xattr = ovl_xattr_handlers;
M
Miklos Szeredi 已提交
1634
	sb->s_fs_info = ofs;
1635
	sb->s_flags |= SB_POSIXACL;
1636

1637
	err = -ENOMEM;
1638
	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
M
Miklos Szeredi 已提交
1639
	if (!root_dentry)
1640
		goto out_free_oe;
M
Miklos Szeredi 已提交
1641

1642 1643
	root_dentry->d_fsdata = oe;

M
Miklos Szeredi 已提交
1644
	mntput(upperpath.mnt);
1645
	if (upperpath.dentry) {
1646
		ovl_dentry_set_upper_alias(root_dentry);
M
Miklos Szeredi 已提交
1647 1648
		if (ovl_is_impuredir(upperpath.dentry))
			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1649
	}
M
Miklos Szeredi 已提交
1650

1651 1652
	/* Root is always merge -> can have whiteouts */
	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1653
	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1654
	ovl_set_upperdata(d_inode(root_dentry));
1655
	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
1656
		       ovl_dentry_lower(root_dentry), NULL);
M
Miklos Szeredi 已提交
1657

M
Miklos Szeredi 已提交
1658 1659 1660 1661
	sb->s_root = root_dentry;

	return 0;

1662 1663
out_free_oe:
	ovl_entry_stack_free(oe);
1664
	kfree(oe);
1665
out_err:
M
Miklos Szeredi 已提交
1666
	path_put(&upperpath);
M
Miklos Szeredi 已提交
1667
	ovl_free_fs(ofs);
M
Miklos Szeredi 已提交
1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679
out:
	return err;
}

/*
 * Mount callback of ovl_fs_type: hands the raw option data to mount_nodev()
 * with ovl_fill_super() as the fill callback.  @dev_name is not used.
 */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
}

static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
1680
	.name		= "overlay",
M
Miklos Szeredi 已提交
1681 1682 1683
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
1684
MODULE_ALIAS_FS("overlay");
M
Miklos Szeredi 已提交
1685

1686 1687 1688 1689 1690 1691 1692
/*
 * Constructor passed to kmem_cache_create() for the ovl_inode cache: @foo is
 * treated as a struct ovl_inode and its embedded VFS inode is initialized
 * with inode_init_once().
 */
static void ovl_inode_init_once(void *foo)
{
	struct ovl_inode *oi = foo;

	inode_init_once(&oi->vfs_inode);
}

M
Miklos Szeredi 已提交
1693 1694
/*
 * Module init: create the "ovl_inode" slab cache and register the overlay
 * filesystem type.  The cache is destroyed again if registration fails.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
 * error returned by register_filesystem().
 */
static int __init ovl_init(void)
{
	int err;

	ovl_inode_cachep = kmem_cache_create("ovl_inode",
					     sizeof(struct ovl_inode), 0,
					     (SLAB_RECLAIM_ACCOUNT|
					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					     ovl_inode_init_once);
	if (ovl_inode_cachep == NULL)
		return -ENOMEM;

	err = register_filesystem(&ovl_fs_type);
	if (err)
		kmem_cache_destroy(ovl_inode_cachep);

	return err;
}

/*
 * Module exit: unregister the overlay filesystem type, then destroy the
 * ovl_inode slab cache.
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);

}

/* Register ovl_init()/ovl_exit() as the module's init and exit handlers */
module_init(ovl_init);
module_exit(ovl_exit);