inode.c 11.8 KB
Newer Older
M
Miklos Szeredi 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/fs.h>
#include <linux/slab.h>
12
#include <linux/cred.h>
M
Miklos Szeredi 已提交
13
#include <linux/xattr.h>
14
#include <linux/posix_acl.h>
M
Miklos Szeredi 已提交
15 16 17 18 19 20
#include "overlayfs.h"

int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
	int err;
	struct dentry *upperdentry;
21
	const struct cred *old_cred;
M
Miklos Szeredi 已提交
22

23 24 25 26 27 28 29 30 31
	/*
	 * Check for permissions before trying to copy-up.  This is redundant
	 * since it will be rechecked later by ->setattr() on upper dentry.  But
	 * without this, copy-up can be triggered by just about anybody.
	 *
	 * We don't initialize inode->size, which just means that
	 * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
	 * check for a swapfile (which this won't be anyway).
	 */
32
	err = setattr_prepare(dentry, attr);
33 34 35
	if (err)
		return err;

M
Miklos Szeredi 已提交
36 37 38 39
	err = ovl_want_write(dentry);
	if (err)
		goto out;

40 41 42 43
	err = ovl_copy_up(dentry);
	if (!err) {
		upperdentry = ovl_dentry_upper(dentry);

M
Miklos Szeredi 已提交
44 45 46
		if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
			attr->ia_valid &= ~ATTR_MODE;

A
Al Viro 已提交
47
		inode_lock(upperdentry->d_inode);
48
		old_cred = ovl_override_creds(dentry->d_sb);
M
Miklos Szeredi 已提交
49
		err = notify_change(upperdentry, attr, NULL);
50
		revert_creds(old_cred);
51 52
		if (!err)
			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
A
Al Viro 已提交
53
		inode_unlock(upperdentry->d_inode);
M
Miklos Szeredi 已提交
54 55 56 57 58 59
	}
	ovl_drop_write(dentry);
out:
	return err;
}

60 61
/*
 * Fill @stat for an overlay object from its real path, then adjust
 * st_dev, st_ino and st_nlink as described in the comments below.
 *
 * Returns 0 on success or the error from vfs_getattr().
 */
int ovl_getattr(const struct path *path, struct kstat *stat,
		u32 request_mask, unsigned int flags)
{
	struct dentry *dentry = path->dentry;
	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
	const struct cred *saved_cred;
	enum ovl_path_type type;
	struct path real;
	int ret;

	type = ovl_path_real(dentry, &real);
	saved_cred = ovl_override_creds(dentry->d_sb);
	ret = vfs_getattr(&real, stat, request_mask, flags);
	if (ret)
		goto out_revert;

	/*
	 * When all layers are on the same fs, all real inode numbers are
	 * unique, so we use the overlay st_dev, which is friendly to du -x.
	 *
	 * We also use st_ino of the copy up origin, if we know it.
	 * This guarantees constant st_dev/st_ino across copy up.
	 *
	 * If filesystem supports NFS export ops, this also guarantees
	 * persistent st_ino across mount cycle.
	 */
	if (ovl_same_sb(dentry->d_sb)) {
		if (OVL_TYPE_ORIGIN(type)) {
			struct kstat lower;
			u32 lower_mask = STATX_INO;

			/* The link count is only needed for non-directories */
			if (!is_dir)
				lower_mask |= STATX_NLINK;

			ovl_path_lower(dentry, &real);
			ret = vfs_getattr(&real, &lower, lower_mask, flags);
			if (ret)
				goto out_revert;

			WARN_ON_ONCE(stat->dev != lower.dev);
			/*
			 * Lower hardlinks are broken on copy up to different
			 * upper files, so we cannot use the lower origin st_ino
			 * for those different files, even for the same fs case.
			 */
			if (is_dir || lower.nlink == 1)
				stat->ino = lower.ino;
		}
		stat->dev = dentry->d_sb->s_dev;
	} else if (is_dir) {
		/*
		 * If not all layers are on the same fs the pair {real st_ino;
		 * overlay st_dev} is not unique, so use the non persistent
		 * overlay st_ino.
		 *
		 * Always use the overlay st_dev for directories, so 'find
		 * -xdev' will scan the entire overlay mount and won't cross the
		 * overlay mount boundaries.
		 */
		stat->dev = dentry->d_sb->s_dev;
		stat->ino = dentry->d_inode->i_ino;
	}

	/*
	 * It's probably not worth it to count subdirs to get the
	 * correct link count.  nlink=1 seems to pacify 'find' and
	 * other utilities.
	 */
	if (is_dir && OVL_TYPE_MERGE(type))
		stat->nlink = 1;

out_revert:
	revert_creds(saved_cred);
	return ret;
}

int ovl_permission(struct inode *inode, int mask)
{
	bool is_upper;
138
	struct inode *realinode = ovl_inode_real(inode, &is_upper);
139
	const struct cred *old_cred;
M
Miklos Szeredi 已提交
140 141 142 143 144
	int err;

	/* Careful in RCU walk mode */
	if (!realinode) {
		WARN_ON(!(mask & MAY_NOT_BLOCK));
145
		return -ECHILD;
M
Miklos Szeredi 已提交
146 147
	}

148 149 150 151 152 153 154 155 156
	/*
	 * Check overlay inode with the creds of task and underlying inode
	 * with creds of mounter
	 */
	err = generic_permission(inode, mask);
	if (err)
		return err;

	old_cred = ovl_override_creds(inode->i_sb);
157
	if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
158
		mask &= ~(MAY_WRITE | MAY_APPEND);
159 160 161
		/* Make sure mounter can read file for copy up later */
		mask |= MAY_READ;
	}
162
	err = inode_permission(realinode, mask);
163 164 165
	revert_creds(old_cred);

	return err;
M
Miklos Szeredi 已提交
166 167
}

168
static const char *ovl_get_link(struct dentry *dentry,
169 170
				struct inode *inode,
				struct delayed_call *done)
M
Miklos Szeredi 已提交
171
{
172 173
	const struct cred *old_cred;
	const char *p;
M
Miklos Szeredi 已提交
174

175 176 177
	if (!dentry)
		return ERR_PTR(-ECHILD);

178
	old_cred = ovl_override_creds(dentry->d_sb);
M
Miklos Szeredi 已提交
179
	p = vfs_get_link(ovl_dentry_real(dentry), done);
180 181
	revert_creds(old_cred);
	return p;
M
Miklos Szeredi 已提交
182 183
}

M
Miklos Szeredi 已提交
184
bool ovl_is_private_xattr(const char *name)
M
Miklos Szeredi 已提交
185
{
A
Andreas Gruenbacher 已提交
186 187
	return strncmp(name, OVL_XATTR_PREFIX,
		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
M
Miklos Szeredi 已提交
188 189
}

190 191
int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
		  size_t size, int flags)
M
Miklos Szeredi 已提交
192 193
{
	int err;
194 195
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
196
	const struct cred *old_cred;
M
Miklos Szeredi 已提交
197 198 199 200 201

	err = ovl_want_write(dentry);
	if (err)
		goto out;

202 203 204 205 206 207
	if (!value && !OVL_TYPE_UPPER(type)) {
		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
		if (err < 0)
			goto out_drop_write;
	}

M
Miklos Szeredi 已提交
208 209 210 211
	err = ovl_copy_up(dentry);
	if (err)
		goto out_drop_write;

212 213 214
	if (!OVL_TYPE_UPPER(type))
		ovl_path_upper(dentry, &realpath);

215
	old_cred = ovl_override_creds(dentry->d_sb);
216 217 218 219 220 221
	if (value)
		err = vfs_setxattr(realpath.dentry, name, value, size, flags);
	else {
		WARN_ON(flags != XATTR_REPLACE);
		err = vfs_removexattr(realpath.dentry, name);
	}
222
	revert_creds(old_cred);
M
Miklos Szeredi 已提交
223 224 225 226 227 228 229

out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

230 231
int ovl_xattr_get(struct dentry *dentry, const char *name,
		  void *value, size_t size)
M
Miklos Szeredi 已提交
232
{
M
Miklos Szeredi 已提交
233
	struct dentry *realdentry = ovl_dentry_real(dentry);
234 235
	ssize_t res;
	const struct cred *old_cred;
236

237 238 239 240
	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_getxattr(realdentry, name, value, size);
	revert_creds(old_cred);
	return res;
M
Miklos Szeredi 已提交
241 242
}

243 244 245 246 247 248 249 250 251 252
/*
 * Decide whether the xattr name @s may be reported by listxattr on the
 * overlay.
 */
static bool ovl_can_list(const char *s)
{
	/* List all non-trusted xattrs */
	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
		return true;

	/* Never list trusted.overlay */
	if (ovl_is_private_xattr(s))
		return false;

	/* List other trusted xattrs for superuser only */
	return capable(CAP_SYS_ADMIN);
}

M
Miklos Szeredi 已提交
253 254
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
M
Miklos Szeredi 已提交
255
	struct dentry *realdentry = ovl_dentry_real(dentry);
M
Miklos Szeredi 已提交
256
	ssize_t res;
M
Miklos Szeredi 已提交
257 258
	size_t len;
	char *s;
259
	const struct cred *old_cred;
M
Miklos Szeredi 已提交
260

261
	old_cred = ovl_override_creds(dentry->d_sb);
M
Miklos Szeredi 已提交
262
	res = vfs_listxattr(realdentry, list, size);
263
	revert_creds(old_cred);
M
Miklos Szeredi 已提交
264 265 266 267
	if (res <= 0 || size == 0)
		return res;

	/* filter out private xattrs */
M
Miklos Szeredi 已提交
268 269
	for (s = list, len = res; len;) {
		size_t slen = strnlen(s, len) + 1;
M
Miklos Szeredi 已提交
270

M
Miklos Szeredi 已提交
271 272 273
		/* underlying fs providing us with an broken xattr list? */
		if (WARN_ON(slen > len))
			return -EIO;
M
Miklos Szeredi 已提交
274

M
Miklos Szeredi 已提交
275
		len -= slen;
276
		if (!ovl_can_list(s)) {
M
Miklos Szeredi 已提交
277
			res -= slen;
M
Miklos Szeredi 已提交
278
			memmove(s, s + slen, len);
M
Miklos Szeredi 已提交
279
		} else {
M
Miklos Szeredi 已提交
280
			s += slen;
M
Miklos Szeredi 已提交
281 282 283 284 285 286
		}
	}

	return res;
}

287 288
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
289
	struct inode *realinode = ovl_inode_real(inode, NULL);
290 291
	const struct cred *old_cred;
	struct posix_acl *acl;
292

293
	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
294 295
		return NULL;

296
	old_cred = ovl_override_creds(inode->i_sb);
297
	acl = get_acl(realinode, type);
298 299 300
	revert_creds(old_cred);

	return acl;
301 302
}

M
Miklos Szeredi 已提交
303 304 305
/*
 * Decide whether opening with @flags requires a copy up: only when the
 * object is not already upper, is not a special file, and the open either
 * asks for write access or truncates.
 */
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
				  struct dentry *realdentry)
{
	/* Already on upper, or a special file: no copy up */
	if (OVL_TYPE_UPPER(type) || special_file(realdentry->d_inode->i_mode))
		return false;

	return (OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC);
}

318
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
M
Miklos Szeredi 已提交
319
{
320
	int err = 0;
M
Miklos Szeredi 已提交
321 322 323 324
	struct path realpath;
	enum ovl_path_type type;

	type = ovl_path_real(dentry, &realpath);
325
	if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
M
Miklos Szeredi 已提交
326
		err = ovl_want_write(dentry);
327
		if (!err) {
328
			err = ovl_copy_up_flags(dentry, file_flags);
329 330
			ovl_drop_write(dentry);
		}
M
Miklos Szeredi 已提交
331 332
	}

333
	return err;
M
Miklos Szeredi 已提交
334 335
}

M
Miklos Szeredi 已提交
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
int ovl_update_time(struct inode *inode, struct timespec *ts, int flags)
{
	struct dentry *alias;
	struct path upperpath;

	if (!(flags & S_ATIME))
		return 0;

	alias = d_find_any_alias(inode);
	if (!alias)
		return 0;

	ovl_path_upper(alias, &upperpath);
	if (upperpath.dentry) {
		touch_atime(&upperpath);
		inode->i_atime = d_inode(upperpath.dentry)->i_atime;
	}

	dput(alias);

	return 0;
}

M
Miklos Szeredi 已提交
359 360 361 362 363
static const struct inode_operations ovl_file_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
364
	.get_acl	= ovl_get_acl,
M
Miklos Szeredi 已提交
365
	.update_time	= ovl_update_time,
M
Miklos Szeredi 已提交
366 367 368 369
};

static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr	= ovl_setattr,
370
	.get_link	= ovl_get_link,
M
Miklos Szeredi 已提交
371 372
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
M
Miklos Szeredi 已提交
373
	.update_time	= ovl_update_time,
M
Miklos Szeredi 已提交
374 375
};

376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
/*
 * It is possible to stack an overlayfs instance on top of another
 * overlayfs instance as lower layer. We need to annotate the
 * stackable i_mutex locks according to the stack level of the super
 * block instance. An overlayfs instance can never be in stack
 * depth 0 (there is always a real fs below it).  An overlayfs
 * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
 *
 * For example, here is a snip from /proc/lockdep_chains after
 * dir_iterate of nested overlayfs:
 *
 * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
 * [...] &type->i_mutex_dir_key        (stack_depth=0)
 */
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH

/*
 * Give the inode's i_rwsem a lockdep class chosen by the stack depth of its
 * super block, with separate key arrays for directories and
 * non-directories.  The body is compiled out when CONFIG_LOCKDEP is unset.
 */
static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
	/* One key per nesting level; static so the keys persist across calls */
	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];

	/* Convert the stack depth into a zero-based array index */
	int depth = inode->i_sb->s_stack_depth - 1;

	/* Out-of-range depth: warn once and fall back to index 0 */
	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
		depth = 0;

	/*
	 * NOTE(review): the key expressions below match the names shown in
	 * the /proc/lockdep_chains sample quoted ahead of this function, so
	 * their spelling is presumably visible in lockdep output -- confirm
	 * before renaming the arrays or the index variable.
	 */
	if (S_ISDIR(inode->i_mode))
		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
	else
		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
#endif
}

411
/*
 * Initialise the overlay-specific fields of a freshly obtained inode:
 * a new inode number, the mode, S_NOCMTIME, uncached ACLs, the lockdep
 * class and the operation tables matching the file type in @mode.
 */
static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
	/* Mark both ACL slots as not to be cached on the overlay inode */
	inode->i_default_acl = ACL_DONT_CACHE;
	inode->i_acl = ACL_DONT_CACHE;
#endif

	ovl_lockdep_annotate_inode_mutex_key(inode);

	switch (mode & S_IFMT) {
	case S_IFDIR:
		inode->i_op = &ovl_dir_inode_operations;
		inode->i_fop = &ovl_dir_operations;
		break;

	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		break;

	case S_IFREG:
		inode->i_op = &ovl_file_inode_operations;
		break;

	default:
		/* Everything else is set up as a special inode using @rdev */
		inode->i_op = &ovl_file_inode_operations;
		init_special_inode(inode, mode, rdev);
		break;
	}
}
M
Miklos Szeredi 已提交
442

443
/*
 * Allocate a new inode on @sb and initialise it with ovl_fill_inode().
 * Returns NULL if new_inode() fails.
 */
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
	struct inode *fresh = new_inode(sb);

	if (!fresh)
		return NULL;

	ovl_fill_inode(fresh, mode, rdev);
	return fresh;
}

/*
 * Match callback passed to iget5_locked(): non-zero when the real inode
 * behind @inode is the one given in @data.
 */
static int ovl_inode_test(struct inode *inode, void *data)
{
	struct inode *real = ovl_inode_real(inode, NULL);

	return real == data;
}

/*
 * Init callback passed to iget5_locked(): store @data in i_private with
 * OVL_ISUPPER_MASK or-ed into the pointer value.  Always returns 0.
 */
static int ovl_inode_set(struct inode *inode, void *data)
{
	unsigned long tagged = (unsigned long) data | OVL_ISUPPER_MASK;

	inode->i_private = (void *) tagged;
	return 0;
}

M
Miklos Szeredi 已提交
465
struct inode *ovl_get_inode(struct dentry *dentry)
M
Miklos Szeredi 已提交
466
{
M
Miklos Szeredi 已提交
467 468
	struct dentry *upperdentry = ovl_dentry_upper(dentry);
	struct inode *realinode = d_inode(ovl_dentry_real(dentry));
M
Miklos Szeredi 已提交
469 470
	struct inode *inode;

M
Miklos Szeredi 已提交
471 472 473 474 475 476
	if (upperdentry && !d_is_dir(upperdentry)) {
		inode = iget5_locked(dentry->d_sb, (unsigned long) realinode,
				     ovl_inode_test, ovl_inode_set, realinode);
		if (!inode || !(inode->i_state & I_NEW))
			goto out;

M
Miklos Szeredi 已提交
477
		set_nlink(inode, realinode->i_nlink);
M
Miklos Szeredi 已提交
478 479 480 481
	} else {
		inode = new_inode(dentry->d_sb);
		if (!inode)
			goto out;
M
Miklos Szeredi 已提交
482
	}
M
Miklos Szeredi 已提交
483 484 485 486 487
	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
	ovl_inode_init(inode, dentry);
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
out:
M
Miklos Szeredi 已提交
488 489
	return inode;
}