copy_up.c 22.7 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
M
Miklos Szeredi 已提交
2 3 4 5 6
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

7
#include <linux/module.h>
M
Miklos Szeredi 已提交
8 9 10 11 12 13 14
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/splice.h>
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/uaccess.h>
15
#include <linux/sched/signal.h>
16
#include <linux/cred.h>
M
Miklos Szeredi 已提交
17
#include <linux/namei.h>
18 19
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
20
#include <linux/exportfs.h>
M
Miklos Szeredi 已提交
21 22 23 24
#include "overlayfs.h"

/* Largest number of bytes (1 MiB) spliced per iteration of the data copy loop in ovl_copy_up_data(). */
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)

25
/*
 * Setter for the "check_copy_up" module parameter.
 *
 * The written value in @buf is ignored; a warning that the option is
 * obsolete is logged instead.
 *
 * Returns 0 unconditionally.
 */
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
	pr_warn("\"check_copy_up\" module option is obsolete\n");

	return 0;
}

31
/*
 * Getter for the "check_copy_up" module parameter.
 *
 * Always formats the fixed string "N\n" into @buf; @param is not consulted.
 *
 * Returns the number of characters written (as reported by sprintf()).
 */
static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
	int written = sprintf(buf, "N\n");

	return written;
}

36
/* Expose "check_copy_up" as a module parameter (mode 0644) backed by ovl_ccup_set()/ovl_ccup_get(). */
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
37
/* Description string attached to the "check_copy_up" parameter. */
MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
38

39 40 41 42 43 44 45
/*
 * Tell whether xattr @name is one whose copy failure may not be ignored.
 *
 * Returns true when @name equals XATTR_POSIX_ACL_ACCESS or
 * XATTR_POSIX_ACL_DEFAULT, or when it starts with XATTR_SECURITY_PREFIX.
 * ovl_copy_xattr() uses this to decide whether an -EOPNOTSUPP from
 * vfs_setxattr() can be dropped.
 */
static bool ovl_must_copy_xattr(const char *name)
{
	if (strcmp(name, XATTR_POSIX_ACL_ACCESS) == 0)
		return true;

	if (strcmp(name, XATTR_POSIX_ACL_DEFAULT) == 0)
		return true;

	return strncmp(name, XATTR_SECURITY_PREFIX,
		       XATTR_SECURITY_PREFIX_LEN) == 0;
}

46 47
int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
		   struct dentry *new)
M
Miklos Szeredi 已提交
48
{
49 50
	ssize_t list_size, size, value_size = 0;
	char *buf, *name, *value = NULL;
51
	int error = 0;
52
	size_t slen;
M
Miklos Szeredi 已提交
53

54 55
	if (!(old->d_inode->i_opflags & IOP_XATTR) ||
	    !(new->d_inode->i_opflags & IOP_XATTR))
M
Miklos Szeredi 已提交
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
		return 0;

	list_size = vfs_listxattr(old, NULL, 0);
	if (list_size <= 0) {
		if (list_size == -EOPNOTSUPP)
			return 0;
		return list_size;
	}

	buf = kzalloc(list_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	list_size = vfs_listxattr(old, buf, list_size);
	if (list_size <= 0) {
		error = list_size;
72
		goto out;
M
Miklos Szeredi 已提交
73 74
	}

75 76 77 78 79 80 81 82 83 84
	for (name = buf; list_size; name += slen) {
		slen = strnlen(name, list_size) + 1;

		/* underlying fs providing us with an broken xattr list? */
		if (WARN_ON(slen > list_size)) {
			error = -EIO;
			break;
		}
		list_size -= slen;

85
		if (ovl_is_private_xattr(sb, name))
M
Miklos Szeredi 已提交
86
			continue;
87 88 89 90 91 92 93 94

		error = security_inode_copy_up_xattr(name);
		if (error < 0 && error != -EOPNOTSUPP)
			break;
		if (error == 1) {
			error = 0;
			continue; /* Discard */
		}
95 96 97 98 99
retry:
		size = vfs_getxattr(old, name, value, value_size);
		if (size == -ERANGE)
			size = vfs_getxattr(old, name, NULL, 0);

M
Miklos Szeredi 已提交
100
		if (size < 0) {
M
Miklos Szeredi 已提交
101
			error = size;
102
			break;
M
Miklos Szeredi 已提交
103
		}
104 105 106 107 108 109 110 111 112 113 114 115 116 117

		if (size > value_size) {
			void *new;

			new = krealloc(value, size, GFP_KERNEL);
			if (!new) {
				error = -ENOMEM;
				break;
			}
			value = new;
			value_size = size;
			goto retry;
		}

M
Miklos Szeredi 已提交
118
		error = vfs_setxattr(new, name, value, size, 0);
119 120 121 122 123 124 125
		if (error) {
			if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
				break;

			/* Ignore failure to copy unknown xattrs */
			error = 0;
		}
M
Miklos Szeredi 已提交
126 127 128 129 130 131 132
	}
	kfree(value);
out:
	kfree(buf);
	return error;
}

133 134
/*
 * Copy @len bytes of file data from @old to @new.
 *
 * @ofs: overlay fs instance, consulted via ovl_should_sync()
 * @old: path opened read-only as the data source
 * @new: path opened write-only as the destination
 * @len: number of bytes to copy; 0 returns immediately
 *
 * A whole-range clone is attempted first.  If that does not cover @len bytes
 * the data is spliced in pieces of at most OVL_COPY_UP_CHUNK_SIZE, checking
 * for a pending fatal signal before each piece (-EINTR).  On success the
 * destination is fsync'ed when ovl_should_sync() asks for it.
 *
 * Returns 0 on success or a negative errno.
 */
static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
			    struct path *new, loff_t len)
{
	struct file *src;
	struct file *dst;
	loff_t src_pos = 0;
	loff_t dst_pos = 0;
	loff_t next_data = -1;
	bool skip_hole;
	int error = 0;

	if (!len)
		return 0;

	src = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
	if (IS_ERR(src))
		return PTR_ERR(src);

	dst = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
	if (IS_ERR(dst)) {
		error = PTR_ERR(dst);
		goto out_fput;
	}

	/* Cloning the full range makes the copy loop unnecessary */
	if (do_clone_file_range(src, 0, dst, 0, len, 0) == len)
		goto out;

	/* Hole skipping is only tried when the source file can seek */
	skip_hole = (src->f_mode & FMODE_LSEEK) && src->f_op->llseek;

	while (len) {
		size_t chunk = OVL_COPY_UP_CHUNK_SIZE;
		long copied;

		if (len < chunk)
			chunk = len;

		if (signal_pending_state(TASK_KILLABLE, current)) {
			error = -EINTR;
			break;
		}

		/*
		 * Writing zeroes for a hole wastes disk space and time, so
		 * look for the next data offset with SEEK_DATA and jump over
		 * whatever lies before it.  A new lookup is done only once
		 * the copy position has passed the last offset found.  Not
		 * every hole is necessarily detected, and a hole may be
		 * skipped only partially.  If the seek fails with anything
		 * but -ENXIO, hole skipping is switched off and the rest is
		 * copied chunk by chunk.
		 */
		if (skip_hole && next_data < src_pos) {
			next_data = vfs_llseek(src, src_pos, SEEK_DATA);
			if (next_data > src_pos) {
				len -= next_data - src_pos;
				src_pos = dst_pos = next_data;
				continue;
			}
			if (next_data == -ENXIO)
				break;
			if (next_data < 0)
				skip_hole = false;
		}

		copied = do_splice_direct(src, &src_pos, dst, &dst_pos,
					  chunk, SPLICE_F_MOVE);
		if (copied <= 0) {
			error = copied;
			break;
		}
		WARN_ON(src_pos != dst_pos);

		len -= copied;
	}
out:
	if (!error && ovl_should_sync(ofs))
		error = vfs_fsync(dst, 0);
	fput(dst);
out_fput:
	fput(src);
	return error;
}

232 233 234 235 236 237 238 239 240 241
static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
{
	struct iattr attr = {
		.ia_valid = ATTR_SIZE,
		.ia_size = stat->size,
	};

	return notify_change(upperdentry, &attr, NULL);
}

M
Miklos Szeredi 已提交
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
	struct iattr attr = {
		.ia_valid =
		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
		.ia_atime = stat->atime,
		.ia_mtime = stat->mtime,
	};

	return notify_change(upperdentry, &attr, NULL);
}

int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
{
	int err = 0;

	if (!S_ISLNK(stat->mode)) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = stat->mode,
		};
		err = notify_change(upperdentry, &attr, NULL);
	}
	if (!err) {
		struct iattr attr = {
			.ia_valid = ATTR_UID | ATTR_GID,
			.ia_uid = stat->uid,
			.ia_gid = stat->gid,
		};
		err = notify_change(upperdentry, &attr, NULL);
	}
	if (!err)
		ovl_set_timestamps(upperdentry, stat);

	return err;
}

279 280
/*
 * Allocate and fill an overlay file handle describing @real.
 *
 * @ofs:      overlay fs instance; config.uuid decides whether the uuid of
 *            @real's super block is stored in the handle
 * @real:     dentry to encode
 * @is_upper: sets OVL_FH_FLAG_PATH_UPPER in the handle flags when true
 *
 * Returns a kzalloc'ed handle that the caller must kfree(), or an ERR_PTR():
 * -ENOMEM on allocation failure, -EIO when exportfs_encode_fh() produces an
 * unusable result.
 */
struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
				  bool is_upper)
{
	struct ovl_fh *fh;
	int type, words, fid_len;

	/* Make sure the real fid stays 32bit aligned */
	BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
	BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);

	fh = kzalloc(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET, GFP_KERNEL);
	if (!fh)
		return ERR_PTR(-ENOMEM);

	/*
	 * We encode a non-connectable file handle for non-dir, because we
	 * only need to find the lower inode number and we don't want to pay
	 * the price of reconnecting the dentry.
	 */
	words = MAX_HANDLE_SZ >> 2;
	type = exportfs_encode_fh(real, (void *)fh->fb.fid, &words, 0);
	fid_len = words << 2;

	if (WARN_ON(type < 0) ||
	    WARN_ON(fid_len > MAX_HANDLE_SZ) ||
	    WARN_ON(type == FILEID_INVALID)) {
		kfree(fh);
		return ERR_PTR(-EIO);
	}

	fh->fb.version = OVL_FH_VERSION;
	fh->fb.magic = OVL_FH_MAGIC;
	fh->fb.type = type;
	fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
	/*
	 * When we will want to decode an overlay dentry from this handle
	 * and all layers are on the same fs, if we get a disconnected real
	 * dentry when we decode fid, the only way to tell if we should assign
	 * it to upperdentry or to lowerstack is by checking this flag.
	 */
	if (is_upper)
		fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
	fh->fb.len = sizeof(fh->fb) + fid_len;
	if (ofs->config.uuid)
		fh->fb.uuid = real->d_sb->s_uuid;

	return fh;
}

334 335
int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
		   struct dentry *lower, struct dentry *upper)
336 337 338 339 340 341 342 343 344
{
	const struct ovl_fh *fh = NULL;
	int err;

	/*
	 * When lower layer doesn't support export operations store a 'null' fh,
	 * so we can use the overlay.origin xattr to distignuish between a copy
	 * up and a pure upper inode.
	 */
345
	if (ovl_can_decode_fh(lower->d_sb)) {
346
		fh = ovl_encode_real_fh(ofs, lower, false);
347 348 349 350
		if (IS_ERR(fh))
			return PTR_ERR(fh);
	}

351 352 353
	/*
	 * Do not fail when upper doesn't support xattrs.
	 */
354 355
	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
				 fh ? fh->fb.len : 0, 0);
356 357
	kfree(fh);

358 359
	/* Ignore -EPERM from setting "user.*" on symlink/special */
	return err == -EPERM ? 0 : err;
360 361
}

362
/* Store file handle of @upper dir in @index dir entry */
363 364
static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
			    struct dentry *index)
365 366 367 368
{
	const struct ovl_fh *fh;
	int err;

369
	fh = ovl_encode_real_fh(ofs, upper, true);
370 371 372
	if (IS_ERR(fh))
		return PTR_ERR(fh);

373
	err = ovl_do_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len);
374 375 376 377 378 379 380 381 382 383 384 385 386

	kfree(fh);
	return err;
}

/*
 * Create and install index entry.
 *
 * Caller must hold i_mutex on indexdir.
 */
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
			    struct dentry *upper)
{
387
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
	struct inode *dir = d_inode(indexdir);
	struct dentry *index = NULL;
	struct dentry *temp = NULL;
	struct qstr name = { };
	int err;

	/*
	 * For now this is only used for creating index entry for directories,
	 * because non-dir are copied up directly to index and then hardlinked
	 * to upper dir.
	 *
	 * TODO: implement create index for non-dir, so we can call it when
	 * encoding file handle for non-dir in case index does not exist.
	 */
	if (WARN_ON(!d_is_dir(dentry)))
		return -EIO;

	/* Directory not expected to be indexed before copy up */
	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
		return -EIO;

410
	err = ovl_get_index_name(ofs, origin, &name);
411 412 413
	if (err)
		return err;

414
	temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
415
	err = PTR_ERR(temp);
416
	if (IS_ERR(temp))
417
		goto free_name;
418

419
	err = ovl_set_upper_fh(ofs, upper, temp);
420
	if (err)
421
		goto out;
422 423 424 425 426 427 428 429 430

	index = lookup_one_len(name.name, indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
	} else {
		err = ovl_do_rename(dir, temp, dir, index, 0);
		dput(index);
	}
out:
431 432
	if (err)
		ovl_cleanup(dir, temp);
433
	dput(temp);
434
free_name:
435 436 437 438
	kfree(name.name);
	return err;
}

439 440 441 442 443 444 445 446 447 448 449
/*
 * State shared by the helpers that copy up a single dentry.  Filled in by
 * ovl_copy_up_one() and refined by ovl_do_copy_up().
 */
struct ovl_copy_up_ctx {
	/* Overlay parent of @dentry; NULL when copying up a disconnected dentry */
	struct dentry *parent;
	/* Overlay dentry being copied up */
	struct dentry *dentry;
	/* Lower path of @dentry, as returned by ovl_path_lower() */
	struct path lowerpath;
	/* Attributes of @lowerpath; size is forced to 0 for O_TRUNC */
	struct kstat stat;
	/* atime/mtime of the upper parent, used to restore them afterwards */
	struct kstat pstat;
	/* Symlink target obtained with vfs_get_link(); set for symlinks only */
	const char *link;
	/* Directory the copied up object is installed in */
	struct dentry *destdir;
	/* Name of the copied up object inside @destdir */
	struct qstr destname;
	/* Directory in which the temporary object is created */
	struct dentry *workdir;
	/* Store the origin xattr on the copied up inode */
	bool origin;
	/* ovl_need_index() was true for @dentry */
	bool indexed;
	/* Skip the data copy and mark the result with the metacopy xattr */
	bool metacopy;
};

static int ovl_link_up(struct ovl_copy_up_ctx *c)
455 456 457
{
	int err;
	struct dentry *upper;
458
	struct dentry *upperdir = ovl_dentry_upper(c->parent);
459 460
	struct inode *udir = d_inode(upperdir);

461 462 463 464 465 466
	/* Mark parent "impure" because it may now contain non-pure upper */
	err = ovl_set_impure(c->parent, upperdir);
	if (err)
		return err;

	err = ovl_set_nlink_lower(c->dentry);
467 468 469
	if (err)
		return err;

470
	inode_lock_nested(udir, I_MUTEX_PARENT);
471 472
	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
			       c->dentry->d_name.len);
473 474
	err = PTR_ERR(upper);
	if (!IS_ERR(upper)) {
475
		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
476 477
		dput(upper);

478 479 480 481 482
		if (!err) {
			/* Restore timestamps on parent (best effort) */
			ovl_set_timestamps(upperdir, &c->pstat);
			ovl_dentry_set_upper_alias(c->dentry);
		}
483 484
	}
	inode_unlock(udir);
485 486 487 488
	if (err)
		return err;

	err = ovl_set_nlink_upper(c->dentry);
489 490 491 492

	return err;
}

493
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
494
{
495
	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
496 497
	int err;

498 499 500 501 502 503 504 505 506 507 508 509 510
	/*
	 * Copy up data first and then xattrs. Writing data after
	 * xattrs will remove security.capability xattr automatically.
	 */
	if (S_ISREG(c->stat.mode) && !c->metacopy) {
		struct path upperpath, datapath;

		ovl_path_upper(c->dentry, &upperpath);
		if (WARN_ON(upperpath.dentry != NULL))
			return -EIO;
		upperpath.dentry = temp;

		ovl_path_lowerdata(c->dentry, &datapath);
511 512
		err = ovl_copy_up_data(ofs, &datapath, &upperpath,
				       c->stat.size);
513 514 515 516
		if (err)
			return err;
	}

517
	err = ovl_copy_xattr(c->dentry->d_sb, c->lowerpath.dentry, temp);
M
Miklos Szeredi 已提交
518
	if (err)
519
		return err;
M
Miklos Szeredi 已提交
520

521 522 523
	/*
	 * Store identifier of lower inode in upper inode xattr to
	 * allow lookup of the copy up origin inode.
524 525 526
	 *
	 * Don't set origin when we are breaking the association with a lower
	 * hard link.
527
	 */
528
	if (c->origin) {
529
		err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp);
530
		if (err)
531
			return err;
532
	}
533

534 535 536 537 538 539 540
	if (c->metacopy) {
		err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
					 NULL, 0, -EOPNOTSUPP);
		if (err)
			return err;
	}

541
	inode_lock(temp->d_inode);
542
	if (S_ISREG(c->stat.mode))
543 544 545
		err = ovl_set_size(temp, &c->stat);
	if (!err)
		err = ovl_set_attr(temp, &c->stat);
546 547 548
	inode_unlock(temp->d_inode);

	return err;
549 550
}

551 552 553 554 555 556
/*
 * Credentials bookkeeping for creating the copy up target: set up by
 * ovl_prep_cu_creds() and undone by ovl_revert_cu_creds().
 */
struct ovl_cu_creds {
	/* Value returned by override_creds(); handed back to revert_creds() */
	const struct cred *old;
	/* Filled in by security_inode_copy_up(); stays NULL if it sets none */
	struct cred *new;
};

static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
A
Amir Goldstein 已提交
557 558 559
{
	int err;

560 561
	cc->old = cc->new = NULL;
	err = security_inode_copy_up(dentry, &cc->new);
A
Amir Goldstein 已提交
562
	if (err < 0)
563
		return err;
A
Amir Goldstein 已提交
564

565 566
	if (cc->new)
		cc->old = override_creds(cc->new);
A
Amir Goldstein 已提交
567

568
	return 0;
A
Amir Goldstein 已提交
569 570
}

571
static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
A
Amir Goldstein 已提交
572
{
573 574 575 576
	if (cc->new) {
		revert_creds(cc->old);
		put_cred(cc->new);
	}
A
Amir Goldstein 已提交
577 578 579 580 581 582 583
}

/*
 * Copyup using workdir to prepare temp file.  Used when copying up directories,
 * special files or when upper fs doesn't support O_TMPFILE.
 */
static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
584
{
585
	struct inode *inode;
586 587 588
	struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
	struct dentry *temp, *upper;
	struct ovl_cu_creds cc;
589
	int err;
590 591 592 593 594 595
	struct ovl_cattr cattr = {
		/* Can't properly set mode on creation because of the umask */
		.mode = c->stat.mode & S_IFMT,
		.rdev = c->stat.rdev,
		.link = c->link
	};
596

597 598 599 600
	/* workdir and destdir could be the same when copying up to indexdir */
	err = -EIO;
	if (lock_rename(c->workdir, c->destdir) != NULL)
		goto unlock;
A
Amir Goldstein 已提交
601

602 603 604 605 606 607 608
	err = ovl_prep_cu_creds(c->dentry, &cc);
	if (err)
		goto unlock;

	temp = ovl_create_temp(c->workdir, &cattr);
	ovl_revert_cu_creds(&cc);

A
Amir Goldstein 已提交
609
	err = PTR_ERR(temp);
610
	if (IS_ERR(temp))
A
Amir Goldstein 已提交
611
		goto unlock;
612

613
	err = ovl_copy_up_inode(c, temp);
614
	if (err)
A
Amir Goldstein 已提交
615
		goto cleanup;
616

617 618 619
	if (S_ISDIR(c->stat.mode) && c->indexed) {
		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
		if (err)
A
Amir Goldstein 已提交
620
			goto cleanup;
621 622
	}

623 624 625 626 627 628 629
	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto cleanup;

	err = ovl_do_rename(wdir, temp, udir, upper, 0);
	dput(upper);
M
Miklos Szeredi 已提交
630
	if (err)
A
Amir Goldstein 已提交
631
		goto cleanup;
M
Miklos Szeredi 已提交
632

633 634
	if (!c->metacopy)
		ovl_set_upperdata(d_inode(c->dentry));
635
	inode = d_inode(c->dentry);
636
	ovl_inode_update(inode, temp);
637 638
	if (S_ISDIR(inode->i_mode))
		ovl_set_flag(OVL_WHITEOUTS, inode);
A
Amir Goldstein 已提交
639 640 641 642 643 644
unlock:
	unlock_rename(c->workdir, c->destdir);

	return err;

cleanup:
645 646 647
	ovl_cleanup(wdir, temp);
	dput(temp);
	goto unlock;
A
Amir Goldstein 已提交
648 649
}

650 651
/* Copyup using O_TMPFILE which does not require cross dir locking */
static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
A
Amir Goldstein 已提交
652
{
653 654 655
	struct inode *udir = d_inode(c->destdir);
	struct dentry *temp, *upper;
	struct ovl_cu_creds cc;
A
Amir Goldstein 已提交
656 657
	int err;

658 659 660
	err = ovl_prep_cu_creds(c->dentry, &cc);
	if (err)
		return err;
A
Amir Goldstein 已提交
661 662

	temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
663
	ovl_revert_cu_creds(&cc);
A
Amir Goldstein 已提交
664

665 666
	if (IS_ERR(temp))
		return PTR_ERR(temp);
A
Amir Goldstein 已提交
667

668 669 670
	err = ovl_copy_up_inode(c, temp);
	if (err)
		goto out_dput;
A
Amir Goldstein 已提交
671 672 673 674 675

	inode_lock_nested(udir, I_MUTEX_PARENT);

	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
	err = PTR_ERR(upper);
676 677 678 679
	if (!IS_ERR(upper)) {
		err = ovl_do_link(temp, udir, upper);
		dput(upper);
	}
A
Amir Goldstein 已提交
680 681 682
	inode_unlock(udir);

	if (err)
683
		goto out_dput;
A
Amir Goldstein 已提交
684 685 686

	if (!c->metacopy)
		ovl_set_upperdata(d_inode(c->dentry));
687
	ovl_inode_update(d_inode(c->dentry), temp);
688

689 690 691
	return 0;

out_dput:
692
	dput(temp);
M
Miklos Szeredi 已提交
693 694 695 696 697 698
	return err;
}

/*
 * Copy up a single dentry
 *
M
Miklos Szeredi 已提交
699 700 701 702 703
 * All renames start with copy up of source if necessary.  The actual
 * rename will only proceed once the copy up was successful.  Copy up uses
 * upper parent i_mutex for exclusion.  Since rename can change d_parent it
 * is possible that the copy up will lock the old parent.  At that point
 * the file will have already been copied up anyway.
M
Miklos Szeredi 已提交
704
 */
M
Miklos Szeredi 已提交
705
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
M
Miklos Szeredi 已提交
706 707
{
	int err;
708
	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
709
	bool to_index = false;
710

711 712 713 714 715 716 717 718 719 720 721 722 723 724 725
	/*
	 * Indexed non-dir is copied up directly to the index entry and then
	 * hardlinked to upper dir. Indexed dir is copied up to indexdir,
	 * then index entry is created and then copied up dir installed.
	 * Copying dir up to indexdir instead of workdir simplifies locking.
	 */
	if (ovl_need_index(c->dentry)) {
		c->indexed = true;
		if (S_ISDIR(c->stat.mode))
			c->workdir = ovl_indexdir(c->dentry->d_sb);
		else
			to_index = true;
	}

	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
726 727
		c->origin = true;

728
	if (to_index) {
729
		c->destdir = ovl_indexdir(c->dentry->d_sb);
730
		err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
731 732
		if (err)
			return err;
733 734 735
	} else if (WARN_ON(!c->parent)) {
		/* Disconnected dentry must be copied up to index dir */
		return -EIO;
736 737 738 739 740 741 742 743 744
	} else {
		/*
		 * Mark parent "impure" because it may now contain non-pure
		 * upper
		 */
		err = ovl_set_impure(c->parent, c->destdir);
		if (err)
			return err;
	}
M
Miklos Szeredi 已提交
745

746
	/* Should we copyup with O_TMPFILE or with workdir? */
A
Amir Goldstein 已提交
747 748 749 750
	if (S_ISREG(c->stat.mode) && ofs->tmpfile)
		err = ovl_copy_up_tmpfile(c);
	else
		err = ovl_copy_up_workdir(c);
751 752 753 754
	if (err)
		goto out;

	if (c->indexed)
755 756 757
		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));

	if (to_index) {
758 759 760
		/* Initialize nlink for copy up of disconnected dentry */
		err = ovl_set_nlink_upper(c->dentry);
	} else {
761 762 763 764 765 766 767 768
		struct inode *udir = d_inode(c->destdir);

		/* Restore timestamps on parent (best effort) */
		inode_lock(udir);
		ovl_set_timestamps(c->destdir, &c->pstat);
		inode_unlock(udir);

		ovl_dentry_set_upper_alias(c->dentry);
M
Miklos Szeredi 已提交
769 770
	}

771 772 773
out:
	if (to_index)
		kfree(c->destname.name);
M
Miklos Szeredi 已提交
774 775 776
	return err;
}

777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
/*
 * Decide whether copy up of @dentry may be metadata only.
 *
 * @dentry: overlay dentry; its super block's config.metacopy must be set
 * @mode:   file mode of the object; only regular files qualify
 * @flags:  open flags; 0, or any flags that neither open for write nor
 *          truncate, allow a metadata only copy up
 */
static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
				  int flags)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;

	if (!ofs->config.metacopy || !S_ISREG(mode))
		return false;

	return !(flags &&
		 ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)));
}

794
static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
M
Miklos Szeredi 已提交
795 796
{
	ssize_t res;
797
	char *buf;
M
Miklos Szeredi 已提交
798 799

	res = vfs_getxattr(dentry, name, NULL, 0);
800 801
	if (res == -ENODATA || res == -EOPNOTSUPP)
		res = 0;
M
Miklos Szeredi 已提交
802

803 804
	if (res > 0) {
		buf = kzalloc(res, GFP_KERNEL);
M
Miklos Szeredi 已提交
805 806 807 808 809
		if (!buf)
			return -ENOMEM;

		res = vfs_getxattr(dentry, name, buf, res);
		if (res < 0)
810 811 812
			kfree(buf);
		else
			*value = buf;
M
Miklos Szeredi 已提交
813 814 815 816
	}
	return res;
}

817 818 819
/* Copy up data of an inode which was copied up metadata only in the past. */
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
{
820
	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
821
	struct path upperpath, datapath;
822
	int err;
823
	char *capability = NULL;
824
	ssize_t cap_size;
825 826 827 828 829

	ovl_path_upper(c->dentry, &upperpath);
	if (WARN_ON(upperpath.dentry == NULL))
		return -EIO;

830 831 832 833
	ovl_path_lowerdata(c->dentry, &datapath);
	if (WARN_ON(datapath.dentry == NULL))
		return -EIO;

834 835
	if (c->stat.size) {
		err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
836 837
					      &capability);
		if (cap_size < 0)
838 839 840
			goto out;
	}

841
	err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size);
842
	if (err)
843 844 845 846 847 848 849
		goto out_free;

	/*
	 * Writing to upper file will clear security.capability xattr. We
	 * don't want that to happen for normal copy-up operation.
	 */
	if (capability) {
850 851
		err = vfs_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
				   capability, cap_size, 0);
852 853 854 855
		if (err)
			goto out_free;
	}

856

857
	err = ovl_do_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
858
	if (err)
859
		goto out_free;
860 861

	ovl_set_upperdata(d_inode(c->dentry));
862 863 864
out_free:
	kfree(capability);
out:
865 866 867
	return err;
}

M
Miklos Szeredi 已提交
868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
			   int flags)
{
	int err;
	DEFINE_DELAYED_CALL(done);
	struct path parentpath;
	struct ovl_copy_up_ctx ctx = {
		.parent = parent,
		.dentry = dentry,
		.workdir = ovl_workdir(dentry),
	};

	if (WARN_ON(!ctx.workdir))
		return -EROFS;

	ovl_path_lower(dentry, &ctx.lowerpath);
	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
	if (err)
		return err;

889 890
	ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);

891 892 893 894
	if (parent) {
		ovl_path_upper(parent, &parentpath);
		ctx.destdir = parentpath.dentry;
		ctx.destname = dentry->d_name;
M
Miklos Szeredi 已提交
895

896 897 898 899 900 901
		err = vfs_getattr(&parentpath, &ctx.pstat,
				  STATX_ATIME | STATX_MTIME,
				  AT_STATX_SYNC_AS_STAT);
		if (err)
			return err;
	}
M
Miklos Szeredi 已提交
902 903 904 905 906 907 908 909 910 911 912

	/* maybe truncate regular file. this has no effect on dirs */
	if (flags & O_TRUNC)
		ctx.stat.size = 0;

	if (S_ISLNK(ctx.stat.mode)) {
		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
		if (IS_ERR(ctx.link))
			return PTR_ERR(ctx.link);
	}

913
	err = ovl_copy_up_start(dentry, flags);
M
Miklos Szeredi 已提交
914 915 916 917 918
	/* err < 0: interrupted, err > 0: raced with another copy-up */
	if (unlikely(err)) {
		if (err > 0)
			err = 0;
	} else {
919 920
		if (!ovl_dentry_upper(dentry))
			err = ovl_do_copy_up(&ctx);
921
		if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
922
			err = ovl_link_up(&ctx);
923 924
		if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
			err = ovl_copy_up_meta_inode_data(&ctx);
M
Miklos Szeredi 已提交
925 926
		ovl_copy_up_end(dentry);
	}
M
Miklos Szeredi 已提交
927
	do_delayed_call(&done);
M
Miklos Szeredi 已提交
928 929 930 931

	return err;
}

Y
youngjun 已提交
932
static int ovl_copy_up_flags(struct dentry *dentry, int flags)
M
Miklos Szeredi 已提交
933
{
934 935
	int err = 0;
	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
936 937 938 939 940 941 942 943 944
	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);

	/*
	 * With NFS export, copy up can get called for a disconnected non-dir.
	 * In this case, we will copy up lower inode to index dir without
	 * linking it to upper dir.
	 */
	if (WARN_ON(disconnected && d_is_dir(dentry)))
		return -EIO;
M
Miklos Szeredi 已提交
945 946 947

	while (!err) {
		struct dentry *next;
948
		struct dentry *parent = NULL;
M
Miklos Szeredi 已提交
949

950
		if (ovl_already_copied_up(dentry, flags))
M
Miklos Szeredi 已提交
951 952 953 954
			break;

		next = dget(dentry);
		/* find the topmost dentry not yet copied up */
955
		for (; !disconnected;) {
M
Miklos Szeredi 已提交
956 957
			parent = dget_parent(next);

958
			if (ovl_dentry_upper(parent))
M
Miklos Szeredi 已提交
959 960 961 962 963 964
				break;

			dput(next);
			next = parent;
		}

M
Miklos Szeredi 已提交
965
		err = ovl_copy_up_one(parent, next, flags);
M
Miklos Szeredi 已提交
966 967 968 969

		dput(parent);
		dput(next);
	}
970
	revert_creds(old_cred);
M
Miklos Szeredi 已提交
971 972 973

	return err;
}
974

975 976 977
/*
 * Tell whether opening @dentry with @flags requires a copy up first.
 *
 * False when the dentry is already copied up for these flags, when it is a
 * special file, or when the flags themselves do not call for copy up.
 */
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
	/* Copy up of disconnected dentry does not set upper alias */
	if (ovl_already_copied_up(dentry, flags))
		return false;

	if (special_file(d_inode(dentry)->i_mode))
		return false;

	return ovl_open_flags_need_copy_up(flags);
}

990
/*
 * Copy up @dentry if opening it with @flags requires that.
 *
 * The copy up is bracketed by ovl_want_write()/ovl_drop_write().
 *
 * Returns 0 when nothing had to be done or the copy up succeeded, otherwise
 * a negative errno from ovl_want_write() or from the copy up.
 */
int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
	int err;

	if (!ovl_open_need_copy_up(dentry, flags))
		return 0;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up_flags(dentry, flags);
	ovl_drop_write(dentry);

	return err;
}

1005 1006 1007 1008 1009
/*
 * Copy up @dentry as if it were being opened with O_WRONLY.
 *
 * Returns the result of ovl_copy_up_flags().
 */
int ovl_copy_up_with_data(struct dentry *dentry)
{
	const int flags = O_WRONLY;

	return ovl_copy_up_flags(dentry, flags);
}

1010 1011 1012 1013
/*
 * Copy up @dentry with no open flags (flags == 0).
 *
 * Returns the result of ovl_copy_up_flags().
 */
int ovl_copy_up(struct dentry *dentry)
{
	const int flags = 0;

	return ovl_copy_up_flags(dentry, flags);
}